In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
tf.random.set_seed(100)

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [6]:
df = pd.read_csv("vgsales.csv")
print(df.shape)
df.describe(include='all')

df = df.dropna()
df = df.drop(columns=['Rank', 'Name', 'NA_Sales', 'JP_Sales', 'Other_Sales'], axis=1)
df = pd.get_dummies(df, columns=['Platform'], drop_first=True, prefix='Platform')
df = pd.get_dummies(df, columns=['Genre'], drop_first=True, prefix='Genra')
df = pd.get_dummies(df, columns=['Publisher'], drop_first=True, prefix='Publisher')
df.head()

(16598, 11)


Unnamed: 0,Year,EU_Sales,Global_Sales,Platform_3DO,Platform_3DS,Platform_DC,Platform_DS,Platform_GB,Platform_GBA,Platform_GC,...,Publisher_Zushi Games,Publisher_bitComposer Games,Publisher_dramatic create,Publisher_fonfun,Publisher_iWin,Publisher_id Software,Publisher_imageepoch Inc.,Publisher_inXile Entertainment,"Publisher_mixi, Inc",Publisher_responDESIGN
0,2006.0,29.02,82.74,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,1985.0,3.58,40.24,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,2008.0,12.88,35.82,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,2009.0,11.01,33.0,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,1996.0,8.89,31.37,False,False,False,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False


In [7]:
target_variable = ['EU_Sales']
predictors = list(set(list(df.columns))-set(target_variable))
X = df[predictors].values
X = X.astype(float)
y = df[target_variable].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=100)

print(X_train.shape); print(X_test.shape)

model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(32, input_shape = (X_train.shape[1],), activation = 'relu'))
model.add(tf.keras.layers.Dense(16, activation= "relu"))
model.add(tf.keras.layers.Dense(8, activation= "relu"))
model.add(tf.keras.layers.Dense(4, activation= "relu"))
model.add(tf.keras.layers.Dense(1))

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss = 'mae', metrics = ['mae'], optimizer = optimizer)

model.fit(X_train, y_train, epochs=50)

print(model.evaluate(X_train, y_train))

print(model.evaluate(X_test, y_test))

(11403, 618)
(4888, 618)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m357/357[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 2.8472 - mae: 2.8472
Epoch 2/50
[1m357/357[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.1365 - mae: 0.1365
Epoch 3/50
[1m357/357[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.1365 - mae: 0.1365
Epoch 4/50
[1m357/357[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.1365 - mae: 0.1365
Epoch 5/50
[1m357/357[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.1366 - mae: 0.1366
Epoch 6/50
[1m357/357[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.1365 - mae: 0.1365
Epoch 7/50
[1m357/357[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.1365 - mae: 0.1365
Epoch 8/50
[1m357/357[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.1365 - mae: 0.1365
Epoch 9/50
[1m357/357[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms

In [8]:
print(model.evaluate(X_train, y_train))

[1m357/357[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.1450 - mae: 0.1450
[0.14126558601856232, 0.14126558601856232]


In [9]:
print(model.evaluate(X_test, y_test))

[1m153/153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.1574 - mae: 0.1574
[0.14861063659191132, 0.14861063659191132]


In [10]:
pred_train = model.predict(X_train)
r2_score(y_train, pred_train)

[1m357/357[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


-0.05104370877514719

In [11]:
pred_test = model.predict(X_test)
r2_score(y_test, pred_test)

[1m153/153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


-0.07801087605862