In [1]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.utils import set_random_seed

In [2]:
# Location of the games CSV inside the Kaggle input directory.
csv_path = '/kaggle/input/card-12-games/games.csv'
ds = pd.read_csv(csv_path)
# Bare expression: render the loaded frame as the cell output.
ds

Unnamed: 0,Name,Platform,Year_of_Release,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Critic_Score,Critic_Count,User_Score,User_Count,Developer,Rating
0,Wii Sports,Wii,2006.0,Sports,Nintendo,41.36,28.96,3.77,8.45,82.53,76.0,51.0,8,322.0,Nintendo,E
1,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24,,,,,,
2,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.68,12.76,3.79,3.29,35.52,82.0,73.0,8.3,709.0,Nintendo,E
3,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.61,10.93,3.28,2.95,32.77,80.0,73.0,8,192.0,Nintendo,E
4,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16714,Samurai Warriors: Sanada Maru,PS3,2016.0,Action,Tecmo Koei,0.00,0.00,0.01,0.00,0.01,,,,,,
16715,LMA Manager 2007,X360,2006.0,Sports,Codemasters,0.00,0.01,0.00,0.00,0.01,,,,,,
16716,Haitaka no Psychedelica,PSV,2016.0,Adventure,Idea Factory,0.00,0.00,0.01,0.00,0.01,,,,,,
16717,Spirits & Spells,GBA,2003.0,Platform,Wanadoo,0.01,0.00,0.00,0.00,0.01,,,,,,


In [3]:
# Columns removed before modelling: the game title, the four regional sales
# figures and the developer. Global_Sales is kept (it is used as the target
# further down).
toDrop = ['Name', 'NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales', 'Developer']
# One drop call for the whole list instead of one full-frame copy per column.
ds = ds.drop(columns=toDrop)
ds

Unnamed: 0,Platform,Year_of_Release,Genre,Publisher,Global_Sales,Critic_Score,Critic_Count,User_Score,User_Count,Rating
0,Wii,2006.0,Sports,Nintendo,82.53,76.0,51.0,8,322.0,E
1,NES,1985.0,Platform,Nintendo,40.24,,,,,
2,Wii,2008.0,Racing,Nintendo,35.52,82.0,73.0,8.3,709.0,E
3,Wii,2009.0,Sports,Nintendo,32.77,80.0,73.0,8,192.0,E
4,GB,1996.0,Role-Playing,Nintendo,31.37,,,,,
...,...,...,...,...,...,...,...,...,...,...
16714,PS3,2016.0,Action,Tecmo Koei,0.01,,,,,
16715,X360,2006.0,Sports,Codemasters,0.01,,,,,
16716,PSV,2016.0,Adventure,Idea Factory,0.01,,,,,
16717,GBA,2003.0,Platform,Wanadoo,0.01,,,,,


In [4]:
# Number of missing values in each remaining column.
ds.isna().sum()

Platform              0
Year_of_Release     269
Genre                 2
Publisher            54
Global_Sales          0
Critic_Score       8582
Critic_Count       8582
User_Score         6704
User_Count         9129
Rating             6769
dtype: int64

In [5]:
# Columns whose gaps are filled with their most frequent value.
catCols = ['Year_of_Release', 'Genre', 'Publisher', 'Rating']
# mode() returns the modes in sorted order; [0] takes the first one, so ties
# resolve to the smallest value.
ds = ds.assign(**{col: ds[col].fillna(ds[col].mode()[0]) for col in catCols})
# Re-count missing values to confirm these four columns are now complete.
ds.isnull().sum()

Platform              0
Year_of_Release       0
Genre                 0
Publisher             0
Global_Sales          0
Critic_Score       8582
Critic_Count       8582
User_Score         6704
User_Count         9129
Rating                0
dtype: int64

In [6]:
# User_Score contains the non-numeric placeholder 'tbd'; blank those entries
# (they become NaN) so the column can be cast to float.
is_tbd = ds['User_Score'] == 'tbd'
ds['User_Score'] = ds['User_Score'].mask(is_tbd).astype(float)

In [7]:
# Score/count columns whose gaps are filled with the column mean.
numCols = ['Critic_Score', 'Critic_Count', 'User_Score', 'User_Count']
# Each mean is taken over the non-missing entries of that column only.
mean_filled = {col: ds[col].fillna(ds[col].mean()) for col in numCols}
ds = ds.assign(**mean_filled)

In [8]:
# Final check: per-column count of values that are still missing.
ds.isna().sum()

Platform           0
Year_of_Release    0
Genre              0
Publisher          0
Global_Sales       0
Critic_Score       0
Critic_Count       0
User_Score         0
User_Count         0
Rating             0
dtype: int64

In [9]:
# Column position 4 is the regression target; the other nine positions
# (0-3 and 5-9), in their existing order, are the features.
target_pos = 4
feature_positions = [pos for pos in range(10) if pos != target_pos]
X = ds.iloc[:, feature_positions].to_numpy()
y = ds.iloc[:, target_pos].to_numpy()

In [10]:
# Positions, within X, of the columns that get one-hot encoded; every other
# column is passed through unchanged and placed after the encoded block.
toOneHotCols = [0, 2, 3, 8]
onehotencoder = ColumnTransformer(
    transformers=[("OneHot", OneHotEncoder(), toOneHotCols)],
    remainder='passthrough',
)
# The transformer returns a sparse matrix here; densify it for Keras.
encoded_sparse = onehotencoder.fit_transform(X)
X = encoded_sparse.toarray()

In [11]:
# (rows, columns) of the encoded feature matrix; the column count is the
# network's input width.
X.shape

(16719, 637)

In [12]:
# Seed the Python, NumPy and backend RNGs before any layer is created so that
# weight initialisation, dropout masks and batch shuffling repeat on a fresh
# Restart & Run All.
SEED = 42
set_random_seed(SEED)

# One input unit per encoded feature column; both hidden layers use half that
# width (floor division, same value as the previous int(n / 2)).
inputNeurons = X.shape[1]
hiddenNeurons = inputNeurons // 2

# Functional-API network: two ReLU hidden layers, each followed by 20%
# dropout, feeding a single linear unit for the regression output.
input_layer = Input(shape=(inputNeurons,))
hidden_layer1 = Dense(units=hiddenNeurons, activation='relu')(input_layer)
dropout_layer1 = Dropout(rate=0.2)(hidden_layer1)
hidden_layer2 = Dense(units=hiddenNeurons, activation='relu')(dropout_layer1)
dropout_layer2 = Dropout(rate=0.2)(hidden_layer2)
output_layer = Dense(units=1, activation='linear')(dropout_layer2)

# Mean squared error loss, optimised with Adam at its default settings.
regressor = Model(inputs=input_layer, outputs=output_layer)
regressor.compile(optimizer='adam', loss='mse')

In [13]:
# Train on every row of X/y (no validation data is passed) for 1000 epochs in
# mini-batches of 200. Left as a bare expression so the returned History
# object is the cell output.
regressor.fit(x=X, y=y, batch_size=200, epochs=1000)

Epoch 1/1000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 6025.4160
Epoch 2/1000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 413.8965
Epoch 3/1000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 132.8815
Epoch 4/1000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 59.0012
Epoch 5/1000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 31.3883
Epoch 6/1000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 22.8351
Epoch 7/1000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 11.9124
Epoch 8/1000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 11.0369
Epoch 9/1000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 7.9605
Epoch 10/1000
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0

<keras.src.callbacks.history.History at 0x79c4b996d930>

In [14]:
# Predict for the same rows the model was fitted on.
predictions = regressor.predict(x=X)

[1m523/523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [15]:
from sklearn.metrics import mean_absolute_error

In [16]:
# NOTE: y and predictions both come from the rows used in fit(), so this is a
# training-set error, not an estimate of performance on unseen games.
mean_absolute_error(y_true=y, y_pred=predictions)

0.44303176252549187