In [None]:
import tensorflow as tf
from tensorflow import keras
import sklearn.model_selection as ms
import sklearn.preprocessing as pp
import sklearn.tree as tree
import sklearn.ensemble as ensemble

import numpy as np 
import pandas as pd 

import os
# Print the full path of every file found under the Kaggle input directory,
# so the dataset paths used in the cells below can be checked by eye.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)


In [None]:
# Load the training split of the ESRB rating dataset and preview its first rows.
data = pd.read_csv(
    filepath_or_buffer=r'../input/video-games-rating-by-esrb/Video_games_esrb_rating.csv'
)
data.head(n=5)

In [None]:
# Load the separately provided test split and preview its first rows.
test_data = pd.read_csv(
    filepath_or_buffer=r'../input/video-games-rating-by-esrb/test_esrb.csv'
)
test_data.head(n=5)

In [None]:
# Summarise the training frame: column names, dtypes and non-null counts.
data.info()

# Data Preparation

In [None]:
# Features: every column except the game title and the target label.
# `drop` returns a new frame, so `data` itself is left untouched.
X = data.drop(columns=['title', 'esrb_rating'])

# Target: the ESRB rating column, copied so later changes cannot touch `data`.
y = data.loc[:, 'esrb_rating'].copy()
y

In [None]:
# Test features: same column selection as the training features.
# `drop` returns a new frame, so `test_data` itself is left untouched.
X_test = test_data.drop(columns=['title', 'esrb_rating'])

# Test target: the ESRB rating column of the test split.
y_test = test_data.loc[:, 'esrb_rating'].copy()
y_test

In [None]:
# Stack the train labels followed by the test labels into one Series so a single
# encoder can be fitted on every class that appears in either split.
# `Series.append` was removed in pandas 2.0; `pd.concat` is the supported
# equivalent and also works on older pandas versions.
y_total = pd.concat([y, y_test], ignore_index=True)
y_total

In [None]:
# List the distinct rating labels present across the train and test splits.
y_total.unique()

In [None]:
# Fit one label encoder on the combined labels, then map every label to its
# integer class id. `fit` returns the encoder itself, so the calls can be chained.
y_encoder = pp.LabelEncoder().fit(y_total)
y_transform = y_encoder.transform(y_total)
y_transform

In [None]:
# `y_transform` holds the encoded train labels followed by the encoded test labels.
# The split point is taken from the number of training feature rows rather than a
# hard-coded row count, so the cell stays correct if the dataset changes.
n_train = len(X)
y = y_transform[:n_train]
y_test = y_transform[n_train:]

# Each label array must line up one-to-one with the feature rows of its split.
assert len(y) == len(X), "train labels and train features differ in length"
assert len(y_test) == len(X_test), "test labels and test features differ in length"

print(len(y))
print(len(y_test))

In [None]:
# Sanity check on the training features: print each column name followed by its
# distinct values, to confirm by eye that every column is binary.
for col in X.columns:
    # The trailing empty string reproduces the blank separator line.
    print(col, X[col].unique(), '', sep='\n')

In [None]:
# Sanity check on the test features: print each column name followed by its
# distinct values, to confirm by eye that every column is binary.
for col in X_test.columns:
    # The trailing empty string reproduces the blank separator line.
    print(col, X_test[col].unique(), '', sep='\n')

In [None]:
# Hold out 20% of the training data as a validation set; the fixed random_state
# makes the split repeatable.
X_train, X_validation, y_train, y_validation = ms.train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=42,
)

In [None]:
# Per-sample input shape for the networks: a 1-tuple holding the number of
# feature columns (the row/batch dimension is left out).
X_input_shape = (X_train.shape[1],)
X_input_shape

# Neural Network Model with ReLU activation function

In [None]:
# Fix TensorFlow's global seed so the randomly initialised weights are repeatable
# across Restart & Run All; without it every run gives different scores.
# NOTE(review): on Keras 3 the initialisers draw their seed differently and
# `keras.utils.set_random_seed` may be needed instead - confirm for your version.
tf.random.set_seed(42)

# Fully connected classifier: three ReLU hidden layers (100 -> 50 -> 25 units)
# followed by a 4-way softmax output.
# NOTE(review): assumes `esrb_rating` has exactly four classes - confirm against
# the output of `y_total.unique()`.
base_model = keras.models.Sequential([
    keras.layers.Input(shape=X_input_shape, name='input_layer'),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(50, activation='relu'),
    keras.layers.Dense(25, activation='relu'),
    keras.layers.Dense(4, activation='softmax', name='output_layer'),
])

base_model.summary()

In [None]:
# Integer class labels are used, hence the sparse categorical loss and metric.
base_model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy',
    metrics=[keras.metrics.sparse_categorical_accuracy],
)

# Train for 30 epochs, evaluating on the validation split after each epoch.
history = base_model.fit(
    X_train,
    y_train,
    epochs=30,
    validation_data=(X_validation, y_validation),
)

In [None]:
# Score the ReLU network on the test split (loss plus the compiled accuracy metric).
base_model.evaluate(x=X_test, y=y_test)

# Neural Network Model with ELU activation function

In [None]:
# Fix TensorFlow's global seed so the randomly initialised weights are repeatable
# across Restart & Run All; without it every run gives different scores.
# NOTE(review): on Keras 3 the initialisers draw their seed differently and
# `keras.utils.set_random_seed` may be needed instead - confirm for your version.
tf.random.set_seed(42)

# Same layer sizes as the ReLU network (100 -> 50 -> 25 -> 4), but the hidden
# layers use ELU activations with He-normal weight initialisation.
# NOTE(review): the 4-way softmax assumes `esrb_rating` has exactly four
# classes - confirm against the output of `y_total.unique()`.
new_model = keras.models.Sequential([
    keras.layers.Input(shape=X_input_shape, name='input_layer'),
    keras.layers.Dense(100, activation='elu', kernel_initializer='he_normal'),
    keras.layers.Dense(50, activation='elu', kernel_initializer='he_normal'),
    keras.layers.Dense(25, activation='elu', kernel_initializer='he_normal'),
    keras.layers.Dense(4, activation='softmax', name='output_layer'),
])

new_model.summary()

In [None]:
# Integer class labels are used, hence the sparse categorical loss and metric.
new_model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy',
    metrics=[keras.metrics.sparse_categorical_accuracy],
)

# Train for 30 epochs, evaluating on the validation split after each epoch.
new_history = new_model.fit(
    X_train,
    y_train,
    epochs=30,
    validation_data=(X_validation, y_validation),
)

In [None]:
# Score the ELU network on the test split (loss plus the compiled accuracy metric).
new_model.evaluate(x=X_test, y=y_test)

# Traditional Models

In [None]:
# Stand-alone tree-based classifiers, each with a fixed seed for repeatability.
tree_clf = tree.DecisionTreeClassifier(random_state=42)
rf_clf = ensemble.RandomForestClassifier(random_state=42)
gb_clf = ensemble.GradientBoostingClassifier(random_state=42)
ext_clf = ensemble.ExtraTreesClassifier(random_state=42)

# Hard-voting ensemble over the same four model types. It is given fresh
# estimator instances rather than the stand-alone objects defined above.
voting_classifier = ensemble.VotingClassifier(
    estimators=[
        ('tree_clf', tree.DecisionTreeClassifier(random_state=42)),
        ('rf_clf', ensemble.RandomForestClassifier(random_state=42)),
        ('gb_clf', ensemble.GradientBoostingClassifier(random_state=42)),
        ('ext_clf', ensemble.ExtraTreesClassifier(random_state=42)),
    ],
    voting='hard',
)

# Every model that the evaluation loop fits and scores.
list_of_classifier = [
    tree_clf,
    rf_clf,
    gb_clf,
    ext_clf,
    voting_classifier,
]

In [None]:
# Fit each classifier on the training split, then report its accuracy (as a
# percentage rounded to one decimal) on the train, validation and test splits.
for estimator in list_of_classifier:
    estimator.fit(X_train, y_train)
    print(type(estimator).__name__)
    for split_name, split_X, split_y in (
        ('train', X_train, y_train),
        ('validation', X_validation, y_validation),
        ('test', X_test, y_test),
    ):
        print(f'Score on {split_name} data')
        print(round(estimator.score(split_X, split_y) * 100, 1))
    print()

# Summary and Commentary

* The NN model with the ELU activation function performs better than the NN model with the ReLU activation function.
* The RandomForest model performs best on the testing data, with an accuracy score of 85%, though the NN model with the ELU activation function performs best on the validation data.


| Models             	| Score on Train Data 	| Score on Validation Data 	| Score on Testing Data 	|
|--------------------	|---------------------	|--------------------------	|-----------------------	|
| NN model with ReLU 	| 81.1                	| 83.1                     	| 75.6                  	|
| NN model with ELU  	| 85.5                	| 87.1                     	| 79.6                  	|
| DecisionTree       	| 92.5                	| 85.2                     	| 83.2                  	|
| RandomForest       	| 92.5                	| 85.2                     	| 85.0                  	|
| GradientBoosting   	| 88.7                	| 84.4                     	| 79.2                  	|
| ExtraTrees         	| 92.5                	| 86.3                     	| 84.6                  	|
| VotingClassifier   	| 92.5                	| 85.5                     	| 84.2                  	|
