# Task for Today  

***

## Video Game ESRB Rating Prediction  

Given *data about video games*, let's try to predict the **ESRB rating** of a given game.

We will use a variety of classification models to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd
# Use the fully-qualified option name. The bare 'max_columns' pattern is
# ambiguous on newer pandas releases (it also matches
# 'styler.render.max_columns') and raises OptionError there.
pd.set_option('display.max_columns', None)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

import warnings
warnings.filterwarnings(action='ignore')

In [None]:
# Read the training and test CSV files (in that order) into DataFrames.
train_df, test_df = map(
    pd.read_csv,
    (
        '../input/video-games-rating-by-esrb/Video_games_esrb_rating.csv',
        '../input/video-games-rating-by-esrb/test_esrb.csv',
    ),
)

In [None]:
# Render the raw training frame as the cell output for a first look at the data.
train_df

In [None]:
# Print pandas' summary of the training frame (columns, non-null counts, dtypes).
train_df.info()

# Preprocessing

In [None]:
def preprocess_inputs(df, scaler, split='train'):
    """Turn a raw games frame into scaled features and a target series.

    The caller's frame is left untouched. The 'title' column is dropped,
    the rows are shuffled with a fixed seed (random_state=1) and re-indexed
    from zero, and the 'esrb_rating' column is separated out as the target.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the 'title' and 'esrb_rating' columns;
        every remaining column is treated as a feature.
    scaler : object
        Any object exposing ``fit(X)`` and ``transform(X)``. It is fitted
        here only when ``split == 'train'``; for any other value it is
        used as-is and must therefore already be fitted.
    split : str, default 'train'
        Which split is being processed.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        The scaled features (same index and column labels as the unscaled
        features) and the matching 'esrb_rating' target.
    """
    # drop() and sample() both return new objects, so `df` itself is never modified.
    shuffled = (
        df.drop(columns='title')
          .sample(frac=1.0, random_state=1)
          .reset_index(drop=True)
    )

    # Pull the target out of the local frame; what remains are the features.
    y = shuffled.pop('esrb_rating')
    features = shuffled

    # Only the training split is allowed to determine the scaling statistics.
    if split == 'train':
        scaler.fit(features)

    scaled_values = scaler.transform(features)
    X = pd.DataFrame(scaled_values, index=features.index, columns=features.columns)

    return X, y

In [None]:
# A single scaler instance is shared by both calls: the 'train' call fits it,
# and the 'test' call reuses that fitted state.
scaler = StandardScaler()

X_train, y_train = preprocess_inputs(train_df, scaler, 'train')
X_test, y_test = preprocess_inputs(test_df, scaler, 'test')

In [None]:
# Render the scaled training features returned by preprocess_inputs.
X_train

In [None]:
# Render the training target ('esrb_rating') returned by preprocess_inputs.
y_train

# Training

In [None]:
# Seed shared by every estimator that accepts one, so that a fresh
# "Restart & Run All" reproduces the same fitted models and accuracies.
# The value matches the random_state used for the shuffle in preprocess_inputs.
SEED = 1

# Display name -> unfitted classifier. The names are left-padded to a common
# width so the printed lines align.
# K-Nearest Neighbors takes no seed. SVC is left unseeded because its
# random_state only matters when probability estimates are enabled.
# NOTE(review): recent XGBoost releases may reject non-integer class labels;
# confirm the dtype of y_train against the installed version.
models = {
    "                   Logistic Regression": LogisticRegression(random_state=SEED),
    "                   K-Nearest Neighbors": KNeighborsClassifier(),
    "                         Decision Tree": DecisionTreeClassifier(random_state=SEED),
    "Support Vector Machine (Linear Kernel)": LinearSVC(random_state=SEED),
    "   Support Vector Machine (RBF Kernel)": SVC(),
    "                        Neural Network": MLPClassifier(random_state=SEED),
    "                         Random Forest": RandomForestClassifier(random_state=SEED),
    "                     Gradient Boosting": GradientBoostingClassifier(random_state=SEED),
    "                               XGBoost": XGBClassifier(eval_metric='mlogloss', random_state=SEED),
    "                              LightGBM": LGBMClassifier(random_state=SEED),
    "                              CatBoost": CatBoostClassifier(verbose=0, random_seed=SEED)
}

# Fit every classifier on the same training split, reporting progress as we go.
for name, model in models.items():
    model.fit(X_train, y_train)
    print(name + " trained.")

# Results

In [None]:
# Report each fitted model's accuracy on the held-out test split as a percentage.
for name, model in models.items():
    accuracy_pct = model.score(X_test, y_test) * 100
    print(name + " Accuracy: {:.3f}%".format(accuracy_pct))

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/IoAL3QYcgcw