<a href="https://kritikseth.github.io/redirect" target="_parent"><img src="https://raw.githack.com/kritikseth/kritikseth/master/redirect.svg" alt="Kritik Seth"/></a>

In [None]:
import pandas as pd
import numpy as np
from pandasql import sqldf

import math
from scipy import stats
from scipy.stats import geom
from sklearn.linear_model import LinearRegression
from statsmodels.stats.power import TTestIndPower, ttest_power

import re
from tqdm.notebook import tqdm

import seaborn as sns
from plotly import tools
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.offline as py
import matplotlib.pyplot as plt
%matplotlib inline

import gower
import lightgbm as lgb
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from category_encoders import TargetEncoder
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import r2_score, mean_squared_error, roc_auc_score, classification_report, mean_absolute_error

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides library deprecation notices
# (e.g. about estimator arguments that are about to be removed).
warnings.filterwarnings('ignore')
# Redundant with the blanket filter above, which already covers DeprecationWarning.
warnings.filterwarnings('ignore', category = DeprecationWarning)

# Single seed passed as `random_state` to the train/test splits and estimators below.
R_STATE = 18714836 # random state

## Classification

In [None]:
# Load 'transformed_fps.csv' (path is relative to the notebook's working directory) into `df`.
df = pd.read_csv('transformed_fps.csv')

In [None]:
# Display the (rows, columns) tuple of the freshly loaded frame.
df.shape

In [None]:
# Columns removed before modelling ('GPU Open GL' has only one value).
drop_cols = [
    'id',
    'CPU Name',
    'GPU Name',
    'GPU Open GL',
    'CPU Model',
    'Dataset',
]

# Nominal categorical columns; the notebook one-hot encodes these with pd.get_dummies.
label_cols = [
    'CPU Brand',
    'CPU Series',
    'CPU Type',
    'CPU Multiplier Unlocked',
    'GPU Architecture',
    'GPU Bus',
    'GPU Memory Type',
    'GPU Open CL',
    'GPU Shader Model',
    'GPU Vulkan',
    'Game',
]

# Columns treated as ordered categories (treating 'CPU Base Clock' as ordinal is questionable).
# NOTE(review): this list is not referenced anywhere else in the visible notebook.
auto_ordinal_cols = ['Game Resolution', 'CPU Base Clock', 'GPU Direct X']

# Disabled alternative to dropping 'CPU Model': encode it as an ordinal instead.
#df['CPU Model'] = df['CPU Model'].replace({'X2': 0, '3': 1, 'i3': 1, '5': 2, 'i5': 2, '7': 3, 'i7': 3, '9': 4, 'Threadripper': 5})

# Rebinds `df`, so re-running this cell without reloading raises KeyError (columns already gone).
df = df.drop(columns=drop_cols)



In [None]:
# One-hot encode the columns listed in `label_cols`; each is replaced by its indicator columns.
df = pd.get_dummies(df, columns=label_cols)

In [None]:
# Display the frame after one-hot encoding.
df

In [None]:
# Hand-written ordinal encodings: raw value -> integer rank.
enc_dict_resolution = {720: 0, 1080: 1, 1440: 2}
enc_dict_CPU_base_clock = {100: 0, 133: 1, 200: 2}
enc_dict_direct = {'12': 0, '12 Ultimate': 1}


def _encode_ordinal(column, mapping, as_str=False):
    """Map `column` through `mapping` without silently creating NaNs.

    `Series.map` with a dict turns every value that is not a key into NaN.
    That happens for unexpected raw values and also when this cell is run a
    second time (the already-encoded 0/1/2 are not keys), so fail loudly.

    Parameters
    ----------
    column : pandas.Series
        Raw column to encode.
    mapping : dict
        Raw value -> ordinal code.
    as_str : bool, default False
        Cast values to `str` before the lookup (used for 'GPU Direct X').

    Returns
    -------
    pandas.Series
        Encoded column; entries that were already missing stay missing.

    Raises
    ------
    ValueError
        If a non-missing value has no entry in `mapping`.
    """
    keys = column.astype(str) if as_str else column
    encoded = keys.map(mapping)
    # Only flag NaNs that the mapping introduced, not ones already in the data.
    unmapped = encoded.isna() & column.notna()
    if unmapped.any():
        unexpected = sorted(keys[unmapped].unique().tolist(), key=str)
        raise ValueError(
            f"{column.name!r}: values {unexpected} are not covered by the encoding "
            f"{mapping}; reload `df` if this cell has already been run."
        )
    return encoded


df['Game Resolution'] = _encode_ordinal(df['Game Resolution'], enc_dict_resolution)
df['CPU Base Clock'] = _encode_ordinal(df['CPU Base Clock'], enc_dict_CPU_base_clock)
# 'GPU Direct X' is compared as text so that '12' and '12 Ultimate' share one key type.
df['GPU Direct X'] = _encode_ordinal(df['GPU Direct X'], enc_dict_direct, as_str=True)

In [None]:
def plot_ROC_curve(model, xtrain, ytrain, xtest, ytest):
    """Render ROC curves for `model` and hand back the visualizer.

    The visualizer is fitted on the training split, scored on the test
    split and then shown.

    Parameters
    ----------
    model : estimator wrapped by `ROCAUC`.
    xtrain, ytrain : training features / labels passed to `fit`.
    xtest, ytest : held-out features / labels passed to `score`.

    Returns
    -------
    The `ROCAUC` visualizer instance after `show()` has been called.
    """
    # NOTE(review): `ROCAUC` is not imported anywhere in the visible notebook
    # (presumably yellowbrick.classifier.ROCAUC - confirm and add the import),
    # otherwise this call raises NameError on a fresh kernel.
    roc_viz = ROCAUC(model)

    # Train split first, then evaluate on the test split, then draw.
    roc_viz.fit(xtrain, ytrain)
    roc_viz.score(xtest, ytest)
    roc_viz.show()

    return roc_viz

def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
    """Compute a one-vs-rest ROC AUC for every class found in `actual_class`.

    For each class, both the true and the predicted labels are binarised
    (1 = this class, 0 = anything else) and handed to `roc_auc_score`.
    Hard 0/1 predictions are passed, not probabilities or scores.

    A predicted label that never occurs in `actual_class` counts as
    "not this class" for every class. It used to be counted as a positive
    for all of them.

    Parameters
    ----------
    actual_class : iterable of hashable labels
        Ground-truth labels.
    pred_class : iterable of labels
        Predicted labels, aligned with `actual_class`.
    average : str, default "macro"
        Forwarded unchanged to `roc_auc_score`.

    Returns
    -------
    dict
        Maps each class label to its one-vs-rest AUC, in first-seen order.

    Raises
    ------
    Whatever `roc_auc_score` raises for the binarised inputs.
    """
    # Materialise once so generators and Series are iterated consistently.
    actual = list(actual_class)
    predicted = list(pred_class)

    roc_auc_dict = {}
    # dict.fromkeys de-duplicates while keeping first-seen order.
    for per_class in dict.fromkeys(actual):
        # Mark the current class as 1 and every other label as 0.
        new_actual_class = [1 if label == per_class else 0 for label in actual]
        new_pred_class = [1 if label == per_class else 0 for label in predicted]

        roc_auc_dict[per_class] = roc_auc_score(new_actual_class, new_pred_class, average = average)

    return roc_auc_dict

In [None]:
# Random forest on the full (un-reduced) feature set.
# max_features='sqrt' is what the former 'auto' alias meant for classifiers;
# recent scikit-learn releases no longer accept 'auto'.
# All other hyperparameters are left at their scikit-learn defaults.
rfc = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    max_features='sqrt',
    n_jobs=-1,
    random_state=R_STATE,
)

# Features are every column except the target.
df1 = df.drop(columns='Game Settings')
X = df1
y = df['Game Settings']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=R_STATE)
rfc.fit(X_train, y_train)

y_pred = rfc.predict(X_test)
# NOTE(review): the display names are applied to the class labels in the report's own
# label order - confirm that order really is low, med, high, max.
target_names = ['low', 'med', 'high', 'max']
print(classification_report(y_test, y_pred, target_names=target_names))

In [None]:
# ROC curves for `rfc`: the visualizer is fitted on the train split and scored on the test split.
plot_ROC_curve(rfc, X_train, y_train, X_test, y_test)

In [None]:
# Random forest on the first 25 principal components.
# max_features='sqrt' is what the former 'auto' alias meant for classifiers;
# recent scikit-learn releases no longer accept 'auto'.
# All other hyperparameters are left at their scikit-learn defaults.
rfc = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    max_features='sqrt',
    n_jobs=-1,
    random_state=R_STATE,
)

# Features are every column except the target.
df1 = df.drop(columns='Game Settings')
X = df1
y = df['Game Settings']

# PCA is seeded as well so that a randomized SVD solver cannot make runs differ.
# NOTE(review): features are not standardised before PCA - confirm that is intended.
steps = [('pca', PCA(n_components=25, random_state=R_STATE)), ('m', rfc)]
model = Pipeline(steps=steps)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=R_STATE)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
# NOTE(review): the display names are applied to the class labels in the report's own
# label order - confirm that order really is low, med, high, max.
target_names = ['low', 'med', 'high', 'max']
print(classification_report(y_test, y_pred, target_names=target_names))

In [None]:
# ROC curves for whichever `model` is currently bound (the PCA(25) + random forest pipeline when cells run in order).
plot_ROC_curve(model, X_train, y_train, X_test, y_test)

In [None]:
# Random forest on the first 30 principal components.
# max_features='sqrt' is what the former 'auto' alias meant for classifiers;
# recent scikit-learn releases no longer accept 'auto'.
# All other hyperparameters are left at their scikit-learn defaults.
rfc = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    max_features='sqrt',
    n_jobs=-1,
    random_state=R_STATE,
)

# Features are every column except the target.
df1 = df.drop(columns='Game Settings')
X = df1
y = df['Game Settings']

# PCA is seeded as well so that a randomized SVD solver cannot make runs differ.
# NOTE(review): features are not standardised before PCA - confirm that is intended.
steps = [('pca', PCA(n_components=30, random_state=R_STATE)), ('m', rfc)]
model = Pipeline(steps=steps)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=R_STATE)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
# NOTE(review): the display names are applied to the class labels in the report's own
# label order - confirm that order really is low, med, high, max.
target_names = ['low', 'med', 'high', 'max']
print(classification_report(y_test, y_pred, target_names=target_names))

In [None]:
# ROC curves for whichever `model` is currently bound (the PCA(30) + random forest pipeline when cells run in order).
plot_ROC_curve(model, X_train, y_train, X_test, y_test)

In [None]:
# LightGBM on the full (un-reduced) feature set.
# Features are every column except the target.
df1 = df.drop(columns='Game Settings')
X = df1
y = df['Game Settings']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=R_STATE)

# max_depth=-1 is LightGBM's documented "no depth limit" value. The earlier -5 had the
# same effect (any value <= 0 means unlimited) but read like a typo for 5.
# NOTE(review): if a real depth cap of 5 was intended, set max_depth=5 instead.
model = lgb.LGBMClassifier(learning_rate=0.09, max_depth=-1, random_state=R_STATE)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
# NOTE(review): the display names are applied to the class labels in the report's own
# label order - confirm that order really is low, med, high, max.
target_names = ['low', 'med', 'high', 'max']
print(classification_report(y_test, y_pred, target_names=target_names))

In [None]:
# ROC curves for whichever `model` is currently bound (the full-feature LightGBM classifier when cells run in order).
plot_ROC_curve(model, X_train, y_train, X_test, y_test)

In [None]:
# LightGBM on the first 25 principal components.

# max_depth=-1 is LightGBM's documented "no depth limit" value. The earlier -5 had the
# same effect (any value <= 0 means unlimited) but read like a typo for 5.
# NOTE(review): if a real depth cap of 5 was intended, set max_depth=5 instead.
lgbm_full = lgb.LGBMClassifier(learning_rate=0.09, max_depth=-1, random_state=R_STATE)

# Features are every column except the target.
df1 = df.drop(columns='Game Settings')
X = df1
y = df['Game Settings']

# PCA is seeded as well so that a randomized SVD solver cannot make runs differ.
# NOTE(review): features are not standardised before PCA - confirm that is intended.
steps = [('pca', PCA(n_components=25, random_state=R_STATE)), ('m', lgbm_full)]
model = Pipeline(steps=steps)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=R_STATE)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
# NOTE(review): the display names are applied to the class labels in the report's own
# label order - confirm that order really is low, med, high, max.
target_names = ['low', 'med', 'high', 'max']
print(classification_report(y_test, y_pred, target_names=target_names))

In [None]:
# ROC curves for whichever `model` is currently bound (the PCA(25) + LightGBM pipeline when cells run in order).
plot_ROC_curve(model, X_train, y_train, X_test, y_test)

In [None]:
# LightGBM on the first 30 principal components.

# max_depth=-1 is LightGBM's documented "no depth limit" value. The earlier -5 had the
# same effect (any value <= 0 means unlimited) but read like a typo for 5.
# NOTE(review): if a real depth cap of 5 was intended, set max_depth=5 instead.
lgbm_full = lgb.LGBMClassifier(learning_rate=0.09, max_depth=-1, random_state=R_STATE)

# Features are every column except the target.
df1 = df.drop(columns='Game Settings')
X = df1
y = df['Game Settings']

# PCA is seeded as well so that a randomized SVD solver cannot make runs differ.
# NOTE(review): features are not standardised before PCA - confirm that is intended.
steps = [('pca', PCA(n_components=30, random_state=R_STATE)), ('m', lgbm_full)]
model = Pipeline(steps=steps)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=R_STATE)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
# NOTE(review): the display names are applied to the class labels in the report's own
# label order - confirm that order really is low, med, high, max.
target_names = ['low', 'med', 'high', 'max']
print(classification_report(y_test, y_pred, target_names=target_names))

In [None]:
# ROC curves for whichever `model` is currently bound (the PCA(30) + LightGBM pipeline when cells run in order).
plot_ROC_curve(model, X_train, y_train, X_test, y_test)

<a href="https://kritikseth.github.io/redirect" target="_parent"><img src="https://raw.githack.com/kritikseth/kritikseth/master/redirect.svg" alt="Kritik Seth"/></a>