In [59]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
import seaborn as sns

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from statsmodels.stats.outliers_influence import variance_inflation_factor

# CS:GO Round Winner Classification


URL: https://www.kaggle.com/christianlillelund/csgo-round-winner-classification

In [60]:
df_cs = pd.read_csv(os.path.join('dataset', 'csgo_round_snapshots.csv'))
df_cs.shape

(122410, 97)

In [61]:
df_cs.head()

Unnamed: 0,time_left,ct_score,t_score,map,bomb_planted,ct_health,t_health,ct_armor,t_armor,ct_money,...,t_grenade_flashbang,ct_grenade_smokegrenade,t_grenade_smokegrenade,ct_grenade_incendiarygrenade,t_grenade_incendiarygrenade,ct_grenade_molotovgrenade,t_grenade_molotovgrenade,ct_grenade_decoygrenade,t_grenade_decoygrenade,round_winner
0,175.0,0.0,0.0,de_dust2,False,500.0,500.0,0.0,0.0,4000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,CT
1,156.03,0.0,0.0,de_dust2,False,500.0,500.0,400.0,300.0,600.0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,CT
2,96.03,0.0,0.0,de_dust2,False,391.0,400.0,294.0,200.0,750.0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,CT
3,76.03,0.0,0.0,de_dust2,False,391.0,400.0,294.0,200.0,750.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,CT
4,174.97,1.0,0.0,de_dust2,False,500.0,500.0,192.0,0.0,18350.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,CT


In [62]:
df_cs.describe()

Unnamed: 0,time_left,ct_score,t_score,ct_health,t_health,ct_armor,t_armor,ct_money,t_money,ct_helmets,...,ct_grenade_flashbang,t_grenade_flashbang,ct_grenade_smokegrenade,t_grenade_smokegrenade,ct_grenade_incendiarygrenade,t_grenade_incendiarygrenade,ct_grenade_molotovgrenade,t_grenade_molotovgrenade,ct_grenade_decoygrenade,t_grenade_decoygrenade
count,122410.0,122410.0,122410.0,122410.0,122410.0,122410.0,122410.0,122410.0,122410.0,122410.0,...,122410.0,122410.0,122410.0,122410.0,122410.0,122410.0,122410.0,122410.0,122410.0,122410.0
mean,97.886922,6.709239,6.780435,412.106568,402.7145,314.142121,298.44467,9789.023773,11241.03668,2.053901,...,1.853157,1.8581,1.540814,1.627146,1.001969,0.019819,0.048011,1.352095,0.027694,0.02575
std,54.465238,4.790362,4.823543,132.29329,139.919033,171.029736,174.576545,11215.042286,12162.806759,1.84147,...,1.772791,1.794473,1.737804,1.829147,1.458084,0.143933,0.227669,1.663246,0.169531,0.164162
min,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,54.92,3.0,3.0,350.0,322.0,194.0,174.0,1300.0,1550.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,94.91,6.0,6.0,500.0,500.0,377.0,334.0,5500.0,7150.0,2.0,...,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
75%,166.9175,10.0,10.0,500.0,500.0,486.0,468.0,14600.0,18000.0,4.0,...,3.0,3.0,3.0,3.0,2.0,0.0,0.0,2.0,0.0,0.0
max,175.0,32.0,33.0,500.0,600.0,500.0,500.0,80000.0,80000.0,5.0,...,7.0,7.0,6.0,9.0,5.0,3.0,3.0,5.0,3.0,2.0


In [63]:
df_cs['round_winner'] = df_cs['round_winner'].map({'CT': 1, 'T': 0})
df_cs['bomb_planted'] = df_cs['bomb_planted'].astype(int)

In [64]:
# default 0.25 test
train, test = train_test_split(df_cs)

# p = 1

In [89]:
train_input_var = train['time_left'].values.reshape(-1, 1)
train_input_target = train['round_winner'].astype(int)

test_input_var = test['time_left'].values.reshape(-1, 1)
test_input_target = test['round_winner'].astype(int)

scaler = StandardScaler()
train_input_var = scaler.fit_transform(train_input_var)
test_input_var = scaler.transform(test_input_var)

In [90]:
model = LinearDiscriminantAnalysis()
model = model.fit(train_input_var, train_input_target)

## Train eval

In [91]:
model.score(train_input_var, train_input_target)

0.5205049723877264

In [92]:
confusion_matrix(train_input_target, model.predict(train_input_var).astype(float))

array([[29928, 16913],
       [27108, 17858]])

## Test eval

In [93]:
model.score(test_input_var, test_input_target)

0.5194915531157076

In [94]:
confusion_matrix(test_input_target, model.predict(test_input_var).astype(float))

array([[10003,  5562],
       [ 9143,  5895]])

# p > 1

In [95]:
train_input_var = train.drop(['round_winner', 'map'], axis=1)
train_input_target = train['round_winner'].astype(int)

test_input_var = test.drop(['round_winner', 'map'], axis=1)
test_input_target = test['round_winner'].astype(int)

scaler = StandardScaler()
train_input_var = scaler.fit_transform(train_input_var)
test_input_var = scaler.transform(test_input_var)

In [96]:
model = LinearDiscriminantAnalysis()
model = model.fit(train_input_var, train_input_target)

In [97]:
train.columns[np.argmax(model.coef_)]

't_health'

## Train eval

In [98]:
model.score(train_input_var, train_input_target)

0.7491258836472164

In [99]:
confusion_matrix(train_input_target, model.predict(train_input_var).astype(float))

array([[34444, 12397],
       [10635, 34331]])

## Test eval

In [100]:
model.score(test_input_var, test_input_target)

0.7465607946933307

In [101]:
confusion_matrix(test_input_target, model.predict(test_input_var).astype(float))

array([[11424,  4141],
       [ 3615, 11423]])

# p > 1 without collinear

In [None]:
vif = pd.DataFrame()
drop_for_vif = df_cs.drop(['round_winner', 'map'], axis=1)
vif['feature'] = drop_for_vif.columns
vif['vif'] = [variance_inflation_factor(drop_for_vif.values, i) for i in range(len(drop_for_vif.columns))]

In [44]:
vif_sorted = vif.sort_values('vif', ascending=False)
vif_sorted[vif_sorted['vif'] > 10]

Unnamed: 0,feature,vif
13,ct_players_alive,1833.947467
14,t_players_alive,1488.654508
75,ct_weapon_usps,1023.684843
34,t_weapon_glock,919.772526
4,ct_health,239.392105
5,t_health,149.779529
71,ct_weapon_deagle,73.775086
11,t_helmets,61.813804
72,t_weapon_deagle,56.260972
7,t_armor,33.977527


In [45]:
df_cs_without_collinear = df_cs.drop(vif_sorted[vif_sorted['vif'] > 10]['feature'], axis=1)
# default 0.25 test
train, test = train_test_split(df_cs_without_collinear)

In [46]:
train_input_var = train.drop(['round_winner', 'map'], axis=1)
train_input_target = train['round_winner'].astype(int)

test_input_var = test.drop(['round_winner', 'map'], axis=1)
test_input_target = test['round_winner'].astype(int)

scaler = StandardScaler()
train_input_var = scaler.fit_transform(train_input_var)
test_input_var = scaler.transform(test_input_var)

In [47]:
model = LinearDiscriminantAnalysis()
model = model.fit(train_input_var, train_input_target)

In [57]:
train.columns[np.argmax(model.coef_)]

array([[-2.92673849e-03,  1.01638018e-03, -1.35002344e+00,
         1.50412738e-05, -2.22618103e-05, -6.04032069e-03,
         4.76920279e-01,  4.93085878e-01, -4.45893511e-01,
         5.57316463e-01, -6.37389664e-01,  2.74039000e-16,
         1.58483402e-01,  2.74831683e-01, -3.68613949e-01,
         4.12717028e-01, -3.82112134e-01, -3.26108287e-17,
         1.37662906e-02,  6.06840096e-01, -3.43165344e-01,
        -2.15411505e-01, -2.96304191e+00,  6.49580049e-17,
         4.40295052e-01, -7.39574646e-01,  4.31627253e-01,
        -3.68327048e-01,  2.30573359e-01, -1.83261203e-01,
         3.46640146e-01, -1.64291250e+00,  5.64497753e-01,
        -4.95189255e-01,  1.19008378e-01,  1.47388517e-01,
         3.67880794e-01, -2.71798724e-01, -2.39858586e-17,
         1.05876711e+00,  6.58093709e-01,  1.04351067e+00,
         1.62573858e+00, -7.55947974e-01,  1.05150073e-17,
         8.42477336e-01,  2.49524721e-17, -1.75221550e+00,
        -6.50201262e-02,  3.91533818e+00,  5.03325768e-0

## Train eval

In [48]:
model.score(train_input_var, train_input_target)

0.6953282429444377

In [49]:
confusion_matrix(train_input_target, model.predict(train_input_var).astype(float))

array([[33812, 12925],
       [15046, 30024]])

## Test eval

In [50]:
model.score(test_input_var, test_input_target)

0.6975459922229846

In [51]:
confusion_matrix(test_input_target, model.predict(test_input_var).astype(float))

array([[11476,  4193],
       [ 5063,  9871]])