Task: Find the determining factors for winning rounds in CS:GO
Execution time: 1.5 hours

In this notebook we train a LightGBM model to predict round winners (about 70% accuracy) and then use SHAP values to determine which factors were most important. Jump to [Explain predictions](#Explain-predictions).

Ideas/References:
* [Data Transformation](https://www.kaggle.com/christianlillelund/predict-winners-in-cs-go-with-keras-80)
* [Feature Exploration](https://slundberg.github.io/shap/notebooks/tree_explainer/Census%20income%20classification%20with%20LightGBM.html)

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
# Plot styling: apply matplotlib's "fivethirtyeight" style sheet first, then
# seaborn's 'whitegrid' axes style on top of it
plt.style.use("fivethirtyeight")
sns.set_style('whitegrid')
# Render matplotlib figures inline in the notebook output
%matplotlib inline

# Read the round snapshots into a DataFrame
df = pd.read_csv('/kaggle/input/csgo-round-winner-classification/csgo_round_snapshots.csv')

# Separate the predictor columns from the label column (round_winner)
X = df.drop(columns=['round_winner'])
y = df['round_winner']

print(f"Total number of samples: {len(X)}")

# Preview the first predictor rows
X.head()

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import power_transform

def encode_targets(y):
    """Encode the labels in ``y`` as integer codes.

    A fresh ``LabelEncoder`` is fitted on ``y`` and immediately used to
    transform it; the fitted encoder itself is not returned.

    Parameters:
        y: array-like of target labels.

    Returns:
        The integer-encoded labels produced by the encoder.
    """
    return LabelEncoder().fit_transform(y)

def encode_inputs(X, object_cols):
    """One-hot encode the columns ``object_cols`` of ``X``.

    Parameters:
        X: DataFrame holding the predictors.
        object_cols: list of column names in ``X`` to one-hot encode.

    Returns:
        A DataFrame of dense one-hot columns, named by the encoder from
        ``object_cols`` and the observed categories, sharing ``X``'s index so
        it can be concatenated with the remaining columns of ``X``.
    """
    # scikit-learn 1.2 renamed `sparse` to `sparse_output` and later removed
    # the old keyword; try the current name first and fall back for older
    # releases, which reject the unknown keyword with a TypeError.
    try:
        ohe = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    except TypeError:
        ohe = OneHotEncoder(handle_unknown='ignore', sparse=False)

    encoded = ohe.fit_transform(X[object_cols])

    # `get_feature_names` was removed in favour of `get_feature_names_out`;
    # use whichever one the installed version provides.
    if hasattr(ohe, 'get_feature_names_out'):
        column_names = ohe.get_feature_names_out(object_cols)
    else:
        column_names = ohe.get_feature_names(object_cols)

    return pd.DataFrame(encoded, columns=column_names, index=X.index)

# Rebuild the predictors and targets from `df` on every run so that this cell
# is idempotent. Previously X was overwritten in place, so executing the cell
# a second time raised a KeyError because the 'map' / 'bomb_planted' columns
# had already been replaced by their one-hot columns.
object_cols = ['map', 'bomb_planted']
X_raw = df.drop(['round_winner'], axis=1)

# Use OH encoder to encode the categorical predictors, keep the rest as-is
X_encoded = encode_inputs(X_raw, object_cols)
numerical_X = X_raw.drop(object_cols, axis=1)
X = pd.concat([numerical_X, X_encoded], axis=1)

# Use label encoder to encode targets
y = encode_targets(df.round_winner)

# Train the model

In [None]:
from sklearn.model_selection import train_test_split
import lightgbm as lgb


# Hold out 20% of the rows, with a fixed seed so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=7,
)

# Wrap both splits in LightGBM Dataset objects for lgb.train
d_train = lgb.Dataset(data=X_train, label=y_train)
d_test = lgb.Dataset(data=X_test, label=y_test)

In [None]:
# Hyper-parameters for a binary gradient-boosted tree model, evaluated with
# log loss. "verbose": -1 silences LightGBM's own logging.
params = {
    "max_bin": 512,
    "learning_rate": 0.05,
    "boosting_type": "gbdt",
    "objective": "binary",
    "metric": "binary_logloss",
    "num_leaves": 10,
    "verbose": -1,
    "min_data": 100,
    "boost_from_average": True
}

# LightGBM 4.0 removed the `early_stopping_rounds` and `verbose_eval` keyword
# arguments of lgb.train(); the same behaviour is configured with callbacks.
# `log_evaluation` replaced `print_evaluation` in LightGBM 3.3, so fall back
# to the old name on earlier releases.
log_evaluation = getattr(lgb, "log_evaluation", None) or lgb.print_evaluation
callbacks = [
    lgb.early_stopping(stopping_rounds=50),  # stop after 50 rounds without improvement
    log_evaluation(period=1000),             # log the validation metric every 1000 rounds
]

# The round count is an upper bound only; early stopping on d_test decides
# when training actually ends.
# NOTE(review): d_test is also the only held-out split, so scores measured on
# it afterwards are optimistic - consider a separate validation split.
model = lgb.train(
    params,
    d_train,
    num_boost_round=100000,
    valid_sets=[d_test],
    callbacks=callbacks,
)

# Explain predictions

In [None]:
import shap

# Load SHAP's JavaScript visualization code into the notebook; the interactive
# force plots in the cells below need it to render
shap.initjs()

In [None]:
# Build a tree-specific SHAP explainer for the trained booster and compute
# SHAP values for every row of X (training and held-out rows alike).
# NOTE(review): the plotting cells index `shap_values[1]` and
# `explainer.expected_value[1]`, i.e. they assume per-class outputs - confirm
# that the installed SHAP version returns that shape for this model.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)

In [None]:
# Force plot for the first row of X: how each feature pushes that prediction
# away from the explainer's expected value (index-1 output)
shap.force_plot(
    explainer.expected_value[1],
    shap_values[1][0],
    X.iloc[0],
)

In [None]:
# Stacked force plot over the first 1000 rows of X (index-1 output)
shap.force_plot(
    explainer.expected_value[1],
    shap_values[1][:1000],
    X.iloc[:1000],
)

In [None]:
# Summary of feature impact across all rows of X.
# NOTE(review): the full `shap_values` object is passed here, not the
# `shap_values[1]` slice used by the other plots - confirm this is intended.
shap.summary_plot(shap_values, X)

In [None]:
# One dependence plot per feature column, all drawn from the index-1 SHAP
# values; the slice is loop-invariant, so take it once up front
class1_shap = shap_values[1]
for feature_name in X_train.columns:
    shap.dependence_plot(feature_name, class1_shap, X, display_features=X)