# Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [None]:
# Load the round-snapshot dataset from a CSV in the working directory.
df = pd.read_csv("csgo_round_snapshots.csv")
# Preview the first rows as the cell output.
df.head()

In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

In [None]:
# Total number of missing values summed over every column of the frame.
df.isnull().sum().sum()

In [None]:
# Print a summary of the frame (columns, non-null counts, dtypes).
df.info()

# Preprocess

In [None]:
# Store the bomb_planted column as 8-bit integers.
# NOTE(review): presumably the column is boolean in the CSV — confirm.
df["bomb_planted"] = df["bomb_planted"].astype(np.int8)

In [None]:
def label_encoder(column):
    """Encode the values of *column* as integer labels.

    A fresh ``LabelEncoder`` is fitted on the column, the column name and the
    learned classes are printed (so the code-to-label mapping can be read off
    the output), and the encoded values are returned.

    Args:
        column: A named column (must expose ``.name``), e.g. a pandas Series.

    Returns:
        The result of ``LabelEncoder.transform`` applied to *column*.
    """
    encoder = LabelEncoder()
    encoder.fit(column)
    # The position of each class in ``classes_`` is the integer code it gets.
    print(column.name, encoder.classes_)
    codes = encoder.transform(column)
    return codes

In [None]:
# Replace the map and round_winner columns with their integer label codes;
# label_encoder prints the class order for each column as it goes.
for col in ("map", "round_winner"):
    df[col] = label_encoder(df[col])

In [None]:
# Feature matrix: every column except the target.
X = df.drop(columns="round_winner")
# Target vector: the encoded round winner.
y = df["round_winner"]

In [None]:
# Standardise every feature column; the fitted scaler is kept in `ss`.
ss = StandardScaler().fit(X)
X_scaled = ss.transform(X)

In [None]:
# Heatmap of the pairwise correlations between the unscaled feature columns.
plt.figure()
sns.heatmap(X.corr())
plt.show()

In [None]:
# Fit a PCA that keeps one component per feature so the full explained-variance
# spectrum can be inspected. The component count is read from the data instead
# of being hard-coded, so the cell keeps working if feature columns change.
pca = PCA(n_components=X_scaled.shape[1])
pca.fit(X_scaled)

In [None]:
# Fraction of the total variance captured by each fitted principal component.
pca.explained_variance_ratio_

In [None]:
# Horizontal bar chart of the variance share captured by each component.
# Principal components are linear combinations of all features, so the bars
# are labelled PC1..PCn rather than with the original column names (which
# would wrongly suggest a per-feature importance).
component_labels = [
    f"PC{i}" for i in range(1, len(pca.explained_variance_ratio_) + 1)
]
plt.figure(figsize=(10, 20))
plt.barh(component_labels, pca.explained_variance_ratio_)
plt.xlabel("Explained Variance Ratio")
plt.ylabel("Principal Component")
plt.show()

In [None]:
# Histogram (96 bins) of the per-component explained-variance ratios.
plt.figure()
plt.hist(pca.explained_variance_ratio_, bins=96)
plt.show()

In [None]:
# Cumulative explained variance versus the number of components kept.
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
# Plot against 1..n so the x value really is the "number of components";
# the default 0-based index would shift the curve by one component.
component_counts = np.arange(1, len(cumulative_variance) + 1)
plt.figure(figsize=(16, 8))
plt.plot(component_counts, cumulative_variance)
# Reference line at 90% of the total variance.
plt.axhline(0.90, c="r")
plt.xlabel("Number of Components")
plt.ylabel("Cumulative Explained Variance")
plt.show()

In [None]:
# Project the scaled data onto the principal axes and keep the first 60
# components.
# NOTE(review): 60 is presumably read off the 90% line of the cumulative
# variance plot — confirm.
X_pca = pca.transform(X_scaled)[:, :60]

In [None]:
# Wrap the reduced matrix in a DataFrame (default integer column labels)
# and preview the first rows.
data = pd.DataFrame(X_pca)
data.head()

In [None]:
# Hold out 20% of the rows for evaluation (seeded for reproducibility).
# The raw features are split first and the scaler is fitted on the training
# rows only, so test-set statistics never leak into preprocessing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
train_scaler = StandardScaler().fit(X_train)
X_train = train_scaler.transform(X_train)
X_test = train_scaler.transform(X_test)
# NOTE(review): the PCA-reduced X_pca is not used for training; confirm
# whether the models were meant to be fitted on it instead.

# Models

In [None]:
# Logistic regression: fit on the training split, score on the held-out split.
logreg = LogisticRegression().fit(X_train, y_train)
pred_logreg = logreg.predict(X_test)
ac_logreg = accuracy_score(y_test, pred_logreg)
cr_logreg = classification_report(y_test, pred_logreg)
print(cr_logreg)

In [None]:
# Decision tree: fit on the training split, score on the held-out split.
# A fixed seed (matching the one used for the split) makes the reported
# score reproducible between runs.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
pred_dt = dt.predict(X_test)
cr_dt = classification_report(y_test, pred_dt)
ac_dt = accuracy_score(y_test, pred_dt)
print(cr_dt)

In [None]:
# Random forest: fit on the training split, score on the held-out split.
# Bootstrapping and feature sub-sampling are random, so the seed is fixed
# (matching the one used for the split) to make the score reproducible.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
pred_rf = rf.predict(X_test)
cr_rf = classification_report(y_test, pred_rf)
ac_rf = accuracy_score(y_test, pred_rf)
print(cr_rf)

In [None]:
# Gradient-boosted trees (XGBoost): fit on the training split, score on the
# held-out split.
xgb = XGBClassifier()
xgb.fit(X_train, y_train)
pred_xgb = xgb.predict(X_test)
ac_xgb = accuracy_score(y_true=y_test, y_pred=pred_xgb)
cr_xgb = classification_report(y_true=y_test, y_pred=pred_xgb)
print(cr_xgb)

In [None]:
# Display names and test-set accuracies, kept in the same order so they can
# be paired up positionally.
models = ["Logistic Regression", "Decision Tree", "Random Forest", "XGB"]
scores = [ac_logreg, ac_dt, ac_rf, ac_xgb]

In [None]:
# One row per model: its display name and its test-set accuracy.
models_df = pd.DataFrame(dict(Model=models, Score=scores))
models_df.head()

In [None]:
# Bar chart comparing the accuracy of the fitted models.
plt.figure()
ax = sns.barplot(data=models_df, x="Model", y="Score")
# Annotate every bar with its value.
for container in ax.containers:
    ax.bar_label(container)
plt.show()