In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that the
# CSV read later in the notebook (a path under /content/drive/MyDrive) resolves.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

## Read Data

In [None]:
# Load the dataset from the mounted Drive folder into the working frame `df`.
df = pd.read_csv(filepath_or_buffer="/content/drive/MyDrive/league/final_data.csv")

## Drop Unnecessary Features

In [None]:
# Columns removed before any analysis or modelling.
# NOTE: this cell is not idempotent - re-running it without reloading `df`
# raises KeyError because the columns are already gone.
columns_to_drop = [
    "championId", "teamId", "allInPings", "needVisionPings",
    "sightWardsBoughtInGame", "unrealKills", "teamEarlySurrendered",
    "totalHealsOnTeammates", "riotIdTagline", "riotIdName", "summonerLevel",
    "summoner1Casts", "summoner1Id", "summoner2Casts", "summoner2Id",
    "pushPings", "assistMePings", "baitPings", "basicPings",
    "totalTimeSpentDead", "championTransform", "commandPings", "dangerPings",
    "eligibleForProgression", "enemyMissingPings", "enemyVisionPings",
    "getBackPings", "holdPings", "onMyWayPings", "profileIcon", "nexusLost",
    "gameEndedInSurrender", "nexusTakedowns", "bountyLevel", "nexusKills",
    "participantId", "challenges", "lane", "perks", "puuid", "role",
    "summonerId", "summonerName", "teamPosition", "longestTimeSpentLiving",
    "goldSpent", "spell1Casts", "spell2Casts", "spell3Casts", "spell4Casts",
    "item0", "item1", "item2", "item3", "item4", "item5", "item6",
]
df = df.drop(columns=columns_to_drop)

## EDA

### Check Duplicates

In [None]:
df[df.duplicated()]

In [None]:
df.info()

### Check Missing values

In [None]:
# Number of missing values per column (isnull is the pandas alias of isna).
df.isnull().sum()

### Check label balance

In [None]:
# Row count for each value of the target column, to see how balanced it is.
df.loc[:, "win"].value_counts()

### Label Encoding

In [None]:
# Encode the boolean target as integers (True -> 1, False -> 0).
# An explicit cast is used instead of replace({True: 1, False: 0}): replace on
# a bool column depends on pandas implicitly downcasting the result back to an
# integer dtype, which pandas 2.2 deprecates. If `win` ended up as an object
# column it would be skipped by the numeric dtype selection and one-hot encoded
# together with the categorical columns further down.
df["win"] = df["win"].astype("int64")

### Numerical Values

#### Outlier Check

In [None]:
numerical = df.select_dtypes(include=["int64","float64","bool"]).columns.tolist()

In [None]:
len(numerical)

In [None]:
fig, axs = plt.subplots(nrows=6, ncols=5, figsize=(15, 15))
axs = axs.flatten()

# Iterate through all the numerical columns and create a plot for each one
for i, col in enumerate(numerical[:30]):
    sns.histplot(x=df[col], ax=axs[i])
plt.tight_layout()



In [None]:
fig, axs = plt.subplots(nrows=6, ncols=6, figsize=(15, 15))
axs = axs.flatten()

for i, col in enumerate(numerical[30:]):
    sns.histplot(x=df[col], ax=axs[i])
plt.tight_layout()
plt.show()

#### Correlation with Target

In [None]:
# Pearson correlation of every numeric/boolean column with the target.
# The frame still contains object (string) columns at this point - they are
# only one-hot encoded later - and corrwith on such columns raises in
# pandas >= 2.0, so the numeric subset is selected explicitly.
# The target's own entry (always 1.0) is dropped so it does not dominate the
# bar chart built from `cor`.
cor = (
    df.select_dtypes(include=["number", "bool"])
    .corrwith(df["win"])
    .drop(labels="win", errors="ignore")
    .sort_values(ascending=False)
)

In [None]:
# Horizontal bar chart of the correlations in `cor`, one bar per feature.
fig, ax = plt.subplots(figsize=(8, 12))
sns.barplot(x=cor.values, y=cor.index, ax=ax)


## Feature Engineering

### Categorical Feature Encoding

In [None]:
cat_columns = df.select_dtypes(['object']).columns

In [None]:
df = pd.get_dummies(df,columns=cat_columns)

### Create New Features

#### KDA

In [None]:
df["deaths"] = df["deaths"].replace(0, 1)

In [None]:
df["kda"]=(df["kills"]+df["assists"])/df["deaths"]

#### Convert features to per-minute rates

In [None]:
feature=["goldEarned","totalDamageDealt","totalMinionsKilled","totalHeal","champExperience","damageDealtToObjectives"]

In [None]:
df[feature].head()

#### Convert time from seconds to minutes

In [None]:
df["timePlayed"]=df["timePlayed"]/60

In [None]:
df['timePlayed'] = df['timePlayed'].round(1)

In [None]:
for i in feature:
  df[i]=df[i]/df['timePlayed']

## Train / Test Split

In [None]:
# Features: everything except the target and the three raw columns that were
# folded into `kda`. Target: the 0/1 `win` column.
excluded_columns = ["win", "kills", "deaths", "assists"]
X = df.drop(columns=excluded_columns)
y = df["win"]

In [None]:
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix

# 80 % train / 20 % test. A fixed random_state makes the split - and therefore
# every score reported below - reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.80, random_state=42
)

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# The features live on very different scales (per-minute gold vs. 0/1 dummy
# columns). With raw inputs and the default 100 iterations the solver stops
# before converging, and the resulting warning is hidden by the global warnings
# filter. Standardising inside a pipeline fits the scaler on the training data
# only; `logreg` still exposes fit / predict / predict_proba.
logreg = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
logreg.fit(X_train, y_train)

In [None]:
y_pred=logreg.predict(X_test)

In [None]:
print(classification_report(y_test, y_pred))

In [None]:
metrics.accuracy_score(y_test, y_pred)

In [None]:
y_pred_proba = logreg.predict_proba(X_test)[:, 1]
metrics.roc_auc_score(y_test, y_pred_proba)

In [None]:
confusion_matrix = confusion_matrix(y_test, y_pred)

In [None]:
sns.heatmap(confusion_matrix, annot=True, fmt="d")

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# A fixed random_state seeds the forest's bootstrap sampling and feature
# sub-sampling so repeated runs give the same model and scores.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train, y_train)

In [None]:
y_pred=rfc.predict(X_test)
print(classification_report(y_test, y_pred))

In [None]:
metrics.accuracy_score(y_test, y_pred)

In [None]:
y_pred_proba = rfc.predict_proba(X_test)[:, 1]
metrics.roc_auc_score(y_test, y_pred_proba)

In [None]:
confusion_matrix = confusion_matrix(y_test, y_pred)

In [None]:
sns.heatmap(confusion_matrix, annot=True, fmt="d")

## MLP

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Gradient-based training of the MLP is sensitive to feature scale, so inputs
# are standardised inside a pipeline (scaler fitted on the training data only).
# random_state fixes the weight initialisation for reproducible scores.
# `mlp` still exposes fit / predict / predict_proba.
mlp = make_pipeline(StandardScaler(), MLPClassifier(random_state=42))
mlp.fit(X_train, y_train)

In [None]:
y_pred=mlp.predict(X_test)
print(classification_report(y_test, y_pred))

In [None]:
y_pred_proba = mlp.predict_proba(X_test)[:, 1]
metrics.roc_auc_score(y_test, y_pred_proba)

## LGBM

In [None]:
import lightgbm as lgb

In [None]:
lgbm = lgb.LGBMClassifier()

In [None]:
lgbm.fit(X_train, y_train)

In [None]:
y_pred=lgbm.predict(X_test)

### Confusion Matrix

In [None]:
confusion_matrix = confusion_matrix(y_test, y_pred)

In [None]:
sns.heatmap(confusion_matrix, annot=True, fmt="d")

### classification report

In [None]:
print(classification_report(y_test, y_pred))

In [None]:
metrics.accuracy_score(y_test, y_pred)

##### 5-Fold Cross Validation for train and test

In [None]:
from sklearn.model_selection import cross_val_score
cv_train = cross_val_score(lgbm, X_train, y_train, cv=5,scoring="accuracy")
print('Cross validation scores for train:', cv_train.round(2))
print('Average Cross validation scores for train:', np.average(cv_train).round(2))

In [None]:
from sklearn.model_selection import cross_val_score
cv_test = cross_val_score(lgbm, X_test, y_test, cv=5,scoring="accuracy")
print('Cross validation scores for test:', cv_test.round(2))
print('Average Cross validation scores for test:', np.average(cv_test).round(2))

### ROC_AUC

##### 5-Fold Cross Validation for train and test

In [None]:
from sklearn.model_selection import cross_val_score
cv_train = cross_val_score(lgbm, X_train, y_train, cv=5,scoring="roc_auc")
print('Cross validation scores for train:', cv_train.round(10))
print('Average Cross validation scores for train:', np.average(cv_train).round(5))

In [None]:
from sklearn.model_selection import cross_val_score
cv_test = cross_val_score(lgbm, X_test, y_test, cv=5,scoring="roc_auc")
print('Cross validation scores for test:', cv_test.round(5))
print('Average Cross validation scores for test:', np.average(cv_test).round(5))

In [None]:
y_pred_proba = lgbm.predict_proba(X_test)[:, 1]

In [None]:
from sklearn.metrics import roc_auc_score
fpr, tpr, _ = metrics.roc_curve(y_test,  y_pred_proba)
#create ROC curve
auc = metrics.roc_auc_score(y_test, y_pred_proba)
plt.plot(fpr,tpr,label="auc="+str(auc))
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.legend(loc=4)
plt.show()
     

In [None]:
pip install shap --quiet

In [None]:
import shap
explainer= shap.Explainer(lgbm)
shap_values= explainer.shap_values(X_test)
shap.summary_plot(shap_values[1],plot_type="bar",feature_names=X_test.columns)

In [None]:
shap.summary_plot(shap_values,X_test)

## Catboost

### catboost

In [None]:
pip install catboost --quiet

In [None]:
import catboost
cbt = catboost.CatBoostClassifier()

In [None]:
cbt.fit(X_train, y_train,verbose=False)

In [None]:
y_pred_cbt=cbt.predict(X_test)

In [None]:
print(classification_report(y_test, y_pred_cbt))

In [None]:
metrics.accuracy_score(y_test, y_pred_cbt)

In [None]:
y_pred_proba = cbt.predict_proba(X_test)[:, 1]

In [None]:
metrics.roc_auc_score(y_test, y_pred_proba)

In [None]:
import shap
explainer= shap.Explainer(cbt)
shap_values= explainer.shap_values(X_test)
shap.summary_plot(shap_values, X_test,plot_type="bar")

In [None]:
shap.summary_plot(shap_values, X_test)

## Retrain and save the model with the most important features

In [None]:
import pickle

In [None]:
# Reduced feature set used for the retrained model; `X` and `y` are rebound here.
selected_features = [
    "turretsLost",
    "timePlayed",
    "goldEarned",
    "totalMinionsKilled",
    "inhibitorTakedowns",
    "inhibitorsLost",
    "turretTakedowns",
    "kda",
    "champExperience",
    "totalDamageDealt",
    "dragonKills",
    "damageDealtToBuildings",
]
X = df.loc[:, selected_features]
y = df["win"]

In [None]:
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix

# 80 % train / 20 % test on the reduced feature set. A fixed random_state makes
# the split - and the scores of the retrained model - reproducible.
X_train1, X_test1, y_train1, y_test1 = train_test_split(
    X, y, train_size=0.80, random_state=42
)

In [None]:
pip install catboost --quiet

In [None]:
import catboost 
cbt = catboost.CatBoostClassifier()
cbt.fit(X_train1, y_train1,verbose=False)

In [None]:
y_pred=cbt.predict(X_test1)

In [None]:
print(classification_report(y_test1, y_pred))