# Importing Libraries

In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.preprocessing import StandardScaler

# Data Exploration

In [32]:
df = pd.read_csv("ChampionMatchStats.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4960 entries, 0 to 4959
Data columns (total 80 columns):
 #   Column                          Non-Null Count  Dtype 
---  ------                          --------------  ----- 
 0   championName                    4960 non-null   object
 1   assists                         4960 non-null   int64 
 2   baronKills                      4960 non-null   int64 
 3   bountyLevel                     4960 non-null   int64 
 4   consumablesPurchased            4960 non-null   int64 
 5   damageDealtToBuildings          4960 non-null   int64 
 6   damageDealtToObjectives         4960 non-null   int64 
 7   damageDealtToTurrets            4960 non-null   int64 
 8   damageSelfMitigated             4960 non-null   int64 
 9   deaths                          4960 non-null   int64 
 10  detectorWardsPlaced             4960 non-null   int64 
 11  doubleKills                     4960 non-null   int64 
 12  dragonKills                     4960 non-null   

In [33]:
duplicates_mask = df.duplicated(keep=False)

# Count the number of true duplicates
num_duplicates = duplicates_mask.sum()
print(f"Number of true duplicates: {num_duplicates}")

# Display the duplicate rows
print("Duplicate rows:")
print(df[duplicates_mask])

# Get the row numbers of the duplicates
row_numbers = np.where(duplicates_mask)[0]
print(f"Row numbers of duplicate rows: {row_numbers}")

Number of true duplicates: 260
Duplicate rows:
     championName  assists  baronKills  bountyLevel  consumablesPurchased  \
450         Fiora        5           0            1                     4   
451       Belveth       11           1            9                     3   
452         Annie       14           0            0                     4   
453       Karthus       12           0            3                     2   
454       Alistar       21           0            0                    10   
...           ...      ...         ...          ...                   ...   
4105       Maokai       12           0            1                     7   
4106       LeeSin       11           1            2                     8   
4107   Cassiopeia        4           0            0                     2   
4108       Lucian        5           0            2                     5   
4109        Rakan       23           0            0                    12   

      damageDealtToBuildings

In [34]:
# Remove the duplicate rows
df = df.drop_duplicates()

# Optionally, you can reset the index after dropping duplicates
df = df.reset_index(drop=True)

In [35]:
target = 'win'
features = [i for i in df.columns if i not in [target]]
original_df = df.copy(deep=True)
print('\n\033[1mInference:\033[0m The Datset consists of {} features & {} samples.'.format(df.shape[1], df.shape[0]))


[1mInference:[0m The Datset consists of 80 features & 4830 samples.


In [36]:
df.nunique().sort_values()

unrealKills                       1
sightWardsBoughtInGame            1
nexusTakedowns                    2
teamEarlySurrendered              2
nexusLost                         2
                               ... 
totalDamageDealtToChampions    4477
totalDamageTaken               4505
physicalDamageDealt            4652
totalDamageDealt               4756
perks                          4830
Length: 80, dtype: int64

In [37]:
nu = df[features].nunique().sort_values()
nf = []; cf = []; nnf = 0; ncf = 0; #numerical & categorical features

for i in range(df[features].shape[1]):
    if nu.values[i]<=16:cf.append(nu.index[i])
    else: nf.append(nu.index[i])

print('\n\033[1mInference:\033[0m The Datset has {} numerical & {} categorical features.'.format(len(nf),len(cf)))


[1mInference:[0m The Datset has 47 numerical & 32 categorical features.


In [38]:
display(df.describe())

Unnamed: 0,assists,baronKills,bountyLevel,consumablesPurchased,damageDealtToBuildings,damageDealtToObjectives,damageDealtToTurrets,damageSelfMitigated,deaths,detectorWardsPlaced,...,trueDamageDealtToChampions,trueDamageTaken,turretKills,turretTakedowns,turretsLost,unrealKills,visionScore,visionWardsBoughtInGame,wardsKilled,wardsPlaced
count,4830.0,4830.0,4830.0,4830.0,4830.0,4830.0,4830.0,4830.0,4830.0,4830.0,...,4830.0,4830.0,4830.0,4830.0,4830.0,4830.0,4830.0,4830.0,4830.0,4830.0
mean,10.908282,0.066874,0.951967,3.504762,2136.456522,7718.303727,2136.456522,17426.836232,5.954037,1.742236,...,1038.65735,1092.316356,0.804348,1.693789,4.168737,0.0,20.100414,2.010559,2.635404,8.34058
std,9.476288,0.267444,1.91196,3.314951,2684.405498,11033.546068,2684.405498,16634.508188,3.734339,2.50227,...,1692.803941,1342.612491,1.181071,1.793208,3.295908,0.0,21.552713,2.813689,3.218647,10.194838
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,4.0,0.0,0.0,1.0,247.0,940.0,247.0,7300.5,3.0,0.0,...,64.0,301.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,1.0
50%,8.0,0.0,0.0,3.0,1183.5,3030.5,1183.5,12373.0,6.0,1.0,...,521.0,660.5,0.0,1.0,4.0,0.0,15.0,1.0,2.0,6.0
75%,16.0,0.0,1.0,5.0,3007.75,9684.25,3007.75,21627.0,8.0,3.0,...,1224.75,1355.0,1.0,3.0,7.0,0.0,27.0,3.0,4.0,10.0
max,65.0,2.0,16.0,25.0,21896.0,94556.0,21896.0,203932.0,25.0,20.0,...,18412.0,16272.0,9.0,10.0,11.0,0.0,161.0,23.0,35.0,89.0


# EDA