In [1]:
import pandas as pd

In [107]:
X_train = pd.read_csv('../data/participants_train.csv')
X_test = pd.read_csv('../data/participants_test.csv')
y_train = pd.read_csv('../data/train_winners.csv')
submission = pd.read_csv('../data/sample_submission.csv')


In [108]:
y_train.shape

(8000, 2)

In [3]:
X_train.head(10)

Unnamed: 0,matchId,teamId,participantId,summonerId,summonerLevel,championName,championId
0,0,100,1,0,303,Mordekaiser,82
1,0,100,2,1,616,Sylas,517
2,0,100,3,2,667,Lissandra,127
3,0,100,4,3,860,Caitlyn,51
4,0,100,5,4,325,Morgana,25
5,0,200,6,5,459,Warwick,19
6,0,200,7,6,416,LeeSin,64
7,0,200,8,7,338,Ahri,103
8,0,200,9,8,344,Samira,360
9,0,200,10,9,532,Alistar,12


I'll make a really simple model - I'll simply predict the winner based on which team has the highest level summoner.

In [4]:
train_predictions = ( 
    X_train
    .sort_values(['matchId', 'summonerLevel'], ascending = [True, False])
    .drop_duplicates('matchId')
    [['matchId', 'teamId']]
    .reset_index(drop = True)
)

train_predictions

Unnamed: 0,matchId,teamId
0,0,100
1,1,100
2,2,200
3,3,200
4,4,100
...,...,...
7995,7995,200
7996,7996,200
7997,7997,100
7998,7998,200


In [5]:
from sklearn.metrics import accuracy_score

In [6]:
accuracy_score(
    y_true = y_train['winner'],
    y_pred = train_predictions['teamId']
)

0.504

Looks like I do a pretty terrible job with this method. Nevertheless, I'll implement this on the test data.

In [7]:
y_pred = ( 
    X_test
    .sort_values(['matchId', 'summonerLevel'], ascending = [True, False])
    .drop_duplicates('matchId')
    ['teamId']
    .reset_index(drop = True)
)

y_pred

0       100
1       100
2       200
3       200
4       200
       ... 
1995    100
1996    200
1997    200
1998    200
1999    100
Name: teamId, Length: 2000, dtype: int64

I'll add these to the sample submission.

In [8]:
submission['winner'] = y_pred
submission.head()

Unnamed: 0,matchId,winner
0,8000,100
1,8001,100
2,8002,200
3,8003,200
4,8004,200


Now, I'll export the result.

In [9]:
submission.to_csv('../data/first_submission.csv', index = False)

**READING THE OTHER FILES AND SEE WHAT ARE THERE**

In [10]:
champ = pd.read_csv('../data/champion_mastery.csv') 
#question how to connect this with the X_train data where we have the win and loose stats
champ.columns



Index(['summonerId', 'championId', 'championLevel', 'championPoints',
       'chestGranted', 'tokensEarned'],
      dtype='object')

In [11]:
champ_grp = champ.groupby('championId')['championPoints'].sum().sort_values(ascending =False).to_frame()
champ_grp

Unnamed: 0_level_0,championPoints
championId,Unnamed: 1_level_1
157,4244962990
64,3547947193
67,3247397232
81,3109033237
412,2824465392
...,...
888,166163087
526,143231115
200,94031879
895,80835147


In [12]:
X_train_added = pd.merge(X_train, champ_grp, left_on="championId", right_on="championId", how="left")
X_train_added.head(5)

Unnamed: 0,matchId,teamId,participantId,summonerId,summonerLevel,championName,championId,championPoints
0,0,100,1,0,303,Mordekaiser,82,829357128
1,0,100,2,1,616,Sylas,517,914915168
2,0,100,3,2,667,Lissandra,127,593687341
3,0,100,4,3,860,Caitlyn,51,2483939552
4,0,100,5,4,325,Morgana,25,1939925550


**Modifying the X_test files also as the train file**

In [29]:
X_test_added = pd.merge(X_test, champ_grp, left_on="championId", right_on="championId", how="left")
X_test_added.head(5)

Unnamed: 0,matchId,teamId,participantId,summonerId,summonerLevel,championName,championId,championPoints
0,8000,100,1,23938,57,Fiora,114,1329045944
1,8000,100,2,8037,788,LeeSin,64,3547947193
2,8000,100,3,8444,566,Veigar,45,1251033141
3,8000,100,4,24831,542,Twitch,29,1312924552
4,8000,100,5,23890,224,Soraka,16,1239343872


In [36]:
# **using the championPoints for prediction**

train_predictions_added = ( 
    X_train_added
    .sort_values(['matchId', 'championPoints'], ascending = [True, False])
    .drop_duplicates('matchId')
    ['teamId']
    .reset_index(drop = True)
)

train_predictions_added

0       200
1       200
2       100
3       100
4       200
       ... 
7995    200
7996    100
7997    200
7998    100
7999    100
Name: teamId, Length: 8000, dtype: int64

In [14]:
accuracy_score(
    y_true = y_train['winner'],
    y_pred = train_predictions_added['teamId']
)

#Score is reduced adding the championpoints reduces from 0.504 to 0.489

0.489625

**testing the test data to create the submission.csv**

In [37]:
test_predictions_added = ( 
    X_test_added
    .sort_values(['matchId', 'championPoints'], ascending = [True, False])
    .drop_duplicates('matchId')
    [ 'teamId']
    .reset_index(drop = True)
)

test_predictions_added

0       200
1       100
2       200
3       200
4       200
       ... 
1995    100
1996    100
1997    100
1998    200
1999    100
Name: teamId, Length: 2000, dtype: int64

In [38]:
submission['winner'] = test_predictions_added
submission.head()

Unnamed: 0,matchId,winner
0,8000,200
1,8001,100
2,8002,200
3,8003,200
4,8004,200


In [39]:
submission.to_csv('../data/sm1_submission.csv', index = False)

In [15]:
# **using the championPoints for prediction**

train_predictions_added = ( 
    X_train_added
    .sort_values(['matchId', 'summonerLevel', 'championPoints'], ascending = [True, False, False])
    .drop_duplicates('matchId')
    [['matchId', 'teamId']]
    .reset_index(drop = True)
)

train_predictions_added

Unnamed: 0,matchId,teamId
0,0,100
1,1,100
2,2,200
3,3,200
4,4,100
...,...,...
7995,7995,200
7996,7996,200
7997,7997,100
7998,7998,200


In [16]:
accuracy_score(
    y_true = y_train['winner'],
    y_pred = train_predictions_added['teamId']
)

#it is back to 0.50375, lookslike championpoint has no additive effect.

0.50375

**Will add both the smmerlevel and the chapionpoints for prediction**

In [17]:
champion = pd.read_json('../data/champion.json')
champion

Unnamed: 0,type,format,version,data
Aatrox,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Aatrox', 'key': '..."
Ahri,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Ahri', 'key': '10..."
Akali,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Akali', 'key': '8..."
Akshan,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Akshan', 'key': '..."
Alistar,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Alistar', 'key': ..."
...,...,...,...,...
Zeri,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Zeri', 'key': '22..."
Ziggs,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Ziggs', 'key': '1..."
Zilean,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Zilean', 'key': '..."
Zoe,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Zoe', 'key': '142..."


In [18]:
champion_data = pd.DataFrame.from_dict(champion['data'].values.tolist())   

champion_data.head(5)
champion_data[champion_data['name']== "Akshan"]

Unnamed: 0,version,id,key,name,title,blurb,info,image,tags,partype,stats
3,13.5.1,Akshan,166,Akshan,the Rogue Sentinel,"Raising an eyebrow in the face of danger, Aksh...","{'attack': 0, 'defense': 0, 'magic': 0, 'diffi...","{'full': 'Akshan.png', 'sprite': 'champion0.pn...","[Marksman, Assassin]",Mana,"{'hp': 630, 'hpperlevel': 104, 'mp': 350, 'mpp..."


In [19]:
champion_data_info = pd.DataFrame.from_dict(champion_data['info'].values.tolist())   

champion_data_info.columns

Index(['attack', 'defense', 'magic', 'difficulty'], dtype='object')

In [20]:
champion_data_stats = pd.DataFrame.from_dict(champion_data['stats'].values.tolist())   

champion_data_stats.head(5)

Unnamed: 0,hp,hpperlevel,mp,mpperlevel,movespeed,armor,armorperlevel,spellblock,spellblockperlevel,attackrange,hpregen,hpregenperlevel,mpregen,mpregenperlevel,crit,critperlevel,attackdamage,attackdamageperlevel,attackspeedperlevel,attackspeed
0,650,114,0,0.0,345,38,4.45,32,2.05,175,3.0,1.0,0.0,0.0,0,0,60,5.0,2.5,0.651
1,590,96,418,25.0,330,21,4.7,30,1.3,550,2.5,0.6,8.0,0.8,0,0,53,3.0,2.0,0.668
2,570,119,200,0.0,345,23,4.7,37,2.05,125,9.0,0.9,50.0,0.0,0,0,62,3.3,3.2,0.625
3,630,104,350,40.0,330,26,4.2,30,1.3,500,3.75,0.65,8.2,0.7,0,0,52,3.5,4.0,0.638
4,670,120,350,40.0,330,44,4.7,32,2.05,125,8.5,0.85,8.5,0.8,0,0,62,3.75,2.125,0.625


In [21]:
#merging two df using index when dont have same columns df1.join(df2) or pd.concat([df1, df2], axis=1)

champ_spread = champion_data.join(champion_data_stats)

champ_spread.describe()
champ_spread.shape #162, 31
champ_spread.columns.to_list()

['version',
 'id',
 'key',
 'name',
 'title',
 'blurb',
 'info',
 'image',
 'tags',
 'partype',
 'stats',
 'hp',
 'hpperlevel',
 'mp',
 'mpperlevel',
 'movespeed',
 'armor',
 'armorperlevel',
 'spellblock',
 'spellblockperlevel',
 'attackrange',
 'hpregen',
 'hpregenperlevel',
 'mpregen',
 'mpregenperlevel',
 'crit',
 'critperlevel',
 'attackdamage',
 'attackdamageperlevel',
 'attackspeedperlevel',
 'attackspeed']

In [22]:
champ_spread1 = champ_spread.drop(columns = ['version',
 'id',
 
 'title',
 'blurb',
 'info',
 'image',
 'tags',
 'partype',
 'stats']).rename(columns = {'key': 'championId',
                             'name': 'championName'})

In [23]:
X_train_added.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 80000 entries, 0 to 79999
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   matchId         80000 non-null  int64 
 1   teamId          80000 non-null  int64 
 2   participantId   80000 non-null  int64 
 3   summonerId      80000 non-null  int64 
 4   summonerLevel   80000 non-null  int64 
 5   championName    80000 non-null  object
 6   championId      80000 non-null  int64 
 7   championPoints  80000 non-null  int64 
dtypes: int64(7), object(1)
memory usage: 5.5+ MB


In [24]:


champ_spread1['championId'] = champ_spread1['championId'].astype('int64')
champ_spread1.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162 entries, 0 to 161
Data columns (total 22 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   championId            162 non-null    int64  
 1   championName          162 non-null    object 
 2   hp                    162 non-null    int64  
 3   hpperlevel            162 non-null    int64  
 4   mp                    162 non-null    int64  
 5   mpperlevel            162 non-null    float64
 6   movespeed             162 non-null    int64  
 7   armor                 162 non-null    int64  
 8   armorperlevel         162 non-null    float64
 9   spellblock            162 non-null    int64  
 10  spellblockperlevel    162 non-null    float64
 11  attackrange           162 non-null    int64  
 12  hpregen               162 non-null    float64
 13  hpregenperlevel       162 non-null    float64
 14  mpregen               162 non-null    float64
 15  mpregenperlevel       1

In [25]:
X_train_added1 = pd.merge(X_train_added, champ_spread1, 
                          left_on=["championId", "championName"], 
                          right_on=["championId", "championName"], 
                          how="left")
X_train_added1.head(5)
X_train_added1.columns.to_list()

['matchId',
 'teamId',
 'participantId',
 'summonerId',
 'summonerLevel',
 'championName',
 'championId',
 'championPoints',
 'hp',
 'hpperlevel',
 'mp',
 'mpperlevel',
 'movespeed',
 'armor',
 'armorperlevel',
 'spellblock',
 'spellblockperlevel',
 'attackrange',
 'hpregen',
 'hpregenperlevel',
 'mpregen',
 'mpregenperlevel',
 'crit',
 'critperlevel',
 'attackdamage',
 'attackdamageperlevel',
 'attackspeedperlevel',
 'attackspeed']

In [26]:
teamposition = pd.read_csv('../data/teamPositions.csv') #matchId	participantId	teamPositio

X_train_added2 = pd.merge(X_train_added1, teamposition, 
                          left_on=["matchId", "participantId"], 
                          right_on=["matchId", "participantId"], 
                          how="left")
X_train_added2.head(5)
X_train_added2.columns.to_list()

['matchId',
 'teamId',
 'participantId',
 'summonerId',
 'summonerLevel',
 'championName',
 'championId',
 'championPoints',
 'hp',
 'hpperlevel',
 'mp',
 'mpperlevel',
 'movespeed',
 'armor',
 'armorperlevel',
 'spellblock',
 'spellblockperlevel',
 'attackrange',
 'hpregen',
 'hpregenperlevel',
 'mpregen',
 'mpregenperlevel',
 'crit',
 'critperlevel',
 'attackdamage',
 'attackdamageperlevel',
 'attackspeedperlevel',
 'attackspeed',
 'teamPosition']

In [27]:
train_predictions_added1 = ( 
    X_train_added1
    .sort_values(['matchId', 'hp',
 'hpperlevel',
 'mp',
 'mpperlevel',
 'movespeed',
 'armor',
 'armorperlevel',
 'spellblock',
 'spellblockperlevel',
 'attackrange',
 'hpregen',
 'hpregenperlevel',
 'mpregen',
 'mpregenperlevel',
 'crit',
 'critperlevel',
 'attackdamage',
 'attackdamageperlevel',
 'attackspeedperlevel',
 'attackspeed'], ascending = [True, False, False, False, False, 
                              False, False, False, False, False,
                             False, False, False, False, False,
                             False, False, False, False, False, False])
    .drop_duplicates('matchId')
    [['matchId', 'teamId']]
    .reset_index(drop = True)
)

train_predictions_added1

Unnamed: 0,matchId,teamId
0,0,200
1,1,200
2,2,200
3,3,100
4,4,200
...,...,...
7995,7995,200
7996,7996,200
7997,7997,100
7998,7998,200


In [142]:
accuracy_score(
    y_true = y_train['winner'],
    y_pred = train_predictions_added1['teamId']
)

0.502

In [40]:
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

import seaborn as sns
import matplotlib.pyplot as plt

kfold = KFold(n_splits = 10, shuffle = True, random_state = 42)
mastery_cv_scores = cross_val_score(
    estimator = logreg_mastery,
    X = X_train,
    y = y_train,
    cv = kfold
)

print(mastery_cv_scores)
print(np.mean(mastery_cv_scores))

In [44]:
X = X_train_added2
# y = y_train

# X_train_sm1, X_test_sm1, y_train_sm1, y_test_sm1 = train_test_split(X, y, random_state = 321)

# linreg_base = LinearRegression().fit(X_train_sm1, y_train_sm1)# 

X_train_added2.columns
X_train_added2.head(10)

Unnamed: 0,matchId,teamId,participantId,summonerId,summonerLevel,championName,championId,championPoints,hp,hpperlevel,...,hpregenperlevel,mpregen,mpregenperlevel,crit,critperlevel,attackdamage,attackdamageperlevel,attackspeedperlevel,attackspeed,teamPosition
0,0,100,1,0,303,Mordekaiser,82,829357128,645.0,104.0,...,0.75,0.0,0.0,0.0,0.0,61.0,4.0,1.0,0.625,TOP
1,0,100,2,1,616,Sylas,517,914915168,575.0,129.0,...,0.9,8.0,0.8,0.0,0.0,61.0,3.0,3.5,0.645,JUNGLE
2,0,100,3,2,667,Lissandra,127,593687341,620.0,110.0,...,0.55,8.0,0.4,0.0,0.0,55.0,2.7,1.36,0.656,MIDDLE
3,0,100,4,3,860,Caitlyn,51,2483939552,580.0,107.0,...,0.55,7.4,0.7,0.0,0.0,60.0,3.8,4.0,0.681,BOTTOM
4,0,100,5,4,325,Morgana,25,1939925550,630.0,104.0,...,0.4,11.0,0.4,0.0,0.0,56.0,3.5,1.53,0.625,UTILITY
5,0,200,6,5,459,Warwick,19,699690687,620.0,99.0,...,0.75,7.45,0.6,0.0,0.0,65.0,3.0,2.3,0.638,TOP
6,0,200,7,6,416,LeeSin,64,3547947193,,,...,,,,,,,,,,JUNGLE
7,0,200,8,7,338,Ahri,103,1936114761,590.0,96.0,...,0.6,8.0,0.8,0.0,0.0,53.0,3.0,2.0,0.668,MIDDLE
8,0,200,9,8,344,Samira,360,577457532,600.0,108.0,...,0.55,8.2,0.7,0.0,0.0,57.0,3.3,3.3,0.658,BOTTOM
9,0,200,10,9,532,Alistar,12,857686309,670.0,120.0,...,0.85,8.5,0.8,0.0,0.0,62.0,3.75,2.125,0.625,UTILITY


In [79]:
pd.concat([X_train_added2[X_train_added2['participantId']==1].reset_index(drop=True),
           X_train_added2[X_train_added2['participantId']==2].reset_index(drop=True),
           X_train_added2[X_train_added2['participantId']==3].reset_index(drop=True),
          X_train_added2[X_train_added2['participantId']==4].reset_index(drop=True)], 
          axis=1)

# X_train_added2[X_train_added2['participantId']==1]

Unnamed: 0,matchId,teamId,participantId,summonerId,summonerLevel,championName,championId,championPoints,hp,hpperlevel,...,hpregenperlevel,mpregen,mpregenperlevel,crit,critperlevel,attackdamage,attackdamageperlevel,attackspeedperlevel,attackspeed,teamPosition
0,0,100,1,0,303,Mordekaiser,82,829357128,645.0,104.0,...,0.55,7.40,0.7,0.0,0.0,60.0,3.8,4.00,0.681,BOTTOM
1,1,100,1,10,570,Riven,92,2206187838,630.0,100.0,...,,,,,,,,,,BOTTOM
2,2,100,1,20,619,Aatrox,266,778374889,650.0,114.0,...,0.55,7.40,0.7,0.0,0.0,60.0,3.8,4.00,0.681,BOTTOM
3,3,100,1,30,437,Sett,875,854963791,670.0,114.0,...,0.55,7.40,0.7,0.0,0.0,60.0,3.8,4.00,0.681,BOTTOM
4,4,100,1,40,668,Irelia,39,1653572892,590.0,124.0,...,0.70,6.00,0.8,0.0,0.0,53.0,1.3,2.00,0.658,BOTTOM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7995,7995,100,1,1283,766,Kaisa,145,1854044088,,,...,0.55,6.30,0.4,0.0,0.0,66.0,3.5,4.50,0.694,BOTTOM
7996,7996,100,1,37425,111,Malphite,54,813433826,644.0,104.0,...,0.65,8.50,1.0,0.0,0.0,60.0,2.5,2.50,0.625,BOTTOM
7997,7997,100,1,1190,84,Urgot,6,479093267,655.0,102.0,...,0.75,8.25,0.8,0.0,0.0,60.0,3.5,3.90,0.658,BOTTOM
7998,7998,100,1,29798,99,Shen,98,870295455,610.0,99.0,...,0.50,6.70,1.0,0.0,0.0,59.0,3.4,1.36,0.625,BOTTOM


In [93]:
data = X_train_added2[['matchId',
 'teamId',
'participantId',
    'summonerLevel',
  'championId',
 'championPoints',
 'hp',
 'hpperlevel',
 'mp',
 'mpperlevel',
 'movespeed',
 'armor',
 'armorperlevel',
 'spellblock',
 'spellblockperlevel',
 'attackrange',
 'hpregen',
 'hpregenperlevel',
 'mpregen',
 'mpregenperlevel',
 'crit',
 'critperlevel',
 'attackdamage',
 'attackdamageperlevel',
 'attackspeedperlevel',
 'attackspeed']]
 #'teamPosition']
        
data.head(5)

Unnamed: 0,matchId,teamId,participantId,summonerLevel,championId,championPoints,hp,hpperlevel,mp,mpperlevel,...,hpregen,hpregenperlevel,mpregen,mpregenperlevel,crit,critperlevel,attackdamage,attackdamageperlevel,attackspeedperlevel,attackspeed
0,0,100,1,303,82,829357128,645.0,104.0,100.0,0.0,...,5.0,0.75,0.0,0.0,0.0,0.0,61.0,4.0,1.0,0.625
1,0,100,2,616,517,914915168,575.0,129.0,310.0,70.0,...,9.0,0.9,8.0,0.8,0.0,0.0,61.0,3.0,3.5,0.645
2,0,100,3,667,127,593687341,620.0,110.0,475.0,30.0,...,7.0,0.55,8.0,0.4,0.0,0.0,55.0,2.7,1.36,0.656
3,0,100,4,860,51,2483939552,580.0,107.0,315.0,40.0,...,3.5,0.55,7.4,0.7,0.0,0.0,60.0,3.8,4.0,0.681
4,0,100,5,325,25,1939925550,630.0,104.0,340.0,60.0,...,5.5,0.4,11.0,0.4,0.0,0.0,56.0,3.5,1.53,0.625


In [121]:
data_x=pd.concat([data[data['participantId']==1].reset_index(drop=True).drop(columns = ['teamId', 'participantId']).rename(columns=lambda X: X+"_1"),
           data[data['participantId']==2].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_2"),
           data[data['participantId']==3].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_3"),
          data[data['participantId']==4].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_4"),
          data[data['participantId']==5].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_5"),
          data[data['participantId']==6].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_6"),
          data[data['participantId']==7].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_7"),
          data[data['participantId']==8].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_8"),
          data[data['participantId']==9].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_9"),
          data[data['participantId']==10].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_10")], 
          axis=1)
data_x.shape #8000, 240
# data_x.columns.to_list()
data_x.head(10)

Unnamed: 0,matchId_1,summonerLevel_1,championId_1,championPoints_1,hp_1,hpperlevel_1,mp_1,mpperlevel_1,movespeed_1,armor_1,...,hpregen_10,hpregenperlevel_10,mpregen_10,mpregenperlevel_10,crit_10,critperlevel_10,attackdamage_10,attackdamageperlevel_10,attackspeedperlevel_10,attackspeed_10
0,0,303,82,829357128,645.0,104.0,100.0,0.0,335.0,37.0,...,8.5,0.85,8.5,0.8,0.0,0.0,62.0,3.75,2.125,0.625
1,1,570,92,2206187838,630.0,100.0,0.0,0.0,340.0,33.0,...,5.0,0.5,8.75,0.5,0.0,0.0,62.0,3.5,3.0,0.635
2,2,619,266,778374889,650.0,114.0,0.0,0.0,345.0,38.0,...,7.0,0.5,8.0,1.0,0.0,0.0,62.0,2.0,2.5,0.667
3,3,437,875,854963791,670.0,114.0,0.0,0.0,340.0,33.0,...,5.5,0.55,13.0,0.5,0.0,0.0,51.0,3.3,2.3,0.625
4,4,668,39,1653572892,590.0,124.0,350.0,50.0,335.0,36.0,...,6.0,0.6,11.0,0.6,0.0,0.0,47.0,2.6,2.25,0.625
5,5,462,122,1429087803,652.0,114.0,263.0,58.0,340.0,39.0,...,7.0,0.55,6.0,0.8,0.0,0.0,56.0,2.2,3.5,0.625
6,6,54,114,1329045944,620.0,99.0,300.0,60.0,345.0,33.0,...,5.5,0.55,13.0,0.5,0.0,0.0,51.0,3.3,2.3,0.625
7,7,348,25,1939925550,630.0,104.0,340.0,60.0,335.0,25.0,...,,,,,,,,,,
8,8,48,887,267274892,620.0,109.0,330.0,40.0,340.0,39.0,...,6.0,0.6,11.0,0.6,0.0,0.0,47.0,2.6,2.25,0.625
9,9,101,6,479093267,655.0,102.0,340.0,45.0,330.0,36.0,...,5.5,0.55,11.5,0.4,0.0,0.0,51.0,3.1,2.61,0.644


In [109]:
y_train.head(5)
y_train.shape

(8000, 2)

In [113]:
X = data_x
y = y_train.drop(columns = ['matchId'])

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 321)


In [104]:
X_train.isna().sum()

matchId_1                    0
summonerLevel_1              0
championId_1                 0
championPoints_1             0
hp_1                       468
                          ... 
critperlevel_10            251
attackdamage_10            251
attackdamageperlevel_10    251
attackspeedperlevel_10     251
attackspeed_10             251
Length: 231, dtype: int64

In [114]:
logreg = LogisticRegression().fit(X_train.fillna(0), y_train)

  y = column_or_1d(y, warn=True)


In [115]:
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [119]:
y_pred = logreg.predict(X_test.fillna(0))

In [122]:
accuracy_score(y_test, y_pred)

0.513

**Having the teamdata all the values we are still at 51.3% accuracy level we should add the team position and HotEncoding the positions**

sum the values of a column and make new
df['Ball wise total runs']= df.iloc[:, -4:-1].sum(axis=1)

In [225]:
data_x.columns.to_list()

['matchId_1',
 'summonerLevel_1',
 'championId_1',
 'championPoints_1',
 'hp_1',
 'hpperlevel_1',
 'mp_1',
 'mpperlevel_1',
 'movespeed_1',
 'armor_1',
 'armorperlevel_1',
 'spellblock_1',
 'spellblockperlevel_1',
 'attackrange_1',
 'hpregen_1',
 'hpregenperlevel_1',
 'mpregen_1',
 'mpregenperlevel_1',
 'crit_1',
 'critperlevel_1',
 'attackdamage_1',
 'attackdamageperlevel_1',
 'attackspeedperlevel_1',
 'attackspeed_1',
 'summonerLevel_2',
 'championId_2',
 'championPoints_2',
 'hp_2',
 'hpperlevel_2',
 'mp_2',
 'mpperlevel_2',
 'movespeed_2',
 'armor_2',
 'armorperlevel_2',
 'spellblock_2',
 'spellblockperlevel_2',
 'attackrange_2',
 'hpregen_2',
 'hpregenperlevel_2',
 'mpregen_2',
 'mpregenperlevel_2',
 'crit_2',
 'critperlevel_2',
 'attackdamage_2',
 'attackdamageperlevel_2',
 'attackspeedperlevel_2',
 'attackspeed_2',
 'summonerLevel_3',
 'championId_3',
 'championPoints_3',
 'hp_3',
 'hpperlevel_3',
 'mp_3',
 'mpperlevel_3',
 'movespeed_3',
 'armor_3',
 'armorperlevel_3',
 'spellb

In [160]:
last_aj=pd.read_csv('../data/last_frame_valuesALL.csv')

last_aj.shape #80000, 47


(80000, 47)

Column in the .csv from aj 
Index(['matchId', 'participantId', 'final_gold', 'final_xp',
       'final_abilityHaste', 'final_abilityPower', 'final_armor',
       'final_armorPen', 'final_armorPenPercent', 'final_attackDamage',
       'final_attackSpeed', 'final_bonArbPct', 'final_ccReduction',
       'final_cooldownReduction', 'final_health', 'final_healthMax',
       'final_healthRegen', 'final_lifesteal', 'final_magicPen',
       'final_magicPenPercent', 'final_magicResist', 'final_movementSpeed',
       'final_omnivamp', 'final_physicalVamp', 'final_power', 'final_powerMax',
       'final_powerRegen', 'final_spellVamp', 'final_magicDamageDone',
       'final_magicDamageDoneToChampions', 'final_magicDamageTaken',
       'final_physicalDamageDone', 'final_physicalDamageDoneToChampions',
       'final_physicalDamageTaken', 'final_totalDamageDone',
       'final_totalDamageDoneToChampions', 'final_totalDamageTaken',
       'final_trueDamageDone', 'final_trueDamageDoneToChampions',
       'final_trueDamageTaken', 'final_currentGold', 'final_goldPerSecond',
       'final_jungleMinionsKilled', 'final_level', 'final_minionsKilled',
       'final_timeEnemySpentControlled', 'final_totalGold'],
      dtype='object')

In [224]:
data.shape
data.columns.to_list()
data_aj = pd.merge(data, last_aj, left_on=["matchId", "participantId"], right_on=["matchId", "participantId"], how="left")
data_aj.columns.to_list()
# data_aj.shape
# data_aj.head(5)

['matchId',
 'teamId',
 'participantId',
 'summonerLevel',
 'championId',
 'championPoints',
 'hp',
 'hpperlevel',
 'mp',
 'mpperlevel',
 'movespeed',
 'armor',
 'armorperlevel',
 'spellblock',
 'spellblockperlevel',
 'attackrange',
 'hpregen',
 'hpregenperlevel',
 'mpregen',
 'mpregenperlevel',
 'crit',
 'critperlevel',
 'attackdamage',
 'attackdamageperlevel',
 'attackspeedperlevel',
 'attackspeed',
 'final_gold',
 'final_xp',
 'final_abilityHaste',
 'final_abilityPower',
 'final_armor',
 'final_armorPen',
 'final_armorPenPercent',
 'final_attackDamage',
 'final_attackSpeed',
 'final_bonArbPct',
 'final_ccReduction',
 'final_cooldownReduction',
 'final_health',
 'final_healthMax',
 'final_healthRegen',
 'final_lifesteal',
 'final_magicPen',
 'final_magicPenPercent',
 'final_magicResist',
 'final_movementSpeed',
 'final_omnivamp',
 'final_physicalVamp',
 'final_power',
 'final_powerMax',
 'final_powerRegen',
 'final_spellVamp',
 'final_magicDamageDone',
 'final_magicDamageDoneToChampi

**converting each matchId as one row to have same number of observations(rows) for X_train and y_train**

In [146]:
data_aj_x=pd.concat([data_aj[data_aj['participantId']==1].reset_index(drop=True).drop(columns = ['teamId', 'participantId']).rename(columns=lambda X: X+"_1"),
           data_aj[data_aj['participantId']==2].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_2"),
           data_aj[data_aj['participantId']==3].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_3"),
          data_aj[data_aj['participantId']==4].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_4"),
         data_aj[data_aj['participantId']==5].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_5"),
          data_aj[data_aj['participantId']==6].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_6"),
          data_aj[data_aj['participantId']==7].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_7"),
         data_aj[data_aj['participantId']==8].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_8"),
          data_aj[data_aj['participantId']==9].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_9"),
         data_aj[data_aj['participantId']==10].reset_index(drop=True).drop(columns = ['matchId',
                                                                             'teamId', 'participantId']).rename(columns=lambda X: X+"_10")], 
          axis=1)
data_aj_x.shape (8000, 681)

(8000, 681)

In [151]:
# rereading the y_train data because it becomes 6000 from 8000 upon split in previous model, 
# so rereading and calling it ydata_train

# y_train.shape #(6000, 1)
ydata_train = pd.read_csv('../data/train_winners.csv')

ydata_train.shape

(8000, 2)

In [152]:
X = data_aj_x
y = ydata_train.drop(columns = ['matchId'])

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 321) #split the training data into test and train set.

In [204]:
?train_test_split

In [162]:
X.shape
X_train.shape

(6000, 681)

In [163]:
logreg_aj = LogisticRegression().fit(X_train.fillna(0), y_train)

  y = column_or_1d(y, warn=True)


In [164]:
y_pred_aj = logreg_aj.predict(X_test.fillna(0))

In [165]:
accuracy_score(y_test, y_pred_aj)

0.513

Apply regularization, use scaller, impute

In [170]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer #fills average of the column if there are NaN missing values

In [168]:
?LogisticRegression #to find the variable that can be adjusted

In [201]:
logReg_aj_scaler = Pipeline(
    steps = [
        ('impute', SimpleImputer()), #fills average of the column if there are NaN missing values
        ('scaler', StandardScaler()),
        ('log_reg', LogisticRegression(C = 0.0001)) #started from 0.01, 0,001, 
    ]
).fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


In [202]:
scal_pred = logReg_aj_scaler.predict(X_test)

In [203]:
accuracy_score(y_test, scal_pred)

0.7015

In [212]:
logReg_aj_scaler['scaler'].get_feature_names_out()
logReg_aj_scaler['log_reg'].coef_.flatten() #turns into one array

array([-6.99279768e-03,  9.26953999e-05,  1.25317492e-04,  2.27867709e-03,
        1.88042964e-03,  2.38545103e-03,  3.49819001e-03, -1.62754413e-03,
       -1.15169376e-03,  9.72244995e-04,  2.37995841e-03,  4.18944204e-03,
        2.41310752e-03,  2.26436796e-04,  2.30976978e-03,  4.48752978e-03,
       -1.29528984e-03, -1.80095146e-03,  0.00000000e+00,  0.00000000e+00,
       -1.29509242e-03,  1.51135954e-03, -1.25855666e-03, -3.15335298e-03,
       -2.83516762e-02, -1.73191051e-02,  0.00000000e+00, -2.45492346e-03,
       -6.08626421e-03,  0.00000000e+00,  6.12236103e-03, -8.94054679e-03,
       -4.82128666e-03,  0.00000000e+00, -1.91728923e-03,  0.00000000e+00,
       -9.81827928e-03, -1.36866345e-02, -7.72807422e-03,  3.00911856e-03,
        7.09707907e-03,  1.20618806e-03, -4.40302984e-03, -4.72985048e-03,
       -8.65253482e-04,  0.00000000e+00,  2.54864384e-03,  1.37513660e-03,
       -5.90858558e-04,  4.99689876e-03, -3.55877050e-03, -5.03669151e-03,
        2.13537992e-03, -

In [222]:
# look at coefficient

pd.DataFrame({'feature': X_train.columns,
                             'coefficient': logReg_aj_scaler['log_reg'].coef_.flatten()}).sort_values('coefficient', 
                                                                                                      ascending =False).iloc[0:20]

Unnamed: 0,feature,coefficient
568,final_gold_9,0.030049
612,final_totalGold_9,0.030049
432,final_gold_7,0.027639
476,final_totalGold_7,0.027639
500,final_gold_8,0.026315
544,final_totalGold_8,0.026315
408,final_totalGold_6,0.025429
364,final_gold_6,0.025429
600,final_totalDamageDone_9,0.024598
636,final_gold_10,0.02177
