In [1]:
import pandas as pd

In [3]:
X_train = pd.read_csv('../data/participants_train.csv')
X_test = pd.read_csv('../data/participants_test.csv')
y_train = pd.read_csv('../data/train_winners.csv')
submission = pd.read_csv('../data/sample_submission.csv')


In [15]:
X_train.head(10)

Unnamed: 0,matchId,teamId,participantId,summonerId,summonerLevel,championName,championId
0,0,100,1,0,303,Mordekaiser,82
1,0,100,2,1,616,Sylas,517
2,0,100,3,2,667,Lissandra,127
3,0,100,4,3,860,Caitlyn,51
4,0,100,5,4,325,Morgana,25
5,0,200,6,5,459,Warwick,19
6,0,200,7,6,416,LeeSin,64
7,0,200,8,7,338,Ahri,103
8,0,200,9,8,344,Samira,360
9,0,200,10,9,532,Alistar,12


I'll make a really simple model - I'll simply predict the winner based on which team has the highest level summoner.

In [83]:
train_predictions = ( 
    X_train
    .sort_values(['matchId', 'summonerLevel'], ascending = [True, False])
    .drop_duplicates('matchId')
    [['matchId', 'teamId']]
    .reset_index(drop = True)
)

train_predictions

Unnamed: 0,matchId,teamId
0,0,100
1,1,100
2,2,200
3,3,200
4,4,100
...,...,...
7995,7995,200
7996,7996,200
7997,7997,100
7998,7998,200


In [6]:
from sklearn.metrics import accuracy_score

In [7]:
accuracy_score(
    y_true = y_train['winner'],
    y_pred = train_predictions['teamId']
)

0.504

Looks like I do a pretty terrible job with this method. Nevertheless, I'll implement this on the test data.

In [9]:
y_pred = ( 
    X_test
    .sort_values(['matchId', 'summonerLevel'], ascending = [True, False])
    .drop_duplicates('matchId')
    ['teamId']
    .reset_index(drop = True)
)

y_pred

0       100
1       100
2       200
3       200
4       200
       ... 
1995    100
1996    200
1997    200
1998    200
1999    100
Name: teamId, Length: 2000, dtype: int64

I'll add these to the sample submission.

In [10]:
submission['winner'] = y_pred
submission.head()

Unnamed: 0,matchId,winner
0,8000,100
1,8001,100
2,8002,200
3,8003,200
4,8004,200


Now, I'll export the result.

In [11]:
submission.to_csv('../data/first_submission.csv', index = False)

**READING THE OTHER FILES AND SEE WHAT ARE THERE**

In [70]:
champ = pd.read_csv('../data/champion_mastery.csv') 
#question how to connect this with the X_train data where we have the win and loose stats
champ.columns



Index(['summonerId', 'championId', 'championLevel', 'championPoints',
       'chestGranted', 'tokensEarned'],
      dtype='object')

In [73]:
champ_grp = champ.groupby('championId')['championPoints'].sum().sort_values(ascending =False).to_frame()
champ_grp

Unnamed: 0_level_0,championPoints
championId,Unnamed: 1_level_1
157,4244962990
64,3547947193
67,3247397232
81,3109033237
412,2824465392
...,...
888,166163087
526,143231115
200,94031879
895,80835147


In [79]:
X_train_added = pd.merge(X_train, champ_grp, left_on="championId", right_on="championId", how="left")
X_train_added.head(5)

Unnamed: 0,matchId,teamId,participantId,summonerId,summonerLevel,championName,championId,championPoints
0,0,100,1,0,303,Mordekaiser,82,829357128
1,0,100,2,1,616,Sylas,517,914915168
2,0,100,3,2,667,Lissandra,127,593687341
3,0,100,4,3,860,Caitlyn,51,2483939552
4,0,100,5,4,325,Morgana,25,1939925550


In [93]:
# **using the championPoints for prediction**

train_predictions_added = ( 
    X_train_added
    .sort_values(['matchId', 'championPoints'], ascending = [True, False])
    .drop_duplicates('matchId')
    [['matchId', 'teamId']]
    .reset_index(drop = True)
)

train_predictions_added

Unnamed: 0,matchId,teamId
0,0,200
1,1,200
2,2,100
3,3,100
4,4,200
...,...,...
7995,7995,200
7996,7996,100
7997,7997,200
7998,7998,100


In [94]:
accuracy_score(
    y_true = y_train['winner'],
    y_pred = train_predictions_added['teamId']
)

#Score is reduced adding the championpoints reduces from 0.504 to 0.489

0.489625

In [95]:
# **using the championPoints for prediction**

train_predictions_added = ( 
    X_train_added
    .sort_values(['matchId', 'summonerLevel', 'championPoints'], ascending = [True, False, False])
    .drop_duplicates('matchId')
    [['matchId', 'teamId']]
    .reset_index(drop = True)
)

train_predictions_added

Unnamed: 0,matchId,teamId
0,0,100
1,1,100
2,2,200
3,3,200
4,4,100
...,...,...
7995,7995,200
7996,7996,200
7997,7997,100
7998,7998,200


In [97]:
accuracy_score(
    y_true = y_train['winner'],
    y_pred = train_predictions_added['teamId']
)

#it is back to 0.50375, lookslike championpoint has no additive effect.

0.50375

**Will add both the smmerlevel and the chapionpoints for prediction**

In [18]:
champion = pd.read_json('../data/champion.json')
champion

Unnamed: 0,type,format,version,data
Aatrox,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Aatrox', 'key': '..."
Ahri,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Ahri', 'key': '10..."
Akali,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Akali', 'key': '8..."
Akshan,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Akshan', 'key': '..."
Alistar,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Alistar', 'key': ..."
...,...,...,...,...
Zeri,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Zeri', 'key': '22..."
Ziggs,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Ziggs', 'key': '1..."
Zilean,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Zilean', 'key': '..."
Zoe,champion,standAloneComplex,13.5.1,"{'version': '13.5.1', 'id': 'Zoe', 'key': '142..."


In [68]:
champion_data = pd.DataFrame.from_dict(champion['data'].values.tolist())   

champion_data.head(5)
champion_data[champion_data['name']== "Akshan"]

Unnamed: 0,version,id,key,name,title,blurb,info,image,tags,partype,stats
3,13.5.1,Akshan,166,Akshan,the Rogue Sentinel,"Raising an eyebrow in the face of danger, Aksh...","{'attack': 0, 'defense': 0, 'magic': 0, 'diffi...","{'full': 'Akshan.png', 'sprite': 'champion0.pn...","[Marksman, Assassin]",Mana,"{'hp': 630, 'hpperlevel': 104, 'mp': 350, 'mpp..."


In [26]:
champion_data_info = pd.DataFrame.from_dict(champion_data['info'].values.tolist())   

champion_data_info.columns

Index(['attack', 'defense', 'magic', 'difficulty'], dtype='object')

In [29]:
champion_data_stats = pd.DataFrame.from_dict(champion_data['stats'].values.tolist())   

champion_data_stats.head(5)

Unnamed: 0,hp,hpperlevel,mp,mpperlevel,movespeed,armor,armorperlevel,spellblock,spellblockperlevel,attackrange,hpregen,hpregenperlevel,mpregen,mpregenperlevel,crit,critperlevel,attackdamage,attackdamageperlevel,attackspeedperlevel,attackspeed
0,650,114,0,0.0,345,38,4.45,32,2.05,175,3.0,1.0,0.0,0.0,0,0,60,5.0,2.5,0.651
1,590,96,418,25.0,330,21,4.7,30,1.3,550,2.5,0.6,8.0,0.8,0,0,53,3.0,2.0,0.668
2,570,119,200,0.0,345,23,4.7,37,2.05,125,9.0,0.9,50.0,0.0,0,0,62,3.3,3.2,0.625
3,630,104,350,40.0,330,26,4.2,30,1.3,500,3.75,0.65,8.2,0.7,0,0,52,3.5,4.0,0.638
4,670,120,350,40.0,330,44,4.7,32,2.05,125,8.5,0.85,8.5,0.8,0,0,62,3.75,2.125,0.625


In [109]:
#merging two df using index when dont have same columns df1.join(df2) or pd.concat([df1, df2], axis=1)

champ_spread = champion_data.join(champion_data_stats)

champ_spread.describe()
champ_spread.shape #162, 31
champ_spread.columns.to_list()

['version',
 'id',
 'key',
 'name',
 'title',
 'blurb',
 'info',
 'image',
 'tags',
 'partype',
 'stats',
 'hp',
 'hpperlevel',
 'mp',
 'mpperlevel',
 'movespeed',
 'armor',
 'armorperlevel',
 'spellblock',
 'spellblockperlevel',
 'attackrange',
 'hpregen',
 'hpregenperlevel',
 'mpregen',
 'mpregenperlevel',
 'crit',
 'critperlevel',
 'attackdamage',
 'attackdamageperlevel',
 'attackspeedperlevel',
 'attackspeed']

In [115]:
champ_spread1 = champ_spread.drop(columns = ['version',
 'id',
 
 'title',
 'blurb',
 'info',
 'image',
 'tags',
 'partype',
 'stats']).rename(columns = {'key': 'championId',
                             'name': 'championName'})

In [124]:
X_train_added.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 80000 entries, 0 to 79999
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   matchId         80000 non-null  int64 
 1   teamId          80000 non-null  int64 
 2   participantId   80000 non-null  int64 
 3   summonerId      80000 non-null  int64 
 4   summonerLevel   80000 non-null  int64 
 5   championName    80000 non-null  object
 6   championId      80000 non-null  int64 
 7   championPoints  80000 non-null  int64 
dtypes: int64(7), object(1)
memory usage: 5.5+ MB


In [129]:


champ_spread1['championId'] = champ_spread1['championId'].astype('int64')
champ_spread1.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162 entries, 0 to 161
Data columns (total 22 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   championId            162 non-null    int64  
 1   championName          162 non-null    object 
 2   hp                    162 non-null    int64  
 3   hpperlevel            162 non-null    int64  
 4   mp                    162 non-null    int64  
 5   mpperlevel            162 non-null    float64
 6   movespeed             162 non-null    int64  
 7   armor                 162 non-null    int64  
 8   armorperlevel         162 non-null    float64
 9   spellblock            162 non-null    int64  
 10  spellblockperlevel    162 non-null    float64
 11  attackrange           162 non-null    int64  
 12  hpregen               162 non-null    float64
 13  hpregenperlevel       162 non-null    float64
 14  mpregen               162 non-null    float64
 15  mpregenperlevel       1

In [133]:
X_train_added1 = pd.merge(X_train_added, champ_spread1, 
                          left_on=["championId", "championName"], 
                          right_on=["championId", "championName"], 
                          how="left")
X_train_added1.head(5)
X_train_added1.columns.to_list()

['matchId',
 'teamId',
 'participantId',
 'summonerId',
 'summonerLevel',
 'championName',
 'championId',
 'championPoints',
 'hp',
 'hpperlevel',
 'mp',
 'mpperlevel',
 'movespeed',
 'armor',
 'armorperlevel',
 'spellblock',
 'spellblockperlevel',
 'attackrange',
 'hpregen',
 'hpregenperlevel',
 'mpregen',
 'mpregenperlevel',
 'crit',
 'critperlevel',
 'attackdamage',
 'attackdamageperlevel',
 'attackspeedperlevel',
 'attackspeed']

In [136]:
teamposition = pd.read_csv('../data/teamPositions.csv') #matchId	participantId	teamPositio

X_train_added2 = pd.merge(X_train_added1, teamposition, 
                          left_on=["matchId", "participantId"], 
                          right_on=["matchId", "participantId"], 
                          how="left")
X_train_added2.head(5)
X_train_added2.columns.to_list()

['matchId',
 'teamId',
 'participantId',
 'summonerId',
 'summonerLevel',
 'championName',
 'championId',
 'championPoints',
 'hp',
 'hpperlevel',
 'mp',
 'mpperlevel',
 'movespeed',
 'armor',
 'armorperlevel',
 'spellblock',
 'spellblockperlevel',
 'attackrange',
 'hpregen',
 'hpregenperlevel',
 'mpregen',
 'mpregenperlevel',
 'crit',
 'critperlevel',
 'attackdamage',
 'attackdamageperlevel',
 'attackspeedperlevel',
 'attackspeed',
 'teamPosition']

In [141]:
train_predictions_added1 = ( 
    X_train_added1
    .sort_values(['matchId', 'hp',
 'hpperlevel',
 'mp',
 'mpperlevel',
 'movespeed',
 'armor',
 'armorperlevel',
 'spellblock',
 'spellblockperlevel',
 'attackrange',
 'hpregen',
 'hpregenperlevel',
 'mpregen',
 'mpregenperlevel',
 'crit',
 'critperlevel',
 'attackdamage',
 'attackdamageperlevel',
 'attackspeedperlevel',
 'attackspeed'], ascending = [True, False, False, False, False, 
                              False, False, False, False, False,
                             False, False, False, False, False,
                             False, False, False, False, False, False])
    .drop_duplicates('matchId')
    [['matchId', 'teamId']]
    .reset_index(drop = True)
)

train_predictions_added1

Unnamed: 0,matchId,teamId
0,0,200
1,1,200
2,2,200
3,3,100
4,4,200
...,...,...
7995,7995,200
7996,7996,200
7997,7997,100
7998,7998,200


In [142]:
accuracy_score(
    y_true = y_train['winner'],
    y_pred = train_predictions_added1['teamId']
)

0.502