In [2]:
import numpy as np
import pandas as pd
import json
from typing import Dict
from itertools import groupby
from functools import reduce
from collections import Counter
import scipy.sparse as sps
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import GridSearchCV

In [3]:
df = pd.read_json("export/export_0.json")
for i in range(1,2):
    dframe = pd.read_json("export/export_{}0.json".format(str(i)))
    df = df.append(dframe)

In [52]:
# List of all items
items_list = list(range(1,10)) + [10 * i + j for i in range(1,10) for j in range(i,10)]

In [53]:

# Converting items to their corresponding vectors
# Similar to one hot encoding
# There are 9 basic items. Each basic item is alloted 2 bits. Therefore, size of vector = 9*2 = 18
# Basic item to index mapping is as follows:
# {1: (0, 1),
#  2: (2, 3),
#  3: (4, 5),
#  4: (6, 7),
#  5: (8, 9),
#  6: (10, 11),
#  7: (12, 13),
#  8: (14, 15),
#  9: (16, 17)}
# For basic items, the leftmost bit alloted to that item is set to 1. 
# eg: item_2 => [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# For combined items, the leftmost bits alloted to the 2 basic items
# used to create the combined item are set to 1.
# eg: item_23 => [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# for combined items created using 2 of the same item, both the bits alloted to the basic item are set to 1.
# eg: item_33 => [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
def vectorise_items(items):
    item_index = {x:y for x,y in zip(range(1,10),range(0,18,2))}
    res = {}
    for i,item in enumerate(items):
        item_str = str(item)
        vect = np.zeros([18])
        for i in item_str:
            first_index = item_index[int(i)]
            if(not vect[first_index]):
                vect[first_index] = 1
            else:
                vect[first_index+1] = 1
        res[item] = vect
    res[0] = np.zeros([18])
    return res

In [54]:
item_vector_dict = vectorise_items(items_list)

In [55]:
with open('full_mapping.json') as json_data:
    fullmap = json.load(json_data)

active_trait = pd.DataFrame(fullmap['trait_tier_mapping_json'].items())
active_trait.columns = ['trait', 'min_active']
print(active_trait)

            trait                                         min_active
0     Blademaster        {'1': [3, 6], '2': [6, 9], '3': [9, 25000]}
1         Blaster                     {'1': [2, 4], '2': [4, 25000]}
2         Brawler                     {'1': [2, 4], '2': [4, 25000]}
3   Demolitionist                                  {'1': [2, 25000]}
4     Infiltrator        {'1': [2, 4], '2': [4, 6], '3': [6, 25000]}
5      ManaReaver                                  {'1': [2, 25000]}
6       Mercenary                                  {'1': [1, 25000]}
7          Mystic                     {'1': [2, 4], '2': [4, 25000]}
8         Paragon                                  {'1': [1, 25000]}
9       Protector        {'1': [2, 4], '2': [4, 6], '3': [6, 25000]}
10         Sniper                     {'1': [2, 4], '2': [4, 25000]}
11       Sorcerer        {'1': [2, 4], '2': [4, 6], '3': [6, 25000]}
12       Starship                                  {'1': [1, 25000]}
13       Vanguard        {'1': [2,

In [213]:
#Make a dataframe with character_id, level, and items
char_item_rank = [(char["character_id"], char["level"], \
                   np.pad(i := np.asarray(char["items"]), (0,3-len(i))), player["final"]["placement"]["rank"], 
                   player["final"]["level"], player['final']['gold_spent'], player["final"]["traits"]) \
                      for player in df.player for char in player["final"]["comp"]]
                        
     
char_item_rank_df = pd.DataFrame(char_item_rank, columns=["character_id","level","items","rank","player_final_level","final_gold","traits"])
print(char_item_rank_df)

           character_id level            items rank  player_final_level  \
0             TFT3_Ashe     2  [0.0, 0.0, 0.0]    4                   8   
1            TFT3_Xayah     2      [19, 23, 0]    4                   8   
2            TFT3_Urgot     1       [3, 77, 0]    4                   8   
3          TFT3_XinZhao     2  [0.0, 0.0, 0.0]    4                   8   
4         TFT3_JarvanIV     2     [55, 66, 77]    4                   8   
...                 ...   ...              ...  ...                 ...   
100267      TFT3_Ezreal     2  [0.0, 0.0, 0.0]    1                   8   
100268    TFT3_Nocturne     2       [11, 0, 0]    1                   8   
100269       TFT3_Urgot     2     [49, 66, 69]    1                   8   
100270  TFT3_Cassiopeia     2  [0.0, 0.0, 0.0]    1                   8   
100271      TFT3_Illaoi     3      [55, 66, 0]    1                   8   

        final_gold                                             traits  
0              298  {'Set3_

In [215]:
def is_empty(item_list):
    #True = all zeros, False = non-zeros
    return not np.any(item_list)
    
char_item_rank_df = char_item_rank_df[char_item_rank_df['items'].map(is_empty) != True].reset_index()
print(char_item_rank_df)

       level_0   index     character_id level         items rank  \
0            0       1       TFT3_Xayah     2   [19, 23, 0]    4   
1            1       2       TFT3_Urgot     1    [3, 77, 0]    4   
2            2       4    TFT3_JarvanIV     2  [55, 66, 77]    4   
3            3       7      TFT3_Irelia     2  [15, 19, 44]    4   
4            4       8  TFT3_Cassiopeia     3    [25, 0, 0]    3   
...        ...     ...              ...   ...           ...  ...   
54677    54677  100265      TFT3_KogMaw     2  [24, 24, 57]    1   
54678    54678  100266      TFT3_Viktor     2    [11, 0, 0]    1   
54679    54679  100268    TFT3_Nocturne     2    [11, 0, 0]    1   
54680    54680  100269       TFT3_Urgot     2  [49, 66, 69]    1   
54681    54681  100271      TFT3_Illaoi     3   [55, 66, 0]    1   

       player_final_level  final_gold  \
0                       8         298   
1                       8         298   
2                       8         298   
3                  

In [216]:
char_item_rank_df = char_item_rank_df.drop(["index", "level_0"], axis = 1)


In [217]:
print(char_item_rank_df)

          character_id level         items rank  player_final_level  \
0           TFT3_Xayah     2   [19, 23, 0]    4                   8   
1           TFT3_Urgot     1    [3, 77, 0]    4                   8   
2        TFT3_JarvanIV     2  [55, 66, 77]    4                   8   
3          TFT3_Irelia     2  [15, 19, 44]    4                   8   
4      TFT3_Cassiopeia     3    [25, 0, 0]    3                   9   
...                ...   ...           ...  ...                 ...   
54677      TFT3_KogMaw     2  [24, 24, 57]    1                   8   
54678      TFT3_Viktor     2    [11, 0, 0]    1                   8   
54679    TFT3_Nocturne     2    [11, 0, 0]    1                   8   
54680       TFT3_Urgot     2  [49, 66, 69]    1                   8   
54681      TFT3_Illaoi     3   [55, 66, 0]    1                   8   

       final_gold                                             traits  
0             298  {'Set3_Celestial': 4, 'Set3_Blademaster': 3, '...  
1    

In [218]:
char_item_rank_df.isnull().values.any()

False

In [219]:
trait_index = {k: v for v, k in enumerate(active_trait['trait'].tolist())} 
print(trait_index)

{'Blademaster': 0, 'Blaster': 1, 'Brawler': 2, 'Demolitionist': 3, 'Infiltrator': 4, 'ManaReaver': 5, 'Mercenary': 6, 'Mystic': 7, 'Paragon': 8, 'Protector': 9, 'Sniper': 10, 'Sorcerer': 11, 'Starship': 12, 'Vanguard': 13, 'Astro': 14, 'Battlecast': 15, 'Celestial': 16, 'Chrono': 17, 'Cybernetic': 18, 'DarkStar': 19, 'MechPilot': 20, 'Rebel': 21, 'SpacePirate': 22, 'StarGuardian': 23}


In [220]:
def encode_traits(trait_dict_column, active_trait_df, trait_index):
    encoding_list = []
    for trait_dict in trait_dict_column:
        trait_encoding = [ 0 for i in range(len(trait_index)) ]
        for key, value in trait_dict.items():
            if key[:5] == 'Set3_':
                trait = key[5:]
            else:
                trait = key
            min_active_dict = active_trait_df.loc[active_trait_df['trait'] == trait]['min_active'].values[0]
            has_trait_flag = 0
            for k, v in min_active_dict.items():
                if value >= v[0] and value < v[1]:
                    trait_tier = int(k)
                    has_trait_flag = 1
            if has_trait_flag == 0:
                trait_tier = 0
            trait_encoding[trait_index[trait]] = trait_tier
        encoding_list.append(trait_encoding)
    return encoding_list

In [221]:
def item_vector_lookup(item_list, d=item_vector_dict):
    return [[d[item] for item in items] for items in item_list]

In [222]:
# Convert a list of items to vectors
def item_vector_lookup2(item_list, d=item_vector_dict):
    res = []
    for items in item_list:
        temp_res = []
        for item in items:
            temp_res = temp_res + d[item].tolist()
        res.append(temp_res)
    return res

In [223]:
hi = pd.DataFrame(item_vector_lookup2(char_item_rank_df["items"]))
print(hi)

        0    1    2    3    4    5    6    7    8    9   ...   44   45   46  \
0      1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
1      0.0  0.0  0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
2      0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0  1.0  ...  0.0  0.0  0.0   
3      1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0  ...  0.0  0.0  0.0   
4      0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0  ...  0.0  0.0  0.0   
...    ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...   
54677  0.0  0.0  1.0  0.0  0.0  0.0  1.0  0.0  0.0  0.0  ...  1.0  0.0  0.0   
54678  1.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
54679  1.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
54680  0.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0  0.0  0.0  ...  0.0  0.0  1.0   
54681  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0  1.0  ...  0.0  0.0  0.0   

        47   48   49   50   51   52   53  
0      0

In [224]:
# add item vectors to the existing data frame
temp_df = char_item_rank_df.join(pd.DataFrame(item_vector_lookup2(char_item_rank_df["items"]),
                                              columns = [f"item_index{x}" for x in range(1,55)]))
temp_df.isnull().values.any()

False

In [225]:
temp_df.head()

Unnamed: 0,character_id,level,items,rank,player_final_level,final_gold,traits,item_index1,item_index2,item_index3,...,item_index45,item_index46,item_index47,item_index48,item_index49,item_index50,item_index51,item_index52,item_index53,item_index54
0,TFT3_Xayah,2,"[19, 23, 0]",4,8,298,"{'Set3_Celestial': 4, 'Set3_Blademaster': 3, '...",1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,TFT3_Urgot,1,"[3, 77, 0]",4,8,298,"{'Set3_Celestial': 4, 'Set3_Blademaster': 3, '...",0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,TFT3_JarvanIV,2,"[55, 66, 77]",4,8,298,"{'Set3_Celestial': 4, 'Set3_Blademaster': 3, '...",0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
3,TFT3_Irelia,2,"[15, 19, 44]",4,8,298,"{'Set3_Celestial': 4, 'Set3_Blademaster': 3, '...",1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,TFT3_Cassiopeia,3,"[25, 0, 0]",3,9,335,"{'Set3_Mystic': 2, 'Battlecast': 6, 'Blaster':...",0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [226]:
temp_df = temp_df.join(pd.DataFrame(encode_traits(char_item_rank_df["traits"], active_trait, trait_index),
                                    columns = [trait for trait in active_trait['trait'].tolist()]))
temp_df.isnull().values.any()

False

In [227]:
temp_df["rank"] = temp_df["rank"].astype('float')

In [228]:
temp_df.columns

Index(['character_id', 'level', 'items', 'rank', 'player_final_level',
       'final_gold', 'traits', 'item_index1', 'item_index2', 'item_index3',
       'item_index4', 'item_index5', 'item_index6', 'item_index7',
       'item_index8', 'item_index9', 'item_index10', 'item_index11',
       'item_index12', 'item_index13', 'item_index14', 'item_index15',
       'item_index16', 'item_index17', 'item_index18', 'item_index19',
       'item_index20', 'item_index21', 'item_index22', 'item_index23',
       'item_index24', 'item_index25', 'item_index26', 'item_index27',
       'item_index28', 'item_index29', 'item_index30', 'item_index31',
       'item_index32', 'item_index33', 'item_index34', 'item_index35',
       'item_index36', 'item_index37', 'item_index38', 'item_index39',
       'item_index40', 'item_index41', 'item_index42', 'item_index43',
       'item_index44', 'item_index45', 'item_index46', 'item_index47',
       'item_index48', 'item_index49', 'item_index50', 'item_index51',
       

In [229]:
temp_df = temp_df.drop(["level", "traits"], axis = 1)

In [230]:
# Use one hot encoding to encode character_id
temp_df = temp_df.join(pd.get_dummies(temp_df["character_id"]))
temp_df.head(20)

Unnamed: 0,character_id,items,rank,player_final_level,final_gold,item_index1,item_index2,item_index3,item_index4,item_index5,...,TFT3_Vi,TFT3_Viktor,TFT3_WuKong,TFT3_Xayah,TFT3_Xerath,TFT3_XinZhao,TFT3_Yasuo,TFT3_Zed,TFT3_Ziggs,TFT3_Zoe
0,TFT3_Xayah,"[19, 23, 0]",4.0,8,298,1.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0
1,TFT3_Urgot,"[3, 77, 0]",4.0,8,298,0.0,0.0,0.0,0.0,1.0,...,0,0,0,0,0,0,0,0,0,0
2,TFT3_JarvanIV,"[55, 66, 77]",4.0,8,298,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
3,TFT3_Irelia,"[15, 19, 44]",4.0,8,298,1.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
4,TFT3_Cassiopeia,"[25, 0, 0]",3.0,9,335,0.0,0.0,1.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
5,TFT3_Viktor,"[37, 0, 0]",3.0,9,335,0.0,0.0,0.0,0.0,1.0,...,0,1,0,0,0,0,0,0,0,0
6,TFT3_KogMaw,"[11, 24, 67]",3.0,9,335,1.0,1.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
7,TFT3_Ezreal,"[36, 0, 0]",3.0,9,335,0.0,0.0,0.0,0.0,1.0,...,0,0,0,0,0,0,0,0,0,0
8,TFT3_Urgot,"[68, 0, 0]",3.0,9,335,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
9,TFT3_Nocturne,"[67, 0, 0]",3.0,9,335,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0


In [231]:
temp_df.isnull().values.any()

False

In [249]:
# Split dataset into training and testing set
train, test = train_test_split(temp_df,test_size=0.2)

In [250]:
train.isnull().values.any()


False

In [251]:
test.isnull().values.any()

False

In [252]:
test.columns

Index(['character_id', 'items', 'rank', 'player_final_level', 'final_gold',
       'item_index1', 'item_index2', 'item_index3', 'item_index4',
       'item_index5',
       ...
       'TFT3_Vi', 'TFT3_Viktor', 'TFT3_WuKong', 'TFT3_Xayah', 'TFT3_Xerath',
       'TFT3_XinZhao', 'TFT3_Yasuo', 'TFT3_Zed', 'TFT3_Ziggs', 'TFT3_Zoe'],
      dtype='object', length=140)

In [253]:
test_x = test.iloc[:,3:].to_numpy()
train_x = train.iloc[:,3:].to_numpy()

In [254]:
test_y = test.iloc[:,2].to_numpy()
train_y = train.iloc[:,2].to_numpy()

In [255]:
print(train_x)
print(test_x)
print(train_y)

[[  9. 335.   0. ...   0.   0.   0.]
 [  9. 406.   1. ...   0.   0.   0.]
 [  8. 342.   0. ...   0.   0.   0.]
 ...
 [  9. 347.   0. ...   0.   0.   0.]
 [  8. 321.   0. ...   1.   0.   0.]
 [  8. 226.   0. ...   0.   0.   0.]]
[[  7. 299.   0. ...   0.   0.   0.]
 [  8. 308.   1. ...   0.   0.   0.]
 [  8. 244.   0. ...   0.   0.   0.]
 ...
 [  8. 378.   1. ...   0.   0.   0.]
 [  8. 246.   0. ...   0.   0.   0.]
 [  7. 197.   0. ...   0.   0.   0.]]
[3. 2. 2. ... 3. 4. 6.]


In [256]:
reg = RandomForestRegressor(min_samples_leaf=1, min_samples_split=22, n_estimators=110, n_jobs=-1,verbose=5)

In [None]:
reg.fit(train_x,train_y)

building tree 1 of 110building tree 2 of 110building tree 3 of 110

building tree 4 of 110
building tree 5 of 110

building tree 6 of 110building tree 7 of 110
building tree 8 of 110

building tree 9 of 110
building tree 10 of 110building tree 11 of 110
building tree 12 of 110


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 12 concurrent workers.



building tree 13 of 110building tree 14 of 110

building tree 15 of 110
building tree 16 of 110building tree 17 of 110

building tree 18 of 110
building tree 19 of 110
building tree 20 of 110
building tree 21 of 110
building tree 22 of 110
building tree 23 of 110
building tree 24 of 110
building tree 25 of 110
building tree 26 of 110
building tree 27 of 110
building tree 28 of 110
building tree 29 of 110
building tree 30 of 110
building tree 31 of 110
building tree 32 of 110
building tree 33 of 110building tree 34 of 110

building tree 35 of 110
building tree 36 of 110
building tree 37 of 110
building tree 38 of 110
building tree 39 of 110
building tree 40 of 110
building tree 41 of 110
building tree 42 of 110
building tree 43 of 110
building tree 44 of 110
building tree 45 of 110
building tree 46 of 110
building tree 47 of 110
building tree 48 of 110
building tree 49 of 110
building tree 50 of 110
building tree 51 of 110
building tree 52 of 110
building tree 53 of 110
building tree 5

[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed:    5.6s


building tree 61 of 110
building tree 62 of 110
building tree 63 of 110
building tree 64 of 110
building tree 65 of 110
building tree 66 of 110
building tree 67 of 110
building tree 68 of 110
building tree 69 of 110
building tree 70 of 110
building tree 71 of 110
building tree 72 of 110


In [None]:
pred_y = reg.predict(test_x)

In [None]:
mean_squared_error(test_y,pred_y)

In [None]:
mean_absolute_error(test_y,pred_y)

In [None]:
reg2 = RandomForestRegressor(n_jobs=-1)

In [None]:
params = {"n_estimators" : np.arange(50,300,30),
"min_samples_split" : np.arange(2,38,4),
"min_samples_leaf" : np.arange(1,10,2),
}
params

In [None]:
#clf = GridSearchCV(reg2, params, verbose=10, n_jobs=-1, cv=3)

In [None]:
#clf.fit(train_x,train_y)

In [None]:
print(pred_y)