In [93]:
import numpy as np
import pandas as pd
import json
from typing import Dict
from itertools import groupby
from functools import reduce
from collections import Counter
import scipy.sparse as sps
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

In [94]:
df = pd.read_json("export/export_0.json")

In [95]:
# List of all items
items_list = list(range(1,10)) + [10 * i + j for i in range(1,10) for j in range(i,10)]

In [96]:
# Converting items to their corresponding vectors
# Similar to one hot encoding
# There are 9 basic items. Each basic item is alloted 2 bits. Therefore, size of vector = 9*2 = 18
# Basic item to index mapping is as follows:
# {1: (0, 1),
#  2: (2, 3),
#  3: (4, 5),
#  4: (6, 7),
#  5: (8, 9),
#  6: (10, 11),
#  7: (12, 13),
#  8: (14, 15),
#  9: (16, 17)}
# For basic items, the leftmost bit alloted to that item is set to 1. 
# eg: item_2 => [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# For combined items, the leftmost bits alloted to the 2 basic items
# used to create the combined item are set to 1.
# eg: item_23 => [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# for combined items created using 2 of the same item, both the bits alloted to the basic item are set to 1.
# eg: item_33 => [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
def vectorise_items(items):
    item_index = {x:y for x,y in zip(range(1,10),range(0,18,2))}
    res = {}
    for i,item in enumerate(items):
        item_str = str(item)
        vect = np.zeros([18])
        for i in item_str:
            first_index = item_index[int(i)]
            if(not vect[first_index]):
                vect[first_index] = 1
            else:
                vect[first_index+1] = 1
        res[item] = vect
    res[0] = np.zeros([18])
    return res

In [97]:
item_vector_dict = vectorise_items(items_list)

In [98]:
#Make a dataframe with character_id, level, and items
char_item_rank = [(char["character_id"], char["level"], \
                   np.pad(i := np.asarray(char["items"]), (0,3-len(i))), player["final"]["placement"]["rank"]) \
                      for player in df.player for char in player["final"]["comp"]]
     
char_item_rank_df = pd.DataFrame(char_item_rank, columns=["character_id","level","items","rank"])

In [99]:
char_item_rank_df.head(10)

Unnamed: 0,character_id,level,items,rank
0,TFT3_Ashe,2,"[0.0, 0.0, 0.0]",4
1,TFT3_Xayah,2,"[19, 23, 0]",4
2,TFT3_Urgot,1,"[3, 77, 0]",4
3,TFT3_XinZhao,2,"[0.0, 0.0, 0.0]",4
4,TFT3_JarvanIV,2,"[55, 66, 77]",4
5,TFT3_Rakan,2,"[0.0, 0.0, 0.0]",4
6,TFT3_Riven,2,"[0.0, 0.0, 0.0]",4
7,TFT3_Irelia,2,"[15, 19, 44]",4
8,TFT3_Cassiopeia,3,"[25, 0, 0]",3
9,TFT3_Viktor,2,"[37, 0, 0]",3


In [100]:
def item_vector_lookup(item_list, d=item_vector_dict):
    return [[d[item] for item in items] for items in item_list]
        

In [101]:
# Convert a list of items to vectors
def item_vector_lookup2(item_list, d=item_vector_dict):
    res = []
    for items in item_list:
        temp_res = []
        for item in items:
            temp_res = temp_res + d[item].tolist()
        res.append(temp_res)
    return res


In [102]:
# add item vectors to the existing data frame
temp_df = char_item_rank_df.join(pd.DataFrame(item_vector_lookup2(char_item_rank_df["items"]) ,\
                                                        columns = [f"item_index{x}" for x in range(1,55)]))                

In [103]:
temp_df["rank"] = temp_df["rank"].astype('float')

In [104]:
temp_df.columns

Index(['character_id', 'level', 'items', 'rank', 'item_index1', 'item_index2',
       'item_index3', 'item_index4', 'item_index5', 'item_index6',
       'item_index7', 'item_index8', 'item_index9', 'item_index10',
       'item_index11', 'item_index12', 'item_index13', 'item_index14',
       'item_index15', 'item_index16', 'item_index17', 'item_index18',
       'item_index19', 'item_index20', 'item_index21', 'item_index22',
       'item_index23', 'item_index24', 'item_index25', 'item_index26',
       'item_index27', 'item_index28', 'item_index29', 'item_index30',
       'item_index31', 'item_index32', 'item_index33', 'item_index34',
       'item_index35', 'item_index36', 'item_index37', 'item_index38',
       'item_index39', 'item_index40', 'item_index41', 'item_index42',
       'item_index43', 'item_index44', 'item_index45', 'item_index46',
       'item_index47', 'item_index48', 'item_index49', 'item_index50',
       'item_index51', 'item_index52', 'item_index53', 'item_index54'],
    

In [105]:
temp_df = temp_df.drop(["level"], axis=1)

In [106]:
# Use one hot encoding to encode character_id
temp_df = temp_df.join(pd.get_dummies(temp_df["character_id"]))

In [107]:
# Split dataset into training and testing set
train, test = train_test_split(temp_df,test_size=0.2)

In [108]:
test_x = test.iloc[:,3:].to_numpy()
train_x = train.iloc[:,3:].to_numpy()

In [109]:
test_y = test.iloc[:,2].to_numpy()
train_y = train.iloc[:,2].to_numpy()

In [110]:
reg = RandomForestRegressor(min_samples_leaf=1, min_samples_split=22, n_estimators=110, n_jobs=-1,verbose=5)

In [111]:
reg.fit(train_x,train_y)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.2s


building tree 1 of 110building tree 2 of 110

building tree 3 of 110building tree 4 of 110
building tree 5 of 110building tree 6 of 110


building tree 7 of 110building tree 8 of 110

building tree 9 of 110
building tree 10 of 110building tree 11 of 110

building tree 12 of 110
building tree 13 of 110
building tree 14 of 110building tree 15 of 110

building tree 16 of 110
building tree 17 of 110
building tree 18 of 110
building tree 19 of 110
building tree 20 of 110
building tree 21 of 110
building tree 22 of 110
building tree 23 of 110
building tree 24 of 110
building tree 25 of 110
building tree 26 of 110
building tree 27 of 110
building tree 28 of 110
building tree 29 of 110
building tree 30 of 110
building tree 31 of 110building tree 32 of 110

building tree 33 of 110
building tree 34 of 110
building tree 35 of 110
building tree 36 of 110
building tree 37 of 110
building tree 38 of 110
building tree 39 of 110
building tree 40 of 110
building tree 41 of 110
building tree 42 of 110
b

[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    1.2s


building tree 64 of 110
building tree 65 of 110
building tree 66 of 110
building tree 67 of 110
building tree 68 of 110building tree 69 of 110

building tree 70 of 110
building tree 71 of 110
building tree 72 of 110
building tree 73 of 110
building tree 74 of 110
building tree 75 of 110
building tree 76 of 110
building tree 77 of 110
building tree 78 of 110
building tree 79 of 110
building tree 80 of 110
building tree 81 of 110
building tree 82 of 110
building tree 83 of 110
building tree 84 of 110
building tree 85 of 110
building tree 86 of 110
building tree 87 of 110
building tree 88 of 110
building tree 89 of 110
building tree 90 of 110
building tree 91 of 110
building tree 92 of 110
building tree 93 of 110
building tree 94 of 110
building tree 95 of 110
building tree 96 of 110
building tree 97 of 110
building tree 98 of 110
building tree 99 of 110
building tree 100 of 110
building tree 101 of 110
building tree 102 of 110
building tree 103 of 110
building tree 104 of 110
building tr

[Parallel(n_jobs=-1)]: Done 110 out of 110 | elapsed:    2.4s finished


RandomForestRegressor(min_samples_split=22, n_estimators=110, n_jobs=-1,
                      verbose=5)

In [112]:
pred_y = reg.predict(test_x)

[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 110 out of 110 | elapsed:    0.0s finished


In [113]:
mean_squared_error(test_y,pred_y)

5.318220341595731

In [114]:
reg2 = RandomForestRegressor(n_jobs=-1)

In [115]:
params = {"n_estimators" : np.arange(50,300,30),
"min_samples_split" : np.arange(2,38,4),
"min_samples_leaf" : np.arange(1,10,2),
}
params

{'n_estimators': array([ 50,  80, 110, 140, 170, 200, 230, 260, 290]),
 'min_samples_split': array([ 2,  6, 10, 14, 18, 22, 26, 30, 34]),
 'min_samples_leaf': array([1, 3, 5, 7, 9])}

In [116]:
#clf = GridSearchCV(reg2, params, verbose=10, n_jobs=-1, cv=3)

In [117]:
#clf.fit(train_x,train_y)