In [1]:
import numpy as np
import pandas as pd
import json
from typing import Dict
from itertools import groupby
from functools import reduce
from collections import Counter
import scipy.sparse as sps
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score
from sklearn.model_selection import GridSearchCV

In [2]:
df = pd.read_json("export/export_0.json")

In [3]:
# List of all items
items_list = list(range(1,10)) + [10 * i + j for i in range(1,10) for j in range(i,10)]

In [4]:
# Converting items to their corresponding vectors
# Similar to one hot encoding
# There are 9 basic items. Each basic item is alloted 2 bits. Therefore, size of vector = 9*2 = 18
# Basic item to index mapping is as follows:
# {1: (0, 1),
#  2: (2, 3),
#  3: (4, 5),
#  4: (6, 7),
#  5: (8, 9),
#  6: (10, 11),
#  7: (12, 13),
#  8: (14, 15),
#  9: (16, 17)}
# For basic items, the leftmost bit alloted to that item is set to 1. 
# eg: item_2 => [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# For combined items, the leftmost bits alloted to the 2 basic items
# used to create the combined item are set to 1.
# eg: item_23 => [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# for combined items created using 2 of the same item, both the bits alloted to the basic item are set to 1.
# eg: item_33 => [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
def vectorise_items(items):
    item_index = {x:y for x,y in zip(range(1,10),range(0,18,2))}
    res = {}
    for i,item in enumerate(items):
        item_str = str(item)
        vect = np.zeros([18])
        for i in item_str:
            first_index = item_index[int(i)]
            if(not vect[first_index]):
                vect[first_index] = 1
            else:
                vect[first_index+1] = 1
        res[item] = vect
    res[0] = np.zeros([18])
    return res

In [5]:
def vectorise_items2(items):
    res = {}
    for i,item in enumerate(items):
        item_str = str(item)
        vect = np.zeros([9])
        for i in item_str:
            vect[int(i)-1] += 1
        res[item] = vect
    res[0] = np.zeros([9])
    return res

In [6]:
item_vector_dict = vectorise_items(items_list)

In [7]:
# Convert a list of items to vectors
def item_vector_lookup(item_list, d=item_vector_dict):
    res = []
    for items in item_list:
        temp_res = []
        for item in items:
            temp_res = temp_res + d[item].tolist()
        res.append(temp_res)
    return res


In [8]:
clf = RandomForestClassifier( n_estimators = 50, n_jobs=-1, verbose=7, warm_start=True)

In [9]:
def runRF(df,char_key,clf):
    #char_key = 'TFT3_Ahri'
    #Make a dataframe with character_id, level, and items
    char_item_rank = [(char["character_id"], char["level"], \
                       np.pad(i := np.asarray(char["items"]), (0,3-len(i))), player["final"]["placement"]["rank"]) \
                          for player in df.player for char in player["final"]["comp"]]

    char_item_rank_df = pd.DataFrame(char_item_rank, columns=["character_id","level","items","rank"])
    char_item_rank_df["rank"] = char_item_rank_df["rank"].astype("float")
    # add item vectors to the existing data frame
    temp_df = char_item_rank_df.join(pd.DataFrame(item_vector_lookup(char_item_rank_df["items"]) ,\
                                                            columns = [f"item_index{x}" for x in range(1,55)]))
    temp_df["rank"] = temp_df["rank"].astype('float')
    temp_df = temp_df.drop(["level"], axis=1)
    temp_df["top4"] = np.where(temp_df["rank"] <= 4, 1,0)
    # Group by character_id and create a separate dataframe fro each unique character_id
    chars_df_dict = {x:y for x,y in temp_df.groupby(temp_df["character_id"])}
    curr_df = chars_df_dict[char_key]
    X = curr_df.iloc[:,3:-1]
    Y = curr_df.iloc[:,-1]
#     train_x, test_x, train_y, test_y = train_test_split(X,Y,test_size=0.2)
    
    clf.fit(X,Y)
    clf.n_estimators += 50
    #pred_y = clf.predict(test_x)
    #accuracy_score(test_y, pred_y)


In [10]:
clf = RandomForestClassifier( n_estimators=50, n_jobs=-1, verbose=5, warm_start=True)
#clf = LogisticRegression( n_jobs=-1, verbose=5, warm_start=True)
for i in range(10,101,10):
    print(f"Current i = {i}")
    df = pd.read_json(f"export/export_{i}.json")
    runRF(df,'TFT3_Ahri',clf)
    del(df)

Current i = 10


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.


building tree 1 of 50
building tree 2 of 50building tree 3 of 50
building tree 4 of 50
building tree 5 of 50
building tree 6 of 50

building tree 7 of 50building tree 8 of 50

building tree 9 of 50
building tree 10 of 50building tree 11 of 50

building tree 12 of 50building tree 13 of 50

building tree 14 of 50
building tree 15 of 50building tree 16 of 50

building tree 17 of 50
building tree 18 of 50
building tree 19 of 50
building tree 20 of 50building tree 21 of 50

building tree 22 of 50
building tree 23 of 50
building tree 24 of 50
building tree 25 of 50
building tree 26 of 50
building tree 27 of 50
building tree 28 of 50
building tree 29 of 50
building tree 30 of 50building tree 31 of 50

building tree 32 of 50
building tree 33 of 50
building tree 34 of 50
building tree 35 of 50building tree 36 of 50

building tree 37 of 50
building tree 38 of 50
building tree 39 of 50
building tree 40 of 50
building tree 41 of 50
building tree 42 of 50
building tree 43 of 50building tree 44 of 5

[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  46 out of  50 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.6s finished


Current i = 20


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done  46 out of  50 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.1s finished


building tree 1 of 50building tree 2 of 50
building tree 3 of 50
building tree 4 of 50
building tree 5 of 50building tree 6 of 50

building tree 7 of 50

building tree 8 of 50
building tree 9 of 50
building tree 10 of 50
building tree 11 of 50
building tree 12 of 50
building tree 13 of 50
building tree 14 of 50
building tree 15 of 50
building tree 16 of 50
building tree 17 of 50
building tree 18 of 50
building tree 19 of 50
building tree 20 of 50building tree 21 of 50

building tree 22 of 50
building tree 23 of 50
building tree 24 of 50
building tree 25 of 50
building tree 26 of 50
building tree 27 of 50
building tree 28 of 50building tree 29 of 50
building tree 30 of 50

building tree 31 of 50
building tree 32 of 50
building tree 33 of 50
building tree 34 of 50
building tree 35 of 50
building tree 36 of 50
building tree 37 of 50building tree 38 of 50

building tree 39 of 50building tree 40 of 50

building tree 41 of 50
building tree 42 of 50
building tree 43 of 50building tree 44 of 5

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done  46 out of  50 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.1s finished


Current i = 30
building tree 1 of 50
building tree 2 of 50building tree 3 of 50

building tree 4 of 50building tree 5 of 50
building tree 6 of 50
building tree 7 of 50
building tree 8 of 50

building tree 9 of 50
building tree 10 of 50
building tree 11 of 50
building tree 12 of 50
building tree 13 of 50
building tree 14 of 50
building tree 15 of 50
building tree 16 of 50
building tree 17 of 50
building tree 18 of 50
building tree 19 of 50
building tree 20 of 50
building tree 21 of 50
building tree 22 of 50building tree 23 of 50

building tree 24 of 50
building tree 25 of 50
building tree 26 of 50building tree 27 of 50

building tree 28 of 50
building tree 29 of 50
building tree 30 of 50
building tree 31 of 50
building tree 32 of 50
building tree 33 of 50
building tree 34 of 50
building tree 35 of 50
building tree 36 of 50building tree 37 of 50

building tree 38 of 50
building tree 39 of 50
building tree 40 of 50
building tree 41 of 50
building tree 42 of 50
building tree 43 of 50
build

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done  46 out of  50 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.1s finished


Current i = 40
building tree 1 of 50building tree 2 of 50

building tree 3 of 50building tree 4 of 50building tree 5 of 50

building tree 6 of 50
building tree 7 of 50building tree 8 of 50


building tree 9 of 50
building tree 10 of 50
building tree 11 of 50
building tree 12 of 50
building tree 13 of 50building tree 14 of 50

building tree 15 of 50
building tree 16 of 50
building tree 17 of 50building tree 18 of 50

building tree 19 of 50building tree 20 of 50

building tree 21 of 50
building tree 22 of 50
building tree 23 of 50
building tree 24 of 50
building tree 25 of 50
building tree 26 of 50
building tree 27 of 50
building tree 28 of 50
building tree 29 of 50
building tree 30 of 50
building tree 31 of 50
building tree 32 of 50building tree 33 of 50

building tree 34 of 50
building tree 35 of 50
building tree 36 of 50
building tree 37 of 50
building tree 38 of 50
building tree 39 of 50
building tree 40 of 50
building tree 41 of 50building tree 42 of 50

building tree 43 of 50
build

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done  46 out of  50 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.1s finished


Current i = 50
building tree 1 of 50
building tree 2 of 50building tree 3 of 50
building tree 4 of 50

building tree 5 of 50building tree 6 of 50

building tree 7 of 50building tree 8 of 50

building tree 9 of 50
building tree 10 of 50building tree 11 of 50

building tree 12 of 50
building tree 13 of 50
building tree 14 of 50building tree 15 of 50

building tree 16 of 50
building tree 17 of 50
building tree 18 of 50
building tree 19 of 50building tree 20 of 50

building tree 21 of 50
building tree 22 of 50building tree 23 of 50

building tree 24 of 50
building tree 25 of 50
building tree 26 of 50
building tree 27 of 50
building tree 28 of 50building tree 29 of 50building tree 30 of 50


building tree 31 of 50
building tree 32 of 50
building tree 33 of 50building tree 34 of 50

building tree 35 of 50
building tree 36 of 50
building tree 37 of 50
building tree 38 of 50building tree 39 of 50

building tree 40 of 50
building tree 41 of 50
building tree 42 of 50
building tree 43 of 50buildi

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done  46 out of  50 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.1s finished


Current i = 60
building tree 1 of 50
building tree 2 of 50
building tree 3 of 50
building tree 4 of 50building tree 5 of 50

building tree 6 of 50
building tree 7 of 50building tree 8 of 50

building tree 9 of 50
building tree 10 of 50
building tree 11 of 50
building tree 12 of 50
building tree 13 of 50
building tree 14 of 50
building tree 15 of 50building tree 16 of 50

building tree 17 of 50
building tree 18 of 50
building tree 19 of 50building tree 20 of 50

building tree 21 of 50
building tree 22 of 50building tree 23 of 50

building tree 24 of 50
building tree 25 of 50
building tree 26 of 50
building tree 27 of 50building tree 28 of 50

building tree 29 of 50
building tree 30 of 50
building tree 31 of 50
building tree 32 of 50
building tree 33 of 50
building tree 34 of 50
building tree 35 of 50
building tree 36 of 50
building tree 37 of 50
building tree 38 of 50
building tree 39 of 50
building tree 40 of 50
building tree 41 of 50
building tree 42 of 50
building tree 43 of 50
build

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done  46 out of  50 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.1s finished


Current i = 70
building tree 1 of 50
building tree 2 of 50building tree 3 of 50

building tree 4 of 50building tree 5 of 50

building tree 6 of 50
building tree 7 of 50building tree 8 of 50

building tree 9 of 50
building tree 10 of 50
building tree 11 of 50
building tree 12 of 50
building tree 13 of 50
building tree 14 of 50building tree 15 of 50building tree 16 of 50


building tree 17 of 50
building tree 18 of 50
building tree 19 of 50building tree 20 of 50

building tree 21 of 50building tree 22 of 50
building tree 23 of 50

building tree 24 of 50
building tree 25 of 50
building tree 26 of 50
building tree 27 of 50
building tree 28 of 50building tree 29 of 50

building tree 30 of 50
building tree 31 of 50
building tree 32 of 50
building tree 33 of 50
building tree 34 of 50
building tree 35 of 50
building tree 36 of 50
building tree 37 of 50
building tree 38 of 50
building tree 39 of 50
building tree 40 of 50
building tree 41 of 50
building tree 42 of 50building tree 43 of 50

build

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done  46 out of  50 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.1s finished


Current i = 80
building tree 1 of 50building tree 2 of 50

building tree 3 of 50building tree 4 of 50building tree 5 of 50
building tree 6 of 50


building tree 7 of 50building tree 8 of 50

building tree 9 of 50
building tree 10 of 50building tree 11 of 50

building tree 12 of 50
building tree 13 of 50building tree 14 of 50

building tree 15 of 50
building tree 16 of 50
building tree 17 of 50
building tree 18 of 50
building tree 19 of 50
building tree 20 of 50
building tree 21 of 50
building tree 22 of 50building tree 23 of 50

building tree 24 of 50
building tree 25 of 50
building tree 26 of 50
building tree 27 of 50
building tree 28 of 50
building tree 29 of 50
building tree 30 of 50
building tree 31 of 50building tree 32 of 50

building tree 33 of 50
building tree 34 of 50
building tree 35 of 50
building tree 36 of 50
building tree 37 of 50building tree 38 of 50

building tree 39 of 50
building tree 40 of 50
building tree 41 of 50
building tree 42 of 50
building tree 43 of 50
build

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done  46 out of  50 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.1s finished


Current i = 90
building tree 1 of 50
building tree 2 of 50building tree 3 of 50
building tree 4 of 50building tree 5 of 50
building tree 6 of 50


building tree 7 of 50building tree 8 of 50

building tree 9 of 50
building tree 10 of 50
building tree 11 of 50building tree 12 of 50

building tree 13 of 50
building tree 14 of 50
building tree 15 of 50
building tree 16 of 50
building tree 17 of 50
building tree 18 of 50
building tree 19 of 50
building tree 20 of 50
building tree 21 of 50
building tree 22 of 50
building tree 23 of 50building tree 24 of 50

building tree 25 of 50
building tree 26 of 50
building tree 27 of 50
building tree 28 of 50
building tree 29 of 50
building tree 30 of 50
building tree 31 of 50
building tree 32 of 50
building tree 33 of 50
building tree 34 of 50
building tree 35 of 50
building tree 36 of 50
building tree 37 of 50building tree 38 of 50

building tree 39 of 50
building tree 40 of 50
building tree 41 of 50
building tree 42 of 50
building tree 43 of 50
build

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done  46 out of  50 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.1s finished



building tree 1 of 50building tree 2 of 50

building tree 3 of 50
building tree 4 of 50building tree 5 of 50

building tree 6 of 50
building tree 7 of 50
building tree 8 of 50
building tree 9 of 50
building tree 10 of 50
building tree 11 of 50building tree 12 of 50

building tree 13 of 50building tree 14 of 50

building tree 15 of 50building tree 16 of 50

building tree 17 of 50
building tree 18 of 50building tree 19 of 50

building tree 20 of 50
building tree 21 of 50
building tree 22 of 50
building tree 23 of 50building tree 24 of 50

building tree 25 of 50
building tree 26 of 50
building tree 27 of 50
building tree 28 of 50
building tree 29 of 50
building tree 30 of 50
building tree 31 of 50
building tree 32 of 50
building tree 33 of 50
building tree 34 of 50
building tree 35 of 50
building tree 36 of 50
building tree 37 of 50
building tree 38 of 50
building tree 39 of 50
building tree 40 of 50
building tree 41 of 50
building tree 42 of 50
building tree 43 of 50building tree 44 of 

In [11]:
clf.n_estimators

550

In [12]:
df = pd.read_json("export/export_0.json")

In [16]:
char_key = 'TFT3_Ahri'
char_item_rank = [(char["character_id"], char["level"], \
                       np.pad(i := np.asarray(char["items"]), (0,3-len(i))), player["final"]["placement"]["rank"]) \
                          for player in df.player for char in player["final"]["comp"]]

char_item_rank_df = pd.DataFrame(char_item_rank, columns=["character_id","level","items","rank"])
char_item_rank_df["rank"] = char_item_rank_df["rank"].astype("float")
# add item vectors to the existing data frame
temp_df = char_item_rank_df.join(pd.DataFrame(item_vector_lookup(char_item_rank_df["items"]) ,\
                                                        columns = [f"item_index{x}" for x in range(1,55)]))
temp_df["rank"] = temp_df["rank"].astype('float')
temp_df = temp_df.drop(["level"], axis=1)
temp_df["top4"] = np.where(temp_df["rank"] <= 4, 1,0)
# Group by character_id and create a separate dataframe fro each unique character_id
chars_df_dict = {x:y for x,y in temp_df.groupby(temp_df["character_id"])}
curr_df = chars_df_dict[char_key]
X = curr_df.iloc[:,3:-1]
Y = curr_df.iloc[:,-1]

In [19]:
pred_y = clf.predict(X)

[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 272 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done 500 out of 500 | elapsed:    0.1s finished


In [21]:
accuracy_score(Y,pred_y)

0.5650406504065041

In [22]:
pred_y

array([1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1])