In [1]:
import numpy as np
import pandas as pd
import json
from typing import Dict
from itertools import groupby, chain, permutations, combinations, combinations_with_replacement
from functools import reduce
from collections import Counter, defaultdict
import re
import random
from sklearn.preprocessing import OneHotEncoder 
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score

In [2]:
df = pd.read_json("export/export_0.json")

In [3]:
with open('full_mapping.json') as json_data:
    fullmap = json.load(json_data)

active_trait = pd.DataFrame(fullmap['trait_tier_mapping_json'].items())
active_trait.columns = ['trait', 'min_active']
print(active_trait)

            trait                                         min_active
0     Blademaster        {'1': [3, 6], '2': [6, 9], '3': [9, 25000]}
1         Blaster                     {'1': [2, 4], '2': [4, 25000]}
2         Brawler                     {'1': [2, 4], '2': [4, 25000]}
3   Demolitionist                                  {'1': [2, 25000]}
4     Infiltrator        {'1': [2, 4], '2': [4, 6], '3': [6, 25000]}
5      ManaReaver                                  {'1': [2, 25000]}
6       Mercenary                                  {'1': [1, 25000]}
7          Mystic                     {'1': [2, 4], '2': [4, 25000]}
8         Paragon                                  {'1': [1, 25000]}
9       Protector        {'1': [2, 4], '2': [4, 6], '3': [6, 25000]}
10         Sniper                     {'1': [2, 4], '2': [4, 25000]}
11       Sorcerer        {'1': [2, 4], '2': [4, 6], '3': [6, 25000]}
12       Starship                                  {'1': [1, 25000]}
13       Vanguard        {'1': [2,

In [4]:
trait_index = {k: v for v, k in enumerate(active_trait['trait'].tolist())} 
print(trait_index)

{'Blademaster': 0, 'Blaster': 1, 'Brawler': 2, 'Demolitionist': 3, 'Infiltrator': 4, 'ManaReaver': 5, 'Mercenary': 6, 'Mystic': 7, 'Paragon': 8, 'Protector': 9, 'Sniper': 10, 'Sorcerer': 11, 'Starship': 12, 'Vanguard': 13, 'Astro': 14, 'Battlecast': 15, 'Celestial': 16, 'Chrono': 17, 'Cybernetic': 18, 'DarkStar': 19, 'MechPilot': 20, 'Rebel': 21, 'SpacePirate': 22, 'StarGuardian': 23}


# Gold Per Stage

In [5]:
def gold_spent(player):
    gold_spent_per_round = {}
    for currRound, group in groupby(player["gold"]["by_round"], lambda x: x["current_round"]):
        l = list(group)
        goldSpent = max(0,int(l[0]["gold"]) - int(l[-1]["gold"]))
        gold_spent_per_round[currRound] = goldSpent
    gold_spent_by_stage = defaultdict(int)
    for x in list(gold_spent_per_round.keys()):
        m = re.match(r'(\d).+', x)
        gold_spent_by_stage[int(m.group(1))] += gold_spent_per_round[m.group(0)]

    return gold_spent_by_stage

In [6]:
gold_spent(df.player.iloc[0])

defaultdict(int, {1: 4, 2: 20, 3: 13, 4: 97, 5: 40, 6: 30})

In [7]:
gold_dict = [gold_spent(df.player.iloc[i]) for i in range(df.shape[0])]

In [8]:
gold_dict[:10]

[defaultdict(int, {1: 4, 2: 20, 3: 13, 4: 97, 5: 40, 6: 30}),
 defaultdict(int, {1: 4, 2: 5, 3: 27, 4: 67, 5: 68, 6: 87}),
 defaultdict(int, {1: 2, 2: 5, 3: 86, 4: 24, 5: 70, 6: 12}),
 defaultdict(int, {1: 4, 2: 7, 3: 52, 4: 68, 5: 109, 6: 31}),
 defaultdict(int, {1: 2, 2: 11, 3: 36, 4: 93, 5: 66}),
 defaultdict(int, {1: 2, 2: 22, 3: 15, 4: 62, 5: 80}),
 defaultdict(int, {1: 4, 2: 4, 3: 53, 4: 71, 5: 76}),
 defaultdict(int, {1: 4, 2: 13, 3: 24, 4: 69, 5: 87}),
 defaultdict(int, {1: 2, 2: 9, 3: 52, 4: 79, 5: 94, 6: 18}),
 defaultdict(int, {1: 1, 2: 25, 3: 34, 4: 47, 5: 43, 6: 0})]

In [9]:
gold_df = pd.DataFrame(gold_dict).fillna(0)
gold_df.head()

Unnamed: 0,1,2,3,4,5,6,7
0,4.0,20.0,13.0,97.0,40.0,30.0,0.0
1,4.0,5.0,27.0,67.0,68.0,87.0,0.0
2,2.0,5.0,86.0,24.0,70.0,12.0,0.0
3,4.0,7.0,52.0,68.0,109.0,31.0,0.0
4,2.0,11.0,36.0,93.0,66.0,0.0,0.0


# Level Per Stage

In [10]:
def level_stage(player):
    tp_dict = {}
    
    level_agg = 0
    current_stage = 0
    count_stage = 0
    
    for key, group in groupby(player["xp"]["by_round"], lambda x: x["current_round"]): 
        round_split = key.split("-")
        stage = int(round_split[0])

        #start of loop
        if current_stage == 0:
            current_stage = stage
        #going from stage x to stage y
        elif current_stage != stage:
            level_avg = level_agg/count_stage
#             tp_dict[f"level_at_stage_{current_stage}"] = level_avg 
            tp_dict[current_stage] = level_avg 
            current_stage = stage
            level_agg = 0
            count_stage = 0

        #continue
        list_r = list(group)
        level_agg += list_r[0]['level']
        count_stage += 1

    #getting the last stage
    level_avg = level_agg/count_stage
#     tp_dict[f"level_at_stage_{current_stage}"] = level_avg
    tp_dict[current_stage] = level_avg
    
    return tp_dict

In [11]:
level_stage(df.player.iloc[0])

{1: 2.0,
 2: 4.166666666666667,
 3: 5.333333333333333,
 4: 6.5,
 5: 7.333333333333333,
 6: 8.0}

In [12]:
level_dict = [level_stage(df.player.iloc[i]) for i in range(df.shape[0])]

In [13]:
level_df = pd.DataFrame(level_dict).fillna(method='ffill',axis=1)
level_df.head()

Unnamed: 0,1,2,3,4,5,6,7
0,2.0,4.166667,5.333333,6.5,7.333333,8.0,8.0
1,2.0,4.333333,5.0,5.833333,7.166667,8.0,8.0
2,1.666667,3.666667,5.166667,6.0,6.833333,7.0,7.0
3,1.666667,4.166667,5.833333,7.333333,8.0,8.0,8.0
4,1.666667,3.5,4.833333,6.0,7.75,7.75,7.75


# Active Traits

In [14]:
def encode_traits(trait_dict_column, active_trait_df, trait_index):
    encoding_list = []
    for trait_dict in trait_dict_column:
        trait_encoding = [ 0 for i in range(len(trait_index)) ]
        for key, value in trait_dict.items():
            if key[:5] == 'Set3_':
                trait = key[5:]
            else:
                trait = key
            min_active_dict = active_trait_df.loc[active_trait_df['trait'] == trait]['min_active'].values[0]
            has_trait_flag = 0
            for k, v in min_active_dict.items():
                if value >= v[0] and value < v[1]:
                    trait_tier = int(k)
                    has_trait_flag = 1
            if has_trait_flag == 0:
                trait_tier = 0
            trait_encoding[trait_index[trait]] = trait_tier
        encoding_list.append(trait_encoding)
    return encoding_list

def get_active_traits(player, char_trait, trait_tier):
    active_traits = {}
    last_rounds = get_last_rounds(player)
    for rounds in last_rounds:
        stage = rounds.split('-')[0]
        board = player['round_outcomes']['by_round'][rounds]['board']
        char_seen = set()
        trait_count = {}
        for char_dict in board:
            char = char_dict['character_id']
            if char not in char_seen:
                char_seen.add(char_dict['character_id'])
                trait_list = char_trait.loc[char_trait['name'] == char]['trait'].values[0].replace('\'', '').replace(' ', '').strip('][ ').split(',')
                for trait in trait_list:
                    if trait not in trait_count:
                        trait_count[trait] = 1
                    else:
                        trait_count[trait] += 1      
        active_traits[stage] = trait_count
        
    return active_traits
    
def get_last_rounds(player):
    round_list = list(player['round_outcomes']['by_round'].keys())
    last_round_stage = []
    last_r = 0
    last_round = ''
    for rounds in round_list:
        r = int(rounds.split('-')[1])
        if last_r > r:
            last_round_stage.append(last_round)
        last_r = r
        last_round = rounds
    last_round_stage.append(round_list[-1])
    return last_round_stage

In [15]:
with open('full_mapping.json') as json_data:
    fullmap = json.load(json_data)
    
char_trait = pd.DataFrame(fullmap['character_trait_json'].items()).astype(str)
char_trait.columns = ['name', 'trait']
#print(char_trait)

trait_tier = pd.DataFrame(fullmap['trait_tier_mapping_json'].items())
#print(trait_tier)

In [16]:
get_active_traits(df.player.iloc[0], char_trait, trait_tier)

{'1': {'Blademaster': 1, 'Cybernetic': 1, 'Protector': 1, 'DarkStar': 1},
 '2': {'Blademaster': 1,
  'Celestial': 2,
  'Mystic': 1,
  'DarkStar': 3,
  'Protector': 2,
  'Vanguard': 1},
 '3': {'Blademaster': 1,
  'Celestial': 2,
  'Sniper': 1,
  'Chrono': 2,
  'Vanguard': 2,
  'Protector': 2,
  'DarkStar': 2},
 '4': {'Sniper': 1,
  'Celestial': 4,
  'Blademaster': 2,
  'ManaReaver': 1,
  'Cybernetic': 1,
  'Protector': 4,
  'DarkStar': 1,
  'StarGuardian': 1},
 '5': {'Sniper': 1,
  'Celestial': 4,
  'Blademaster': 3,
  'ManaReaver': 1,
  'Cybernetic': 1,
  'Protector': 4,
  'Battlecast': 1,
  'DarkStar': 1,
  'Chrono': 1},
 '6': {'Sniper': 1,
  'Celestial': 4,
  'Blademaster': 3,
  'ManaReaver': 1,
  'Cybernetic': 1,
  'Protector': 4,
  'Battlecast': 1,
  'DarkStar': 1,
  'Chrono': 1}}

In [17]:
def active_trait_func(player):
    return get_active_traits(player, char_trait, trait_tier)

In [18]:
active_trait_dict = [active_trait_func(df.player.iloc[i]) for i in range(df.shape[0])]

In [19]:
active_trait_df = pd.DataFrame(active_trait_dict).fillna(method='ffill',axis=1)
active_trait_df.head()

Unnamed: 0,1,2,3,4,5,6,7
0,"{'Blademaster': 1, 'Cybernetic': 1, 'Protector...","{'Blademaster': 1, 'Celestial': 2, 'Mystic': 1...","{'Blademaster': 1, 'Celestial': 2, 'Sniper': 1...","{'Sniper': 1, 'Celestial': 4, 'Blademaster': 2...","{'Sniper': 1, 'Celestial': 4, 'Blademaster': 3...","{'Sniper': 1, 'Celestial': 4, 'Blademaster': 3...","{'Sniper': 1, 'Celestial': 4, 'Blademaster': 3..."
1,"{'Vanguard': 2, 'Cybernetic': 1, 'StarGuardian...","{'Blaster': 2, 'Chrono': 1, 'Mystic': 1, 'Batt...","{'Blaster': 2, 'Battlecast': 4, 'Infiltrator':...","{'Sorcerer': 1, 'Battlecast': 4, 'Blaster': 2,...","{'Infiltrator': 1, 'Battlecast': 5, 'Sorcerer'...","{'Infiltrator': 1, 'Battlecast': 6, 'Mystic': ...","{'Infiltrator': 1, 'Battlecast': 6, 'Mystic': ..."
2,"{'Sniper': 1, 'Chrono': 1, 'Protector': 1, 'Da...","{'Blademaster': 1, 'Celestial': 2, 'Mystic': 1...","{'Protector': 2, 'Celestial': 2, 'Blademaster'...","{'Protector': 2, 'Celestial': 2, 'Blademaster'...","{'Protector': 2, 'Celestial': 2, 'Blademaster'...","{'Protector': 2, 'Celestial': 2, 'Blademaster'...","{'Protector': 2, 'Celestial': 2, 'Blademaster'..."
3,"{'Mystic': 1, 'Battlecast': 2, 'Infiltrator': ...","{'Infiltrator': 1, 'DarkStar': 1, 'Mystic': 1,...","{'Mystic': 2, 'Astro': 1, 'Battlecast': 4, 'So...","{'Sniper': 4, 'Celestial': 1, 'Chrono': 1, 'As...","{'Sniper': 4, 'Astro': 3, 'DarkStar': 2, 'Myst...","{'Sniper': 4, 'Astro': 3, 'DarkStar': 1, 'Star...","{'Sniper': 4, 'Astro': 3, 'DarkStar': 1, 'Star..."
4,"{'Vanguard': 2, 'StarGuardian': 1, 'Cybernetic...","{'Sniper': 1, 'Celestial': 1, 'Vanguard': 2, '...","{'Sniper': 2, 'Chrono': 1, 'Infiltrator': 1, '...","{'Sniper': 2, 'Chrono': 2, 'Infiltrator': 1, '...","{'Mystic': 1, 'DarkStar': 6, 'Sniper': 3, 'Inf...","{'Mystic': 1, 'DarkStar': 6, 'Sniper': 3, 'Inf...","{'Mystic': 1, 'DarkStar': 6, 'Sniper': 3, 'Inf..."


# Characters and Items

In [20]:
def character_items(player):
    tp_dict={}
    
    round_outcome_dict = player['round_outcomes']
    by_round_dict = round_outcome_dict['by_round']
    
    current_stage = 0
    prev_stage = 1
    
    current_round = 'a'
    prev_round = 'a'
    
    for key in by_round_dict.keys():
        round_split = key.split("-")
        current_stage = int(round_split[0])

        current_round = key

        if(prev_round == 'a'):
            prev_round = key
            prev_stage = current_stage

        if (current_stage == prev_stage):
            prev_round = key
        #went from stage x to stage y
        elif (current_stage != prev_stage):
            #a dictionary containg board
            #get the board characters and items
            board_dict = by_round_dict[prev_round]['board']
            #each index has one character and item combo in a dictionary with character_id and items
            #the temp dictionary will be 1: [character_id, items]

            temp_list = []
            
            for char in board_dict:
                character_id_value = char['character_id']
                items_value = char['items']

                temp_dict = {}
                temp_dict['character_id'] = character_id_value
                temp_dict['items'] = items_value

                temp_list.append(temp_dict)

            tp_dict[prev_stage] = temp_list

            prev_round = key
            prev_stage = current_stage
            
    #last one
    board_dict = by_round_dict[prev_round]['board']
    
    temp_list = []

    for char in board_dict:
        character_id_value = char['character_id']
        items_value = char['items']

        temp_dict = {}
        temp_dict['character_id'] = character_id_value
        temp_dict['items'] = items_value

        temp_list.append(temp_dict)

    tp_dict[prev_stage] = temp_list
    
    
    return tp_dict


In [21]:
char_item_dict = [character_items(df.player.iloc[i]) for i in range(df.shape[0])]

In [22]:
char_item_dict[2]

{1: [{'character_id': 'TFT3_Caitlyn', 'items': []},
  {'character_id': 'TFT3_JarvanIV', 'items': []}],
 2: [{'character_id': 'TFT3_Xayah', 'items': [6, 19]},
  {'character_id': 'TFT3_Karma', 'items': []},
  {'character_id': 'TFT3_XinZhao', 'items': []},
  {'character_id': 'TFT3_JarvanIV', 'items': [5]}],
 3: [{'character_id': 'TFT3_Rakan', 'items': [37]},
  {'character_id': 'TFT3_Xayah', 'items': [6, 19]},
  {'character_id': 'TFT3_Riven', 'items': []},
  {'character_id': 'TFT3_Shen', 'items': []},
  {'character_id': 'TFT3_JarvanIV', 'items': [5]},
  {'character_id': 'TFT3_Fiora', 'items': []}],
 4: [{'character_id': 'TFT3_Rakan', 'items': [37]},
  {'character_id': 'TFT3_Riven', 'items': []},
  {'character_id': 'TFT3_Xayah', 'items': [12, 19, 69]},
  {'character_id': 'TFT3_Shen', 'items': []},
  {'character_id': 'TFT3_JarvanIV', 'items': [55]},
  {'character_id': 'TFT3_MasterYi', 'items': []}],
 5: [{'character_id': 'TFT3_Rakan', 'items': [37]},
  {'character_id': 'TFT3_MasterYi', 'item

## Create a dataframe with character_id, items, stage info and rank

In [23]:
i = 0
res = []
for i in range(len(char_item_dict)):
    for stage,comp in char_item_dict[i].items():
        for char in comp:
            res.append({"stage" : stage, "character_id" : char['character_id'], "items" : char["items"], \
                "gold_spent" : gold_df.iloc[i][stage], "level" : level_df.iloc[i][stage], \
                "active_trait" : active_trait_df.iloc[i][str(stage)], "rank" : df.player.iloc[i]["final"]["placement"]["rank"]})
#             print(f"i : {i}")
#             print(f"stage : {stage}")
#             print(f"character_id : {char['character_id']}")
#             print(char["items"])
#             print(gold_df.iloc[i][stage])      
#             print(level_df.iloc[i][stage])
#             print(active_trait_df.iloc[i][str(stage)])
    



In [24]:
combined_df = pd.DataFrame(res)
combined_df.tail(20)

Unnamed: 0,stage,character_id,items,gold_spent,level,active_trait,rank
67778,6,TFT3_Jayce,"[15, 36, 66]",22.0,7.0,"{'Mystic': 4, 'StarGuardian': 1, 'Astro': 2, '...",2
67779,6,TFT3_WuKong,"[45, 69]",22.0,7.0,"{'Mystic': 4, 'StarGuardian': 1, 'Astro': 2, '...",2
67780,1,TFT3_Zoe,[],2.0,1.666667,"{'Sorcerer': 1, 'StarGuardian': 1, 'Brawler': ...",8
67781,1,TFT3_Malphite,[],2.0,1.666667,"{'Sorcerer': 1, 'StarGuardian': 1, 'Brawler': ...",8
67782,2,TFT3_Caitlyn,[],20.0,3.666667,"{'Sniper': 1, 'Chrono': 2, 'Blademaster': 2, '...",8
67783,2,TFT3_MasterYi,[],20.0,3.666667,"{'Sniper': 1, 'Chrono': 2, 'Blademaster': 2, '...",8
67784,2,TFT3_Shen,[],20.0,3.666667,"{'Sniper': 1, 'Chrono': 2, 'Blademaster': 2, '...",8
67785,2,TFT3_Malphite,[7],20.0,3.666667,"{'Sniper': 1, 'Chrono': 2, 'Blademaster': 2, '...",8
67786,3,TFT3_TwistedFate,[14],54.0,5.5,"{'Sorcerer': 1, 'Chrono': 3, 'Blademaster': 3,...",8
67787,3,TFT3_Shen,[],54.0,5.5,"{'Sorcerer': 1, 'Chrono': 3, 'Blademaster': 3,...",8


In [25]:
# Pad item array with zeroes
combined_df["items"] = combined_df["items"].apply(lambda x: np.pad(i := np.asarray(x), (0,3-len(i))))

## Vectorise Items in combined_df

In [26]:
# List of all items
items_list = list(range(1,10)) + [10 * i + j for i in range(1,10) for j in range(i,10)]

In [27]:
def vectorise_items(items):
    item_index = {x:y for x,y in zip(range(1,10),range(0,18,2))}
    res = {}
    for i,item in enumerate(items):
        item_str = str(item)
        vect = np.zeros([18])
        for i in item_str:
            first_index = item_index[int(i)]
            if(not vect[first_index]):
                vect[first_index] = 1
            else:
                vect[first_index+1] = 1
        res[item] = vect
    res[0] = np.zeros([18])
    return res

In [28]:
item_vector_dict = vectorise_items(items_list)

In [29]:
# Convert a list of items to vectors
def item_vector_lookup(item_list, d=item_vector_dict):
    res = []
    for items in item_list:
        temp_res = []
        for item in items:
            temp_res = temp_res + d[item].tolist()
        res.append(temp_res)
    return res


In [30]:
one_hot_items = pd.DataFrame(item_vector_lookup(combined_df["items"]) , columns = [f"item_index{x}" for x in range(1,55)])

In [31]:
# add item vectors to the existing data frame
combined_df = combined_df.join(one_hot_items)                

## One-hot encode character_id

In [32]:
combined_df = combined_df.join(pd.get_dummies(combined_df["character_id"]))

In [33]:
combined_df[pd.isna(combined_df["active_trait"])]

Unnamed: 0,stage,character_id,items,gold_spent,level,active_trait,rank,item_index1,item_index2,item_index3,...,TFT3_Vi,TFT3_Viktor,TFT3_WuKong,TFT3_Xayah,TFT3_Xerath,TFT3_XinZhao,TFT3_Yasuo,TFT3_Zed,TFT3_Ziggs,TFT3_Zoe
7831,1,TFT3_Fiora,"[5, 0, 0]",3.0,1.666667,,1,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
10222,1,TFT3_Caitlyn,"[5, 0, 0]",2.0,1.666667,,3,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
39390,1,TFT3_Poppy,"[7, 0, 0]",5.0,2.0,,3,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
50564,1,TFT3_Ziggs,"[3, 0, 0]",4.0,2.0,,5,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,1,0


### Fill missing active_trait values with an empty dict

In [34]:
combined_df["active_trait"] = combined_df["active_trait"].where(combined_df["active_trait"].notna(), lambda x: [{}])

## Vectorise active_trait column in combined_df

In [35]:
with open('full_mapping.json') as json_data:
    fullmap = json.load(json_data)

In [36]:
traits_list = pd.DataFrame(fullmap['trait_active_mapping_json'].items())[0].to_numpy()

In [37]:
active_traits = combined_df["active_trait"][0]
active_traits

{'Blademaster': 1, 'Cybernetic': 1, 'Protector': 1, 'DarkStar': 1}

In [38]:
temp_list = combined_df["active_trait"].to_list()
v = DictVectorizer(sparse=False)
x = v.fit_transform( combined_df["active_trait"])
combined_df = combined_df.join(pd.DataFrame(x, columns=v.get_feature_names()))

In [39]:
combined_df.head()

Unnamed: 0,stage,character_id,items,gold_spent,level,active_trait,rank,item_index1,item_index2,item_index3,...,Mystic,Paragon,Protector,Rebel,Sniper,Sorcerer,SpacePirate,StarGuardian,Starship,Vanguard
0,1,TFT3_Fiora,"[0.0, 0.0, 0.0]",4.0,2.0,"{'Blademaster': 1, 'Cybernetic': 1, 'Protector...",4,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,TFT3_JarvanIV,"[0.0, 0.0, 0.0]",4.0,2.0,"{'Blademaster': 1, 'Cybernetic': 1, 'Protector...",4,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,TFT3_Xayah,"[19, 0, 0]",20.0,4.166667,"{'Blademaster': 1, 'Celestial': 2, 'Mystic': 1...",4,1.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2,TFT3_Karma,"[0.0, 0.0, 0.0]",20.0,4.166667,"{'Blademaster': 1, 'Celestial': 2, 'Mystic': 1...",4,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,2,TFT3_XinZhao,"[0.0, 0.0, 0.0]",20.0,4.166667,"{'Blademaster': 1, 'Celestial': 2, 'Mystic': 1...",4,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [40]:
combined_df = combined_df.drop(columns=["character_id","items","active_trait"])

In [41]:
combined_df.head()

Unnamed: 0,stage,gold_spent,level,rank,item_index1,item_index2,item_index3,item_index4,item_index5,item_index6,...,Mystic,Paragon,Protector,Rebel,Sniper,Sorcerer,SpacePirate,StarGuardian,Starship,Vanguard
0,1,4.0,2.0,4,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,4.0,2.0,4,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,20.0,4.166667,4,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2,20.0,4.166667,4,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,2,20.0,4.166667,4,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


### Create training and test sets

In [42]:
index_X = list(chain(range(3),range(4,combined_df.shape[1])))
X = combined_df.iloc[:,index_X]
Y = combined_df.iloc[:,3]

In [43]:
X.head()

Unnamed: 0,stage,gold_spent,level,item_index1,item_index2,item_index3,item_index4,item_index5,item_index6,item_index7,...,Mystic,Paragon,Protector,Rebel,Sniper,Sorcerer,SpacePirate,StarGuardian,Starship,Vanguard
0,1,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,20.0,4.166667,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2,20.0,4.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,2,20.0,4.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [44]:
Y.head()

0    4
1    4
2    4
3    4
4    4
Name: rank, dtype: object

In [45]:
train_x, test_x, train_y, test_y = train_test_split(X,Y,test_size=0.2)

## Create and run Random Forest Regressor to predict rank

In [46]:
reg = RandomForestRegressor(n_jobs=-1)

In [47]:
reg.fit(train_x,train_y)

RandomForestRegressor(n_jobs=-1)

In [48]:
pred_y = reg.predict(test_x)

In [49]:
mean_squared_error(test_y,pred_y)

0.41363726062056777

In [50]:
mean_absolute_error(test_y,np.round(pred_y))

0.19690265486725664

In [51]:
def rank_accuracy(true,pred):
    return np.sum([x == y for x,y in zip(true,pred)])/true.shape[0]

### Accuracy of Random Forest regressor

In [52]:
rank_accuracy(test_y.astype(int), np.round(pred_y).astype(int))

0.8729351032448378

In [53]:
importances = reg.feature_importances_

In [54]:
indices = np.argsort(importances)[::-1]

In [78]:
for f in range(30):
    print("%d. feature %d (%f)" % (f+1 , indices[f], importances[indices[f]]))

1. feature 1 (0.210929)
2. feature 2 (0.108631)
3. feature 0 (0.097594)
4. feature 120 (0.039434)
5. feature 122 (0.039145)
6. feature 137 (0.035773)
7. feature 119 (0.032319)
8. feature 130 (0.031046)
9. feature 121 (0.028988)
10. feature 116 (0.028415)
11. feature 131 (0.026694)
12. feature 135 (0.026520)
13. feature 115 (0.025339)
14. feature 118 (0.024473)
15. feature 132 (0.024362)
16. feature 117 (0.022752)
17. feature 133 (0.022530)
18. feature 134 (0.022094)
19. feature 128 (0.022055)
20. feature 114 (0.021745)
21. feature 124 (0.020059)
22. feature 125 (0.016176)
23. feature 126 (0.014687)
24. feature 123 (0.013208)
25. feature 136 (0.005606)
26. feature 129 (0.005473)
27. feature 127 (0.002455)
28. feature 9 (0.001188)
29. feature 3 (0.001168)
30. feature 15 (0.001098)


In [56]:
train_x

Unnamed: 0,stage,gold_spent,level,item_index1,item_index2,item_index3,item_index4,item_index5,item_index6,item_index7,...,Mystic,Paragon,Protector,Rebel,Sniper,Sorcerer,SpacePirate,StarGuardian,Starship,Vanguard
41014,5,99.0,7.833333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,1.0,2.0,0.0,0.0,4.0,0.0,6.0,0.0,0.0
21416,4,63.0,6.833333,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0
45869,5,96.0,7.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,2.0,0.0,0.0,3.0,0.0,6.0,0.0,1.0
31127,6,54.0,7.333333,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
26002,6,31.0,8.000000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,6.0,0.0,5.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58564,5,90.0,7.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
50922,1,0.0,2.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
23708,3,22.0,5.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,2.0
65267,2,28.0,4.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,2.0,0.0,3.0,0.0,2.0


## Create and run Random Forest Classifier to predict top 4

In [57]:
test_y_top_4 = np.where(test_y.astype('int') <= 4, 1, 0)

In [58]:
train_y_top_4 = np.where(train_y.astype('int') <= 4, 1, 0)

In [59]:
clf_top_4 = RandomForestClassifier(n_jobs=-1)

In [60]:
clf_top_4.fit(train_x,train_y_top_4)

RandomForestClassifier(n_jobs=-1)

In [61]:
pred_y_top_4 = clf_top_4.predict(test_x)

In [62]:
accuracy_score(test_y_top_4, pred_y_top_4)

0.9477138643067846

In [63]:
importances_clf = reg.feature_importances_

In [64]:
indices_clf = np.argsort(importances_clf)[::-1]

In [65]:
for f in range(10):
    print("%d. feature %d (%f)" % (f+1 , indices_clf[f], importances_clf[indices[f]]))

1. feature 1 (0.210929)
2. feature 2 (0.108631)
3. feature 0 (0.097594)
4. feature 120 (0.039434)
5. feature 122 (0.039145)
6. feature 137 (0.035773)
7. feature 119 (0.032319)
8. feature 130 (0.031046)
9. feature 121 (0.028988)
10. feature 116 (0.028415)


## Final Prediction 

In [66]:
# Generate all possible item combinations
from itertools import combinations_with_replacement 
def generate_item_combinations(items_list):
    combinations = combinations_with_replacement(items_list,3)
    return [combination for combination in combinations if combination[0] <= combination[1] and combination[1] <= combination[2] and combination[0] <= combination[2]]



In [67]:
item_combinations_list = generate_item_combinations(items_list)
random.sample(item_combinations_list, 10)

[(16, 34, 39),
 (19, 24, 67),
 (9, 11, 44),
 (25, 69, 77),
 (4, 27, 48),
 (13, 38, 59),
 (16, 46, 59),
 (36, 45, 49),
 (8, 11, 44),
 (15, 45, 59)]

In [68]:
len(item_combinations_list)

27720

In [69]:
# Vector embeddings for all item combinations
item_combinations_vector = item_vector_lookup(item_combinations_list)

In [70]:
item_combinations_vector = np.asarray(item_combinations_vector)

In [71]:
item_combinations_vector.shape

(27720, 54)

In [72]:
# Get n item recommendations and their predicted rank by running random forest model on all possible item combinations
def get_n_item_recommendations(model, item_combinations_list, item_combinations_vector, input_vector, n=5):
    # Duplicate input vector N times, N = #total number of item combinations
    input_vector_stretched = np.tile(input_vector,(item_combinations_vector.shape[0],1))
    # Add every item combination vector to the stretched input vector
    input_vector_with_item_combo = np.insert(input_vector_stretched,3,item_combinations_vector.transpose(),axis=1)
    # Predict rank
    results = reg.predict(input_vector_with_item_combo)
    # sort indices by predicted rank (descending)
    sorted_items_index = np.argsort(-results)
    # Return n item combinations with the highest predicted ranks
    return np.asarray([(item_combinations_list[x], results[x]) for x in sorted_items_index[:n]])
    
    

In [73]:
item_mapping = fullmap['item_mapping_json']

In [74]:
# Get item names for a tuple of item numbers
def get_item_names(item_combo, item_mapping = item_mapping):
    return tuple(item_mapping[str(item)] for item in item_combo)

In [75]:
# Get a random row of combined_df and remove all item indices
not_item_index = list(chain(range(3),range(58,combined_df.shape[1])))
#x_single = combined_df.iloc[663,not_item_index]
x_single = combined_df.iloc[random.randint(0,combined_df.shape[0]),~combined_df.columns.str.match(r'^(item|rank)')]
print(x_single)
input_vector = x_single.to_numpy()

stage            4
gold_spent      50
level            6
TFT3_Ahri        0
TFT3_Annie       0
                ..
Sorcerer         3
SpacePirate      0
StarGuardian     4
Starship         0
Vanguard         0
Name: 51321, Length: 84, dtype: object


In [76]:
recommended_items = get_n_item_recommendations(reg, item_combinations_list, item_combinations_vector, input_vector,n=10)
recommended_items

array([[(5, 16, 77), 7.02],
       [(55, 56, 77), 7.02],
       [(5, 18, 77), 7.02],
       [(55, 68, 77), 7.02],
       [(22, 67, 77), 7.02],
       [(22, 57, 77), 7.02],
       [(55, 67, 77), 7.02],
       [(22, 68, 77), 7.02],
       [(22, 22, 77), 7.02],
       [(22, 25, 77), 7.02]], dtype=object)

In [77]:
[get_item_names(x) for x in recommended_items[:,0]]

[('Chain Vest', 'Bloodthirster', 'WarmogsArmor'),
 ('BrambleVest', 'SwordBreaker', 'WarmogsArmor'),
 ('Chain Vest', 'BladeoftheRuinedKing', 'WarmogsArmor'),
 ('BrambleVest', 'CelestialOrb', 'WarmogsArmor'),
 ('RapidFirecannon', 'Zephyr', 'WarmogsArmor'),
 ('RapidFirecannon', 'RedBuff', 'WarmogsArmor'),
 ('BrambleVest', 'Zephyr', 'WarmogsArmor'),
 ('RapidFirecannon', 'CelestialOrb', 'WarmogsArmor'),
 ('RapidFirecannon', 'RapidFirecannon', 'WarmogsArmor'),
 ('RapidFirecannon', 'TitansResolve', 'WarmogsArmor')]