In [1]:
import numpy as np
import pandas as pd
import json
from typing import Dict
from itertools import groupby, chain
from functools import reduce
from collections import Counter, defaultdict
import re
from sklearn.preprocessing import OneHotEncoder 
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier 
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score
import pickle

In [2]:
df = pd.read_json("export/export_0.json")
for i in range(1,11):
    print(i)
    dframe = pd.read_json("export/export_{}0.json".format(str(i)))
    df = df.append(dframe)
    
df.to_pickle('export_data_frame')

1
2
3
4
5
6
7
8
9
10


# Gold Per Stage

In [3]:
def gold_spent(player):
    gold_spent_per_round = {}
    for currRound, group in groupby(player["gold"]["by_round"], lambda x: x["current_round"]):
        l = list(group)
        goldSpent = max(0,int(l[0]["gold"]) - int(l[-1]["gold"]))
        gold_spent_per_round[currRound] = goldSpent
    gold_spent_by_stage = defaultdict(int)
    for x in list(gold_spent_per_round.keys()):
        m = re.match(r'(\d).+', x)
        gold_spent_by_stage[int(m.group(1))] += gold_spent_per_round[m.group(0)]

    return gold_spent_by_stage

In [4]:
gold_spent(df.player.iloc[0])

defaultdict(int, {1: 4, 2: 20, 3: 13, 4: 97, 5: 40, 6: 30})

In [5]:
gold_dict = [gold_spent(df.player.iloc[i]) for i in range(df.shape[0])]

In [6]:
gold_dict[:10]

[defaultdict(int, {1: 4, 2: 20, 3: 13, 4: 97, 5: 40, 6: 30}),
 defaultdict(int, {1: 4, 2: 5, 3: 27, 4: 67, 5: 68, 6: 87}),
 defaultdict(int, {1: 2, 2: 5, 3: 86, 4: 24, 5: 70, 6: 12}),
 defaultdict(int, {1: 4, 2: 7, 3: 52, 4: 68, 5: 109, 6: 31}),
 defaultdict(int, {1: 2, 2: 11, 3: 36, 4: 93, 5: 66}),
 defaultdict(int, {1: 2, 2: 22, 3: 15, 4: 62, 5: 80}),
 defaultdict(int, {1: 4, 2: 4, 3: 53, 4: 71, 5: 76}),
 defaultdict(int, {1: 4, 2: 13, 3: 24, 4: 69, 5: 87}),
 defaultdict(int, {1: 2, 2: 9, 3: 52, 4: 79, 5: 94, 6: 18}),
 defaultdict(int, {1: 1, 2: 25, 3: 34, 4: 47, 5: 43, 6: 0})]

In [7]:
gold_df = pd.DataFrame(gold_dict).fillna(0)
gold_df.head()

Unnamed: 0,1,2,3,4,5,6,7,8
0,4.0,20.0,13.0,97.0,40.0,30.0,0.0,0.0
1,4.0,5.0,27.0,67.0,68.0,87.0,0.0,0.0
2,2.0,5.0,86.0,24.0,70.0,12.0,0.0,0.0
3,4.0,7.0,52.0,68.0,109.0,31.0,0.0,0.0
4,2.0,11.0,36.0,93.0,66.0,0.0,0.0,0.0


# Level Per Stage

In [8]:
def level_stage(player):
    tp_dict = {}
    
    level_agg = 0
    current_stage = 0
    count_stage = 0
    
    for key, group in groupby(player["xp"]["by_round"], lambda x: x["current_round"]): 
        round_split = key.split("-")
        stage = int(round_split[0])

        #start of loop
        if current_stage == 0:
            current_stage = stage
        #going from stage x to stage y
        elif current_stage != stage:
            level_avg = level_agg/count_stage
#             tp_dict[f"level_at_stage_{current_stage}"] = level_avg 
            tp_dict[current_stage] = level_avg 
            current_stage = stage
            level_agg = 0
            count_stage = 0

        #continue
        list_r = list(group)
        level_agg += list_r[0]['level']
        count_stage += 1

    #getting the last stage
    level_avg = level_agg/count_stage
#     tp_dict[f"level_at_stage_{current_stage}"] = level_avg
    tp_dict[current_stage] = level_avg
    
    return tp_dict

In [9]:
level_stage(df.player.iloc[0])

{1: 2.0,
 2: 4.166666666666667,
 3: 5.333333333333333,
 4: 6.5,
 5: 7.333333333333333,
 6: 8.0}

In [10]:
level_dict = [level_stage(df.player.iloc[i]) for i in range(df.shape[0])]

In [11]:
level_df = pd.DataFrame(level_dict).fillna(method='ffill',axis=1)
level_df.head()

Unnamed: 0,1,2,3,4,5,6,7,8
0,2.0,4.166667,5.333333,6.5,7.333333,8.0,8.0,8.0
1,2.0,4.333333,5.0,5.833333,7.166667,8.0,8.0,8.0
2,1.666667,3.666667,5.166667,6.0,6.833333,7.0,7.0,7.0
3,1.666667,4.166667,5.833333,7.333333,8.0,8.0,8.0,8.0
4,1.666667,3.5,4.833333,6.0,7.75,7.75,7.75,7.75


# Active Traits

In [12]:
def encode_traits(trait_dict_column, active_trait_df, trait_index):
    encoding_list = []
    for trait_dict in trait_dict_column:
        trait_encoding = [ 0 for i in range(len(trait_index)) ]
        for key, value in trait_dict.items():
            if key[:5] == 'Set3_':
                trait = key[5:]
            else:
                trait = key
            min_active_dict = active_trait_df.loc[active_trait_df['trait'] == trait]['min_active'].values[0]
            has_trait_flag = 0
            for k, v in min_active_dict.items():
                if value >= v[0] and value < v[1]:
                    trait_tier = int(k)
                    has_trait_flag = 1
            if has_trait_flag == 0:
                trait_tier = 0
            trait_encoding[trait_index[trait]] = trait_tier
        encoding_list.append(trait_encoding)
    return encoding_list

def get_active_traits(player, char_trait, trait_tier):
    active_traits = {}
    last_rounds = get_last_rounds(player)
    for rounds in last_rounds:
        stage = rounds.split('-')[0]
        board = player['round_outcomes']['by_round'][rounds]['board']
        char_seen = set()
        trait_count = {}
        for char_dict in board:
            char = char_dict['character_id']
            if char not in char_seen:
                char_seen.add(char_dict['character_id'])
                trait_list = char_trait.loc[char_trait['name'] == char]['trait'].values[0].replace('\'', '').replace(' ', '').strip('][ ').split(',')
                for trait in trait_list:
                    if trait not in trait_count:
                        trait_count[trait] = 1
                    else:
                        trait_count[trait] += 1      
        active_traits[stage] = trait_count
        
    return active_traits
    
def get_last_rounds(player):
    round_list = list(player['round_outcomes']['by_round'].keys())
    last_round_stage = []
    last_r = 0
    last_round = ''
    for rounds in round_list:
        r = int(rounds.split('-')[1])
        if last_r > r:
            last_round_stage.append(last_round)
        last_r = r
        last_round = rounds
    last_round_stage.append(round_list[-1])
    return last_round_stage

In [13]:
with open('full_mapping.json') as json_data:
    fullmap = json.load(json_data)
    
char_trait = pd.DataFrame(fullmap['character_trait_json'].items()).astype(str)
char_trait.columns = ['name', 'trait']
#print(char_trait)

trait_tier = pd.DataFrame(fullmap['trait_tier_mapping_json'].items())
#print(trait_tier)

In [14]:
get_active_traits(df.player.iloc[0], char_trait, trait_tier)

{'1': {'Blademaster': 1, 'Cybernetic': 1, 'Protector': 1, 'DarkStar': 1},
 '2': {'Blademaster': 1,
  'Celestial': 2,
  'Mystic': 1,
  'DarkStar': 3,
  'Protector': 2,
  'Vanguard': 1},
 '3': {'Blademaster': 1,
  'Celestial': 2,
  'Sniper': 1,
  'Chrono': 2,
  'Vanguard': 2,
  'Protector': 2,
  'DarkStar': 2},
 '4': {'Sniper': 1,
  'Celestial': 4,
  'Blademaster': 2,
  'ManaReaver': 1,
  'Cybernetic': 1,
  'Protector': 4,
  'DarkStar': 1,
  'StarGuardian': 1},
 '5': {'Sniper': 1,
  'Celestial': 4,
  'Blademaster': 3,
  'ManaReaver': 1,
  'Cybernetic': 1,
  'Protector': 4,
  'Battlecast': 1,
  'DarkStar': 1,
  'Chrono': 1},
 '6': {'Sniper': 1,
  'Celestial': 4,
  'Blademaster': 3,
  'ManaReaver': 1,
  'Cybernetic': 1,
  'Protector': 4,
  'Battlecast': 1,
  'DarkStar': 1,
  'Chrono': 1}}

In [15]:
def active_trait_func(player):
    return get_active_traits(player, char_trait, trait_tier)

In [16]:
active_trait_dict = [active_trait_func(df.player.iloc[i]) for i in range(df.shape[0])]

In [17]:
active_trait_df = pd.DataFrame(active_trait_dict).fillna(method='ffill',axis=1)
active_trait_df.head()

Unnamed: 0,1,2,3,4,5,6,7,8
0,"{'Blademaster': 1, 'Cybernetic': 1, 'Protector...","{'Blademaster': 1, 'Celestial': 2, 'Mystic': 1...","{'Blademaster': 1, 'Celestial': 2, 'Sniper': 1...","{'Sniper': 1, 'Celestial': 4, 'Blademaster': 2...","{'Sniper': 1, 'Celestial': 4, 'Blademaster': 3...","{'Sniper': 1, 'Celestial': 4, 'Blademaster': 3...","{'Sniper': 1, 'Celestial': 4, 'Blademaster': 3...","{'Sniper': 1, 'Celestial': 4, 'Blademaster': 3..."
1,"{'Vanguard': 2, 'Cybernetic': 1, 'StarGuardian...","{'Blaster': 2, 'Chrono': 1, 'Mystic': 1, 'Batt...","{'Blaster': 2, 'Battlecast': 4, 'Infiltrator':...","{'Sorcerer': 1, 'Battlecast': 4, 'Blaster': 2,...","{'Infiltrator': 1, 'Battlecast': 5, 'Sorcerer'...","{'Infiltrator': 1, 'Battlecast': 6, 'Mystic': ...","{'Infiltrator': 1, 'Battlecast': 6, 'Mystic': ...","{'Infiltrator': 1, 'Battlecast': 6, 'Mystic': ..."
2,"{'Sniper': 1, 'Chrono': 1, 'Protector': 1, 'Da...","{'Blademaster': 1, 'Celestial': 2, 'Mystic': 1...","{'Protector': 2, 'Celestial': 2, 'Blademaster'...","{'Protector': 2, 'Celestial': 2, 'Blademaster'...","{'Protector': 2, 'Celestial': 2, 'Blademaster'...","{'Protector': 2, 'Celestial': 2, 'Blademaster'...","{'Protector': 2, 'Celestial': 2, 'Blademaster'...","{'Protector': 2, 'Celestial': 2, 'Blademaster'..."
3,"{'Mystic': 1, 'Battlecast': 2, 'Infiltrator': ...","{'Infiltrator': 1, 'DarkStar': 1, 'Mystic': 1,...","{'Mystic': 2, 'Astro': 1, 'Battlecast': 4, 'So...","{'Sniper': 4, 'Celestial': 1, 'Chrono': 1, 'As...","{'Sniper': 4, 'Astro': 3, 'DarkStar': 2, 'Myst...","{'Sniper': 4, 'Astro': 3, 'DarkStar': 1, 'Star...","{'Sniper': 4, 'Astro': 3, 'DarkStar': 1, 'Star...","{'Sniper': 4, 'Astro': 3, 'DarkStar': 1, 'Star..."
4,"{'Vanguard': 2, 'StarGuardian': 1, 'Cybernetic...","{'Sniper': 1, 'Celestial': 1, 'Vanguard': 2, '...","{'Sniper': 2, 'Chrono': 1, 'Infiltrator': 1, '...","{'Sniper': 2, 'Chrono': 2, 'Infiltrator': 1, '...","{'Mystic': 1, 'DarkStar': 6, 'Sniper': 3, 'Inf...","{'Mystic': 1, 'DarkStar': 6, 'Sniper': 3, 'Inf...","{'Mystic': 1, 'DarkStar': 6, 'Sniper': 3, 'Inf...","{'Mystic': 1, 'DarkStar': 6, 'Sniper': 3, 'Inf..."


# Characters and Items

In [18]:
def character_items(player):
    tp_dict={}
    
    round_outcome_dict = player['round_outcomes']
    by_round_dict = round_outcome_dict['by_round']
    
    current_stage = 0
    prev_stage = 1
    
    current_round = 'a'
    prev_round = 'a'
    
    for key in by_round_dict.keys():
        round_split = key.split("-")
        current_stage = int(round_split[0])

        current_round = key

        if(prev_round == 'a'):
            prev_round = key
            prev_stage = current_stage

        if (current_stage == prev_stage):
            prev_round = key
        #went from stage x to stage y
        elif (current_stage != prev_stage):
            #a dictionary containg board
            #get the board characters and items
            board_dict = by_round_dict[prev_round]['board']
            #each index has one character and item combo in a dictionary with character_id and items
            #the temp dictionary will be 1: [character_id, items]

            temp_list = []
            
            for char in board_dict:
                character_id_value = char['character_id']
                items_value = char['items']

                temp_dict = {}
                temp_dict['character_id'] = character_id_value
                temp_dict['items'] = items_value

                temp_list.append(temp_dict)

            tp_dict[prev_stage] = temp_list

            prev_round = key
            prev_stage = current_stage
            
    #last one
    board_dict = by_round_dict[prev_round]['board']
    
    temp_list = []

    for char in board_dict:
        character_id_value = char['character_id']
        items_value = char['items']

        temp_dict = {}
        temp_dict['character_id'] = character_id_value
        temp_dict['items'] = items_value

        temp_list.append(temp_dict)

    tp_dict[prev_stage] = temp_list
    
    
    return tp_dict


In [19]:
char_item_dict = [character_items(df.player.iloc[i]) for i in range(df.shape[0])]

In [20]:
char_item_dict[2]

{1: [{'character_id': 'TFT3_Caitlyn', 'items': []},
  {'character_id': 'TFT3_JarvanIV', 'items': []}],
 2: [{'character_id': 'TFT3_Xayah', 'items': [6, 19]},
  {'character_id': 'TFT3_Karma', 'items': []},
  {'character_id': 'TFT3_XinZhao', 'items': []},
  {'character_id': 'TFT3_JarvanIV', 'items': [5]}],
 3: [{'character_id': 'TFT3_Rakan', 'items': [37]},
  {'character_id': 'TFT3_Xayah', 'items': [6, 19]},
  {'character_id': 'TFT3_Riven', 'items': []},
  {'character_id': 'TFT3_Shen', 'items': []},
  {'character_id': 'TFT3_JarvanIV', 'items': [5]},
  {'character_id': 'TFT3_Fiora', 'items': []}],
 4: [{'character_id': 'TFT3_Rakan', 'items': [37]},
  {'character_id': 'TFT3_Riven', 'items': []},
  {'character_id': 'TFT3_Xayah', 'items': [12, 19, 69]},
  {'character_id': 'TFT3_Shen', 'items': []},
  {'character_id': 'TFT3_JarvanIV', 'items': [55]},
  {'character_id': 'TFT3_MasterYi', 'items': []}],
 5: [{'character_id': 'TFT3_Rakan', 'items': [37]},
  {'character_id': 'TFT3_MasterYi', 'item

## Create a dataframe with character_id, items, stage info and rank

In [21]:
i = 0
res = []
for i in range(len(char_item_dict)):
    for stage,comp in char_item_dict[i].items():
        for char in comp:
            res.append({"stage" : stage, "character_id" : char['character_id'], "items" : char["items"], \
                "gold_spent" : gold_df.iloc[i][stage], "level" : level_df.iloc[i][stage], \
                "active_trait" : active_trait_df.iloc[i][str(stage)], "rank" : df.player.iloc[i]["final"]["placement"]["rank"]})
#             print(f"i : {i}")
#             print(f"stage : {stage}")
#             print(f"character_id : {char['character_id']}")
#             print(char["items"])
#             print(gold_df.iloc[i][stage])      
#             print(level_df.iloc[i][stage])
#             print(active_trait_df.iloc[i][str(stage)])
    



In [22]:
combined_df = pd.DataFrame(res)
combined_df.tail(20)

Unnamed: 0,stage,character_id,items,gold_spent,level,active_trait,rank
3425293,3,TFT3_Darius,[],71.0,5.5,"{'Mystic': 3, 'Astro': 1, 'DarkStar': 1, 'Batt...",6
3425294,3,TFT3_Jayce,[],71.0,5.5,"{'Mystic': 3, 'Astro': 1, 'DarkStar': 1, 'Batt...",6
3425295,3,TFT3_Annie,[],71.0,5.5,"{'Mystic': 3, 'Astro': 1, 'DarkStar': 1, 'Batt...",6
3425296,3,TFT3_Leona,[],71.0,5.5,"{'Mystic': 3, 'Astro': 1, 'DarkStar': 1, 'Batt...",6
3425297,4,TFT3_Jhin,"[15, 29]",68.0,7.5,"{'Sniper': 2, 'DarkStar': 2, 'Mystic': 2, 'Ast...",6
3425298,4,TFT3_Karma,[5],68.0,7.5,"{'Sniper': 2, 'DarkStar': 2, 'Mystic': 2, 'Ast...",6
3425299,4,TFT3_Teemo,[44],68.0,7.5,"{'Sniper': 2, 'DarkStar': 2, 'Mystic': 2, 'Ast...",6
3425300,4,TFT3_Soraka,[],68.0,7.5,"{'Sniper': 2, 'DarkStar': 2, 'Mystic': 2, 'Ast...",6
3425301,4,TFT3_Nautilus,[],68.0,7.5,"{'Sniper': 2, 'DarkStar': 2, 'Mystic': 2, 'Ast...",6
3425302,4,TFT3_Jayce,[],68.0,7.5,"{'Sniper': 2, 'DarkStar': 2, 'Mystic': 2, 'Ast...",6


In [23]:
# Pad item array with zeroes
combined_df["items"] = combined_df["items"].apply(lambda x: np.pad(i := np.asarray(x), (3-len(i),0)))

## Vectorise Items in combined_df

In [24]:
# List of all items
items_list = list(range(1,10)) + [10 * i + j for i in range(1,10) for j in range(i,10)]

In [25]:
def vectorise_items(items):
    item_index = {x:y for x,y in zip(range(1,10),range(0,18,2))}
    res = {}
    for i,item in enumerate(items):
        item_str = str(item)
        vect = np.zeros([18])
        for i in item_str:
            first_index = item_index[int(i)]
            if(not vect[first_index]):
                vect[first_index] = 1
            else:
                vect[first_index+1] = 1
        res[item] = vect
    res[0] = np.zeros([18])
    return res

In [26]:
item_vector_dict = vectorise_items(items_list)

In [27]:
# Convert a list of items to vectors
def item_vector_lookup(item_list, d=item_vector_dict):
    res = []
    for items in item_list:
        temp_res = []
        for item in items:
            temp_res = temp_res + d[item].tolist()
        res.append(temp_res)
    return res


In [28]:
one_hot_items = pd.DataFrame(item_vector_lookup(combined_df["items"]) , columns = [f"item_index{x}" for x in range(1,55)])

In [29]:
# add item vectors to the existing data frame
combined_df = combined_df.join(one_hot_items)                

## One-hot encode character_id

In [30]:
combined_df = combined_df.join(pd.get_dummies(combined_df["character_id"]))

In [31]:
combined_df[pd.isna(combined_df["active_trait"])]

Unnamed: 0,stage,character_id,items,gold_spent,level,active_trait,rank,item_index1,item_index2,item_index3,...,TFT3_Vi,TFT3_Viktor,TFT3_WuKong,TFT3_Xayah,TFT3_Xerath,TFT3_XinZhao,TFT3_Yasuo,TFT3_Zed,TFT3_Ziggs,TFT3_Zoe
7831,1,TFT3_Fiora,"[0, 0, 5]",3.0,1.666667,,1,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
10222,1,TFT3_Caitlyn,"[0, 0, 5]",2.0,1.666667,,3,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
39390,1,TFT3_Poppy,"[0, 0, 7]",5.0,2.000000,,3,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
50564,1,TFT3_Ziggs,"[0, 0, 3]",4.0,2.000000,,5,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,1,0
68356,1,TFT3_Nocturne,"[0, 0, 9]",4.0,2.000000,,2,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3378788,1,TFT3_Poppy,"[0, 0, 4]",4.0,1.666667,,5,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
3385161,1,TFT3_Ziggs,"[0, 0, 5]",0.0,1.000000,,2,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,1,0
3386821,1,TFT3_Xayah,"[0, 0, 2]",6.0,2.000000,,2,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0
3393822,1,TFT3_Graves,"[0.0, 0.0, 0.0]",0.0,1.500000,,6,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0


### Fill missing active_trait values with an empty dict

In [32]:
combined_df["active_trait"] = combined_df["active_trait"].where(combined_df["active_trait"].notna(), lambda x: [{}])

## Vectorise active_trait column in combined_df

In [33]:
with open('full_mapping.json') as json_data:
    fullmap = json.load(json_data)

In [34]:
traits_list = pd.DataFrame(fullmap['trait_active_mapping_json'].items())[0].to_numpy()

In [35]:
active_traits = combined_df["active_trait"][0]
active_traits

{'Blademaster': 1, 'Cybernetic': 1, 'Protector': 1, 'DarkStar': 1}

In [36]:
temp_list = combined_df["active_trait"].to_list()
v = DictVectorizer(sparse=False)
x = v.fit_transform( combined_df["active_trait"])
combined_df = combined_df.join(pd.DataFrame(x, columns=v.get_feature_names()))

In [37]:
combined_df.head()

Unnamed: 0,stage,character_id,items,gold_spent,level,active_trait,rank,item_index1,item_index2,item_index3,...,Mystic,Paragon,Protector,Rebel,Sniper,Sorcerer,SpacePirate,StarGuardian,Starship,Vanguard
0,1,TFT3_Fiora,"[0.0, 0.0, 0.0]",4.0,2.0,"{'Blademaster': 1, 'Cybernetic': 1, 'Protector...",4,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,TFT3_JarvanIV,"[0.0, 0.0, 0.0]",4.0,2.0,"{'Blademaster': 1, 'Cybernetic': 1, 'Protector...",4,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,TFT3_Xayah,"[0, 0, 19]",20.0,4.166667,"{'Blademaster': 1, 'Celestial': 2, 'Mystic': 1...",4,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2,TFT3_Karma,"[0.0, 0.0, 0.0]",20.0,4.166667,"{'Blademaster': 1, 'Celestial': 2, 'Mystic': 1...",4,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,2,TFT3_XinZhao,"[0.0, 0.0, 0.0]",20.0,4.166667,"{'Blademaster': 1, 'Celestial': 2, 'Mystic': 1...",4,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [38]:
combined_df = combined_df.drop(columns=["character_id","items","active_trait"])

In [39]:
combined_df.head()

Unnamed: 0,stage,gold_spent,level,rank,item_index1,item_index2,item_index3,item_index4,item_index5,item_index6,...,Mystic,Paragon,Protector,Rebel,Sniper,Sorcerer,SpacePirate,StarGuardian,Starship,Vanguard
0,1,4.0,2.0,4,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,4.0,2.0,4,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,20.0,4.166667,4,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2,20.0,4.166667,4,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,2,20.0,4.166667,4,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [40]:
combined_df = combined_df.join(pd.get_dummies(combined_df["stage"], prefix="stage"))
combined_df.to_pickle('final_data_frame_one_hot')

In [72]:
combined_df.head()

Unnamed: 0,gold_spent,level,rank,item_index1,item_index2,item_index3,item_index4,item_index5,item_index6,item_index7,...,Starship,Vanguard,stage_1,stage_2,stage_3,stage_4,stage_5,stage_6,stage_7,stage_8
0,4.0,2.0,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
1,4.0,2.0,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
2,20.0,4.166667,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0,1,0,0,0,0,0,0
3,20.0,4.166667,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0,1,0,0,0,0,0,0
4,20.0,4.166667,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0,1,0,0,0,0,0,0


In [50]:
combined_df = combined_df.drop(columns = ["stage"])

### Create training and test sets

In [52]:
X = combined_df.drop(columns = ['rank'])
Y = combined_df['rank']

In [53]:
X.head()

Unnamed: 0,gold_spent,level,item_index1,item_index2,item_index3,item_index4,item_index5,item_index6,item_index7,item_index8,...,Starship,Vanguard,stage_1,stage_2,stage_3,stage_4,stage_5,stage_6,stage_7,stage_8
0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
1,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1,0,0,0,0,0,0,0
2,20.0,4.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0,1,0,0,0,0,0,0
3,20.0,4.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0,1,0,0,0,0,0,0
4,20.0,4.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0,1,0,0,0,0,0,0


In [54]:
Y.head()

0    4
1    4
2    4
3    4
4    4
Name: rank, dtype: object

In [55]:
train_x, test_x, train_y, test_y = train_test_split(X,Y,test_size=0.2)

## Create and run Random Forest Regressor to predict rank

In [56]:
reg = RandomForestRegressor(n_jobs=-1)

In [57]:
reg.fit(train_x,train_y)

RandomForestRegressor(n_jobs=-1)

In [58]:
pred_y = reg.predict(test_x)

In [59]:
mean_squared_error(test_y,pred_y)

1.124109553811716

In [60]:
mean_absolute_error(test_y,np.round(pred_y))

0.4838854236763626

In [61]:
def rank_accuracy(true,pred):
    return np.sum([x == y for x,y in zip(true,pred)])/true.shape[0]

### Accuracy of Random Forest regressor

In [62]:
rank_accuracy(test_y.astype(int), np.round(pred_y).astype(int))

0.7191805717138424

In [63]:
reg.feature_importances_

array([2.41929347e-01, 1.43612345e-01, 7.83580597e-04, 1.01204220e-04,
       5.51466436e-04, 1.13638592e-04, 3.71696041e-04, 1.03751745e-04,
       3.71916972e-04, 6.77853381e-06, 6.03588345e-04, 7.50785656e-05,
       2.19364171e-04, 9.87194975e-06, 1.86194526e-04, 2.39813158e-07,
       2.12815938e-05, 1.41306741e-07, 2.55752461e-04, 0.00000000e+00,
       1.44637358e-03, 8.91452310e-05, 1.18383149e-03, 3.00839551e-04,
       1.24860120e-03, 4.77289095e-04, 1.04717745e-03, 1.09367934e-04,
       1.17926897e-03, 2.78946344e-04, 8.70615991e-04, 1.80979634e-04,
       7.37889822e-04, 2.31624847e-05, 8.28889389e-05, 8.55959545e-06,
       1.23882953e-03, 0.00000000e+00, 2.86183956e-03, 7.29824514e-05,
       2.61600130e-03, 5.46324888e-04, 3.10927582e-03, 5.93263334e-04,
       3.16268441e-03, 1.02182841e-03, 2.96723129e-03, 6.33471286e-04,
       2.99187256e-03, 7.01051380e-04, 3.38401341e-03, 8.47366506e-04,
       9.19210283e-04, 3.57706625e-04, 3.08419419e-03, 7.10812975e-04,
      

## Create and run Random Forest Classifier to predict top 4

In [64]:
test_y_top_4 = np.where(test_y.astype('int') <= 4, 1, 0)

In [65]:
train_y_top_4 = np.where(train_y.astype('int') <= 4, 1, 0)

In [66]:
clf_top_4 = RandomForestClassifier(n_jobs=-1)

In [67]:
clf_top_4.fit(train_x,train_y_top_4)

RandomForestClassifier(n_jobs=-1)

In [68]:
pred_y_top_4 = clf_top_4.predict(test_x)

In [69]:
accuracy_score(test_y_top_4, pred_y_top_4)

0.840531454771313

In [70]:
clf_top_4.feature_importances_

array([2.43396897e-01, 1.20566070e-01, 2.38741973e-03, 2.68120804e-04,
       1.61586959e-03, 2.86066578e-04, 1.13110854e-03, 2.92467776e-04,
       1.05343447e-03, 2.69355542e-05, 1.65529649e-03, 1.85258291e-04,
       6.73391963e-04, 3.22180744e-05, 5.48409213e-04, 1.84448401e-06,
       7.35159234e-05, 3.05151430e-07, 7.32411556e-04, 0.00000000e+00,
       4.02707786e-03, 2.56667497e-04, 3.25661833e-03, 7.10803604e-04,
       3.29133909e-03, 1.03594471e-03, 2.68240280e-03, 3.37952921e-04,
       3.19951525e-03, 7.31167525e-04, 2.30595573e-03, 4.50688605e-04,
       1.93858621e-03, 8.17030998e-05, 2.72826808e-04, 4.32991811e-05,
       2.89496968e-03, 0.00000000e+00, 5.86069898e-03, 2.10652219e-04,
       5.75725568e-03, 1.24866025e-03, 6.79462774e-03, 1.24230430e-03,
       6.55570654e-03, 2.32457060e-03, 6.38270914e-03, 1.44237944e-03,
       6.59931199e-03, 1.76340896e-03, 6.09736407e-03, 1.72066404e-03,
       2.30753661e-03, 1.35916709e-03, 6.84752392e-03, 1.69067429e-03,
      

In [71]:
filename = 'final_reg_model.sav'
pickle.dump(reg, open(filename, 'wb'))
filename2 = 'final_clf_model.sav'
pickle.dump(clf_top_4, open(filename2, 'wb'))