In [16]:
import numpy as np
import pandas as pd
import json
from typing import Dict
from itertools import groupby
from functools import reduce
from collections import Counter
import scipy.sparse as sps
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score
from sklearn.model_selection import GridSearchCV

In [17]:
# Load the exported match data; later cells read the per-player records from its `player` column.
df = pd.read_json("export/export_0.json")

In [18]:
# Every item id: the 9 basic items (1-9) followed by each combined item,
# written as two digits "ab" with a <= b (11, 12, ..., 19, 22, ..., 99).
items_list = [
    *range(1, 10),
    *(10 * first + second for first in range(1, 10) for second in range(first, 10)),
]

In [19]:
# Converting items to their corresponding vectors
# Similar to one hot encoding
# There are 9 basic items. Each basic item is allotted 2 bits, so the vector size is 9 * 2 = 18.
# Basic item to index mapping is as follows:
# {1: (0, 1),
#  2: (2, 3),
#  3: (4, 5),
#  4: (6, 7),
#  5: (8, 9),
#  6: (10, 11),
#  7: (12, 13),
#  8: (14, 15),
#  9: (16, 17)}
# For basic items, the leftmost bit allotted to that item is set to 1.
# e.g. item_2 => [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# For combined items, the leftmost bits allotted to the 2 basic items
# used to create the combined item are set to 1.
# e.g. item_23 => [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# For combined items created from 2 of the same basic item, both bits allotted to that basic item are set to 1.
# e.g. item_33 => [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
def vectorise_items(items):
    """Map every item id to an 18-element 0/1 vector (2 slots per basic item).

    Each decimal digit of an item id names a basic item (1-9). The first time
    a digit is seen its left slot is set; if that slot is already set, the
    right slot is set instead. Id 0 is always added and maps to all zeros.

    Args:
        items: iterable of item ids whose digits are all in 1-9.

    Returns:
        dict mapping each id in ``items`` (plus 0) to a float numpy array of
        shape (18,).

    Raises:
        KeyError: if an id contains the digit 0.
    """
    left_slot = {basic: 2 * (basic - 1) for basic in range(1, 10)}
    encoded = {}
    for item in items:
        bits = np.zeros(18)
        for digit in str(item):
            slot = left_slot[int(digit)]
            if bits[slot]:
                # Left slot already used by an earlier digit: mark the right one.
                bits[slot + 1] = 1
            else:
                bits[slot] = 1
        encoded[item] = bits
    # Placeholder id for "no item".
    encoded[0] = np.zeros(18)
    return encoded

In [20]:
def vectorise_items2(items):
    """Map every item id to a 9-element count vector of its basic items.

    Each decimal digit of an item id names a basic item (1-9); position
    ``digit - 1`` of the vector counts how often that digit occurs, so
    23 -> ones at positions 1 and 2, and 33 -> a 2 at position 2.
    Id 0 is the "no item" placeholder and always maps to all zeros.

    Args:
        items: iterable of item ids.

    Returns:
        dict mapping each id in ``items`` (plus 0) to a float numpy array of
        shape (9,).

    Raises:
        ValueError: if an id other than 0 contains a digit outside 1-9.
            Without this check a digit 0 would index position -1 and be
            silently counted as basic item 9.
    """
    res = {}
    for item in items:
        vect = np.zeros([9])
        # Id 0 is the placeholder; it is (re)assigned to zeros below anyway.
        if item != 0:
            for digit in str(item):
                basic = int(digit)
                if not 1 <= basic <= 9:
                    raise ValueError(
                        f"item {item!r} contains digit {digit!r}; basic items are 1-9"
                    )
                vect[basic - 1] += 1
        res[item] = vect
    res[0] = np.zeros([9])
    return res

In [21]:
# Lookup table from item id to its 18-element encoding (vectorise_items also adds id 0 -> all zeros).
item_vector_dict = vectorise_items(items_list)

In [22]:
# Build one row per character in every player's final composition:
# (character_id, level, items, rank).
# Each character's item array is right-padded with 0 to exactly 3 slots so every
# row has the same width. np.pad raises ValueError (negative pad width) if a
# character ever carries more than 3 items.
char_item_rank = [
    (
        char["character_id"],
        char["level"],
        np.pad(items, (0, 3 - len(items))),
        player["final"]["placement"]["rank"],
    )
    for player in df.player
    for char in player["final"]["comp"]
    # A one-element loop binds `items` inside the comprehension's own scope;
    # an assignment expression (:=) here would bind the name in the notebook's
    # global namespace instead.
    for items in (np.asarray(char["items"]),)
]

char_item_rank_df = pd.DataFrame(char_item_rank, columns=["character_id", "level", "items", "rank"])

In [23]:
# Store rank as float; this overwrites the column on char_item_rank_df in place.
char_item_rank_df["rank"] = char_item_rank_df["rank"].astype("float")

In [24]:
# Convert each row of item ids into one flat list of concatenated item vectors
def item_vector_lookup(item_list, d=None):
    """Encode rows of item ids as flat feature lists.

    Args:
        item_list: iterable of rows, each row an iterable of item ids.
        d: mapping from item id to a vector exposing ``.tolist()`` (e.g. a
            numpy array). Defaults to the module-level ``item_vector_dict``,
            looked up when the function is called.

    Returns:
        list with one entry per row: the vectors of that row's items
        concatenated, in order, into a single plain Python list.

    Raises:
        KeyError: if an item id is not a key of ``d``.
    """
    if d is None:
        # Resolve at call time rather than in the signature: a default bound at
        # definition time would keep pointing at a stale table after the cell
        # that builds item_vector_dict is re-run.
        d = item_vector_dict
    res = []
    for items in item_list:
        row = []
        for item in items:
            row.extend(d[item].tolist())
        res.append(row)
    return res


In [25]:
# Append the flattened item vectors as 54 extra columns (item_index1 .. item_index54).
# join() aligns on the index, so both frames must share the same row index.
temp_df = char_item_rank_df.join(
    pd.DataFrame(
        item_vector_lookup(char_item_rank_df["items"]),
        columns=[f"item_index{position}" for position in range(1, 55)],
    )
)

In [26]:
# Cast rank to float. NOTE(review): char_item_rank_df["rank"] is already cast to float
# before the join that creates temp_df, so this looks redundant — confirm before removing.
temp_df["rank"] = temp_df["rank"].astype('float')

In [27]:
# Remove the `level` column. Not idempotent: re-running this cell on the
# already-reduced frame raises KeyError.
temp_df = temp_df.drop(columns=["level"])

In [28]:
# Binary label: 1 when rank <= 4, otherwise 0.
temp_df["top4"] = np.where(temp_df["rank"] <= 4, 1,0)

In [29]:
# Group by character_id and create a separate dataframe for each unique character_id,
# keyed by that id.
chars_df_dict = dict(iter(temp_df.groupby(temp_df["character_id"])))