In [1]:
import pandas as pd
import json

# Location of the ranked-statistics dump, relative to the notebook's working directory.
# Forward slashes work on every OS; a backslash here would form the invalid
# escape sequence "\C" and only resolve as a path separator on Windows.
RANKED_STATS_JSON = "data/Classic_Ranked_Statistics 2023_03_13.json"

# Parse the whole dump into plain Python objects; later cells index into it by key.
with open(RANKED_STATS_JSON, 'r', encoding= "utf8") as f:
    ranked_stats_json = json.load(f)


In [3]:
# One flat table per top-level array in the statistics payload.
co_raw_df = pd.DataFrame(ranked_stats_json['coInfoArray'])
map_raw_df = pd.DataFrame(ranked_stats_json['mapInfoArray'])
user_raw_df = pd.DataFrame(ranked_stats_json['userInfoArray'])
replay_raw_df = pd.DataFrame(ranked_stats_json['replayInfoArray'])

# Bans table: flattens playerInfoArray -> bannedCoCategoryIdArray into one row
# per banned id, carrying along the replayId and the player's userId as metadata.
ban_raw_df = pd.json_normalize(
    ranked_stats_json['replayInfoArray'],
    record_path=['playerInfoArray', 'bannedCoCategoryIdArray'],
    meta=['replayId', ['playerInfoArray', 'userId']],
)

# Picks table: one row per playerInfoArray entry, carrying along the replayId.
pick_raw_df = pd.json_normalize(
    ranked_stats_json['replayInfoArray'],
    record_path='playerInfoArray',
    meta='replayId',
)


In [43]:
# Give the CO name column an unambiguous label before it is merged into other tables.
co_df = co_raw_df.rename({'name': 'coName'}, axis='columns')

In [44]:
# The English name serves as the map's display name; this only relabels the
# column, all other columns of the raw table are carried over unchanged.
map_df = map_raw_df.rename({'englishName': 'mapName'}, axis='columns')

In [45]:
# Relabel the nickname column so it reads as a user name once merged elsewhere.
user_df = user_raw_df.rename({'nickname': 'userName'}, axis='columns')

In [46]:
# Attach CO details (pickedCoCategoryId -> categoryId) and the player's name
# (userId) to every pick. Both are left joins, so picks without a match are kept.
# The id columns and the nested ban list are dropped once the lookups are done.
pick_df = (
    pick_raw_df
    .merge(co_df, how='left', left_on='pickedCoCategoryId', right_on='categoryId')
    .merge(user_df[['userId', 'userName']], how='left', on='userId')
    .drop(columns=['bannedCoCategoryIdArray', 'categoryId', 'pickedCoCategoryId', 'userId'])
)

In [47]:
# Label the flattened ban columns, then attach CO details (categoryId) and the
# banning player's name (userId); the id columns are dropped afterwards.
# NOTE(review): the CO merge uses pandas' default inner join, so a ban whose
# categoryId is absent from co_df is silently dropped -- confirm this is intended.
ban_df = (
    ban_raw_df
    .rename({0: 'categoryId', 'playerInfoArray.userId': 'userId'}, axis='columns')
    .merge(co_df, on='categoryId')
    .merge(user_df[['userId', 'userName']], how='left', on='userId')
    .drop(columns=['categoryId', 'userId'])
)

In [48]:
# Attach the map name (mapId), then the pick row whose playerIndex equals the
# replay's winnerPlayerIndex, and relabel that row's CO/user names as the winner's.
# NOTE(review): the map merge uses pandas' default inner join, so a replay whose
# mapId is absent from map_df is silently dropped -- confirm this is intended.
replay_df = (
    replay_raw_df
    .merge(map_df[['mapId', 'mapName']], on='mapId')
    .merge(
        pick_df,
        how='left',
        left_on=['winnerPlayerIndex', 'replayId'],
        right_on=['playerIndex', 'replayId'],
    )
    .rename({'coName': 'winnerCoName', 'userName': 'winnerUserName'}, axis='columns')
    .drop(columns=['configVersion', 'playerInfoArray', 'playerIndex'])
)

In [49]:
# Saves cleaned dataframe to csv files
# co_df.to_csv("data/co_data.csv", index= False)
# map_df.to_csv("data/map_data.csv", index= False)
# user_df.to_csv("data/user_data.csv", index= False)
# replay_df.to_csv("data/replay_data.csv", index= False)
# pick_df.to_csv("data/pick_data.csv", index= False)
# ban_df.to_csv("data/ban_data.csv", index= False)