In [None]:
"""
Generate a CSV file from the matches and timelines datasets.

For now, the attributes are ['totalGold', 'totalMinionsKilled', 'totalDamageDoneToChampions', 'xp', 'level']
Feel free to add more. Make sure to check the schema carefully.


To get the first 1000 data rows (plus the header line), run: cat timelines.csv | head -n 1001 > mock_timelines.csv
"""

In [1]:
import json
import csv
import requests
import timeit
import re

In [2]:
"""
FORMAT OF CSV FILE:
label: 0 if team_zero wins, 1 if team_zero loses
ratio_<stat>: ratio of <stat> for team_one / team_zero

NOTES:
- A row whose ratios are all 1.0 marks the start of the next match (both teams have the same starting stats)
"""

'\nFORMAT OF CSV FILE:\nlabel: 0 if team_zero wins, 1 if team_zero loses\nratio_<stat>: ratio of <stat> for team_one / team_zero\n\nNOTES:\n- A row whose ratios are all 1.0 marks the start of the next match (both teams have the same starting stats)\n'

In [3]:
# Accumulate team's data in one frame
def accumulate_team_data_in_frame(team, participantFrame):
    """Add one participant's frame stats onto a team's running totals.

    `team` is updated in place and nothing is returned. The minion total is
    the sum of the participant's `minionsKilled` and `jungleMinionsKilled`;
    champion damage is read from the nested `damageStats` mapping.

    Stats are applied one after another in the order listed below, so if a
    key is missing the totals updated before the `KeyError` stay updated.
    """
    # (team key, how to read the matching value from the participant frame)
    stat_readers = (
        ('totalGold', lambda pf: pf['totalGold']),
        ('totalMinionsKilled', lambda pf: pf['minionsKilled'] + pf['jungleMinionsKilled']),
        ('totalDamageDoneToChampions', lambda pf: pf['damageStats']['totalDamageDoneToChampions']),
        ('xp', lambda pf: pf['xp']),
        ('level', lambda pf: pf['level']),
    )
    for stat, read in stat_readers:
        team[stat] += read(participantFrame)

In [12]:
# Convert data to table
def convert_to_table(match_ids, region):
    """Write per-frame team stat ratios for every match to ``timelines.csv``.

    For each match id the function reads
    ``./matches/{region}_{match_id}.json`` (to learn whether the first team
    won) and ``./timelines/{region}_{match_id}.json`` (for the frames), then
    writes one CSV row per timeline frame: the match label followed by the
    team_one / team_zero ratio of each attribute.

    Parameters
    ----------
    match_ids : iterable
        Match identifiers used to build the input file names.
    region : str
        Region prefix used in the input file names.

    Notes
    -----
    - The label is 0 when ``info.teams[0].win`` in the match file is truthy,
      otherwise 1.
    - Participants "1".."5" are summed into team_zero, "6".."10" into team_one.
    - A ratio is written as 1.0 whenever team_zero's total is 0, which avoids
      a division by zero.
    - If a timeline cannot be processed, the match id is printed and the match
      is skipped; rows already written for its earlier frames are kept.
    - A missing or unparsable input file is not skipped: the error propagates.
    """
    attributes = ['totalGold', 'totalMinionsKilled', 'totalDamageDoneToChampions', 'xp', 'level']

    # newline='' lets the csv module control line endings itself (otherwise
    # extra blank lines can appear on platforms that translate '\n').
    with open('timelines.csv', 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['label'] + [f'ratio_{attr}' for attr in attributes])

        for match_id in match_ids:
            # Context managers close both inputs right after parsing, so a long
            # match list no longer accumulates open file handles.
            with open(f'./matches/{region}_{match_id}.json') as match_file:
                team_zero_win = json.load(match_file)['info']['teams'][0]['win']
            with open(f'./timelines/{region}_{match_id}.json') as timeline_file:
                data = json.load(timeline_file)

            # Same type for both outcomes (the CSV text is unchanged: "0" / "1").
            label = 0 if team_zero_win else 1

            try:
                for frame in data['info']['frames']:
                    team_zero = {key: 0 for key in attributes}
                    team_one = {key: 0 for key in attributes}

                    participant_frames = frame['participantFrames']
                    for participant_id in range(1, 11):
                        participant_frame = participant_frames[str(participant_id)]
                        team = team_zero if participant_id <= 5 else team_one
                        accumulate_team_data_in_frame(team, participant_frame)

                    row_data = [label]
                    for attr in attributes:
                        if team_zero[attr] == 0:
                            row_data.append(1.0)
                        else:
                            row_data.append(team_one[attr] / team_zero[attr])

                    writer.writerow(row_data)
            except Exception:
                # Best-effort: report the offending match and move on. Unlike a
                # bare `except:`, this still lets Ctrl-C / kernel interrupts stop
                # the run.
                print(match_id)

In [13]:
# Download the match-id list, build timelines.csv from the local match and
# timeline JSON files, and report the elapsed wall-clock time.
start = timeit.default_timer()

url = "https://canisback.com/matchId/matchlist_na1.json"
# Bound the wait so a stalled server cannot hang the notebook, and fail loudly
# on an HTTP error instead of trying to parse an error page as JSON.
res = requests.get(url, timeout=30)
res.raise_for_status()
match_ids = json.loads(res.text)

# ".../matchlist_na1.json" splits into [..., "na1", "json"]; the second-to-last
# piece, upper-cased, is the region prefix used in the local file names.
region = re.split(r'[_, .]', url)[-2].upper()

convert_to_table(match_ids, region)

stop = timeit.default_timer()
print('Time: ', stop - start)

4035112136
4035116571
4035152728
4035157361
4035168348
4035176114
4035185272
Time:  84.24330387500049
