In [11]:
import csv
import json
import requests
import numpy as np
import pandas as pd
import os

## Load data and map champion ID to Name and alphabetical index

In [12]:
filename = '../data/champion.json'
with open(filename, 'r') as f:
    champ_data = json.load(f)

# contains index of championId and name
id_champ_map = {}
champion_list = []
for i, c in enumerate(champ_data['data'].keys()):
    id_champ_map[int(champ_data['data'][c]['key'])] = {'champion': champ_data['data'][c]['id'], 'index': i}
    champion_list.append(champ_data['data'][c]['id'])

## Save data in matrix format for analysis and df

In [13]:
filename = '../data/mastery_data.json'

with open(filename, 'r') as f:
    data = json.load(f)

names = []
r = np.zeros((len(data.keys()), len(champion_list)))

for i, key in enumerate(data.keys()):
    names.append([key])
    for j, entry in enumerate(data[key]):
        if j == 0:
            maximum = entry['championPoints']
        r[i][id_champ_map[entry['championId']]['index']] = entry['championPoints']/maximum

        fields = ['player_id']
print(r)
with open('../data/names.csv', 'w') as f:
    write = csv.writer(f)
     
    write.writerow(fields)
    write.writerows(names)

np.savetxt('../data/mastery.csv', r, delimiter=',', header=','.join(champion_list))

[[0.         0.         0.02915197 ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.34750708 0.35768539 ... 0.         0.         0.        ]
 ...
 [0.         0.         1.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.90094475 0.         ... 0.         0.16045564 0.        ]]


## Save data in surprise format to load into surprise
The dataset has 5326 users from the ranked pool. For each user, the dataset contains the user's 25 most played champions and each champion's corresponding mastery score for the user. Champion mastery score is highly correlated with playtime.

I loaded in champion mastery data and set each user's highest played champion's mastery score to 1. I then divide every other champion's mastery point score by the highest played champion's mastery point score. We assume that any champion after the 25th has a rating of 0. This is a crude approximation since the mean of the 25th highest mastery score was 0.09. The rating scale is therefore 0 to 1.

In [19]:
filename = '../data/mastery_data.json'

with open(filename, 'r') as f:
    data = json.load(f)

ratings = [] # list of items
total = 0
count = 0
for i, key in enumerate(data.keys()):
    for j, entry in enumerate(data[key]):
        if j == 0:
            # sorted in descending order
            maximum = entry['championPoints']
        if j == 24:
            total += entry['championPoints']/maximum
            count += 1

print(f'Mean of 25th highest champion score: {total/count}')

Mean of 25th highest champion score: 0.09353007923023157


In [30]:
filename = '../data/mastery_data.json'

with open(filename, 'r') as f:
    data = json.load(f)

ratings = [] # list of items
r = np.zeros((len(data.keys()), len(cols)))

for i, key in enumerate(data.keys()):
    used = []
    for j, entry in enumerate(data[key]):
        if j == 0:
            # sorted in descending order
            maximum = entry['championPoints']
        score = entry['championPoints']/maximum
        item = id_champ_map[entry['championId']]['champion']
        ratings.append([key, item, score])
        used.append(item)
    # set unknown champion IDs to zero
    for champ in cols:
        if champ not in used:
            ratings.append([key, champ, 0])
    else:
        continue

print(f'users: {i}')
print(f'ratings: {len(ratings)}')
with open('../data/surprise_data_full.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(ratings)

users: 5326
ratings: 878955


In [29]:
# save first 750 users for quicker analysis

filename = '../data/mastery_data.json'

with open(filename, 'r') as f:
    data = json.load(f)

ratings = [] # list of items
r = np.zeros((len(data.keys()), len(cols)))

for i, key in enumerate(data.keys()):
    if i >= 750:
        break
    used = []
    for j, entry in enumerate(data[key]):
        if j == 0:
            # sorted in descending order
            maximum = entry['championPoints']
        score = entry['championPoints']/maximum
        item = id_champ_map[entry['championId']]['champion']
        ratings.append([key, item, score])
        used.append(item)
    # set unknown champion IDs to zero
    for champ in cols:
        if champ not in used:
            ratings.append([key, champ, 0])
    else:
        continue

print(f'users: {i}')
print(f'ratings: {len(ratings)}')
with open('../data/surprise_data_750.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(ratings)

users: 750
ratings: 123750
