In [36]:
import csv
import json
import os
import random

import numpy as np
import pandas as pd
import requests

from surprise import BaselineOnly, Dataset, Reader, SVD
from surprise.model_selection import cross_validate

## Load data and map champion ID to Name and alphabetical index

In [2]:
# Read the champion metadata and derive two lookups from it:
#   cols         - champion id strings, in the order they appear in the file
#   id_champ_map - numeric champion key -> {'champion': id string, 'index': position in cols}
# NOTE(review): the section heading calls the index "alphabetical"; the code simply
# uses the file's own ordering — confirm the file is sorted if that matters.
filename = 'data/champion.json'
with open(filename, 'r') as f:
    champ_data = json.load(f)

champions = list(champ_data['data'].values())

cols = [champ['id'] for champ in champions]
id_champ_map = {
    int(champ['key']): {'champion': champ['id'], 'index': position}
    for position, champ in enumerate(champions)
}

print(cols)
print(len(cols))
print(id_champ_map)

['Aatrox', 'Ahri', 'Akali', 'Akshan', 'Alistar', 'Amumu', 'Anivia', 'Annie', 'Aphelios', 'Ashe', 'AurelionSol', 'Azir', 'Bard', 'Belveth', 'Blitzcrank', 'Brand', 'Braum', 'Briar', 'Caitlyn', 'Camille', 'Cassiopeia', 'Chogath', 'Corki', 'Darius', 'Diana', 'Draven', 'DrMundo', 'Ekko', 'Elise', 'Evelynn', 'Ezreal', 'Fiddlesticks', 'Fiora', 'Fizz', 'Galio', 'Gangplank', 'Garen', 'Gnar', 'Gragas', 'Graves', 'Gwen', 'Hecarim', 'Heimerdinger', 'Illaoi', 'Irelia', 'Ivern', 'Janna', 'JarvanIV', 'Jax', 'Jayce', 'Jhin', 'Jinx', 'Kaisa', 'Kalista', 'Karma', 'Karthus', 'Kassadin', 'Katarina', 'Kayle', 'Kayn', 'Kennen', 'Khazix', 'Kindred', 'Kled', 'KogMaw', 'KSante', 'Leblanc', 'LeeSin', 'Leona', 'Lillia', 'Lissandra', 'Lucian', 'Lulu', 'Lux', 'Malphite', 'Malzahar', 'Maokai', 'MasterYi', 'Milio', 'MissFortune', 'MonkeyKing', 'Mordekaiser', 'Morgana', 'Naafiri', 'Nami', 'Nasus', 'Nautilus', 'Neeko', 'Nidalee', 'Nilah', 'Nocturne', 'Nunu', 'Olaf', 'Orianna', 'Ornn', 'Pantheon', 'Poppy', 'Pyke', 'Qiy

## Save the mastery data as a player × champion matrix (CSV) for analysis in a DataFrame

In [15]:
# Build a (players x champions) matrix of mastery scores and save it to CSV.
# Each player's championPoints are divided by that player's highest
# championPoints, so every row lies in [0, 1].
filename = 'data/mastery_data.json'

with open(filename, 'r') as f:
    data = json.load(f)

names = []
r = np.zeros((len(data), len(cols)))

for i, key in enumerate(data):
    names.append([key])
    entries = data[key]
    # Take the true maximum instead of assuming the first entry is the largest,
    # so the result does not depend on the entries being sorted in descending order.
    maximum = max((entry['championPoints'] for entry in entries), default=0)
    if maximum <= 0:
        # No entries, or no points at all: leave the row as zeros rather than
        # dividing by zero.
        continue
    for entry in entries:
        r[i][id_champ_map[entry['championId']]['index']] = entry['championPoints'] / maximum

fields = ['player_id']
print(r)
# newline='' lets the csv module control line endings (avoids blank rows on Windows).
with open('data/names.csv', 'w', newline='') as f:
    write = csv.writer(f)

    write.writerow(fields)
    write.writerows(names)

# comments='' stops np.savetxt from prefixing the header line with '# ',
# which would otherwise turn the first column name into '# Aatrox' when read back.
np.savetxt('data/mastery.csv', r, delimiter=',', header=','.join(cols), comments='')

[[0.         0.         0.02915197 ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.34750708 0.35768539 ... 0.         0.         0.        ]
 ...
 [0.         0.01648805 0.         ... 0.         0.         0.        ]
 [0.         0.01841106 0.         ... 0.         0.         0.        ]
 [0.         0.16450107 0.15031608 ... 0.         0.         0.        ]]


## Save the data as (user, item, rating) rows for loading into Surprise

In [34]:
# Convert the mastery data into one (user, item, rating) row per player/champion
# pair and write the rows to a CSV file.
#   user   - 1-based position of the player in the JSON file (see nameDict)
#   item   - champion column index taken from id_champ_map
#   rating - championPoints divided by the player's highest championPoints
filename = 'data/mastery_data.json'

with open(filename, 'r') as f:
    data = json.load(f)

nameDict = {}  # user number -> player key from the JSON file
fields = ["user", "item", "rating"]  # column order of the rows; no header row is written

ratings = [] # list of items

# The all-zero matrix this cell used to assign to `r` was never read here and
# overwrote the mastery matrix built earlier in the notebook, so it is gone.
for i, key in enumerate(data):
    user_id = i + 1
    nameDict[user_id] = key
    entries = data[key]
    # Use the real maximum rather than trusting the first entry to be the largest.
    maximum = max((entry['championPoints'] for entry in entries), default=0)
    if maximum <= 0:
        # Nothing to normalise against; skip instead of dividing by zero.
        continue
    for entry in entries:
        score = entry['championPoints'] / maximum
        item = id_champ_map[entry['championId']]['index']
        ratings.append([user_id, item, score])

with open('data/data_surprise_format.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(ratings)

In [4]:
# Load the saved mastery matrix and the matching list of player ids.
df = pd.read_csv('data/mastery.csv')
# np.savetxt writes its header line with a leading '# ' by default, which makes
# the first column come back as '# Aatrox'. Strip that prefix; this is a no-op
# when the header is already clean.
df = df.rename(columns=lambda name: name.lstrip('# '))
n = pd.read_csv('data/names.csv')

print(df)

      # Aatrox      Ahri     Akali    Akshan   Alistar     Amumu  Anivia  \
0          0.0  0.000000  0.029152  0.000000  0.000000  0.000000     0.0   
1          0.0  0.000000  0.000000  0.000000  0.000000  0.000000     0.0   
2          0.0  0.347507  0.357685  0.000000  0.000000  0.241668     0.0   
3          0.0  0.000000  0.279145  0.000000  0.000000  0.000000     0.0   
4          0.0  0.000000  0.000000  0.322004  0.355775  0.221738     0.0   
...        ...       ...       ...       ...       ...       ...     ...   
2839       0.0  0.064267  0.244775  0.000000  0.000000  0.000000     0.0   
2840       0.0  0.000000  0.000000  0.000000  0.000000  0.000000     0.0   
2841       0.0  0.016488  0.000000  0.000000  0.000000  0.014501     0.0   
2842       0.0  0.018411  0.000000  0.000000  0.000000  0.000000     0.0   
2843       0.0  0.164501  0.150316  0.000000  0.000000  0.000000     0.0   

      Annie  Aphelios      Ashe  ...      Yone    Yorick     Yuumi       Zac  \
0      

In [5]:
# Append (or refresh) a 'total' row holding the per-column sums.
# Any existing 'total' row is excluded from the sum so that re-running this
# cell does not fold the previous total into the new one.
df.loc['total'] = df.drop(index='total', errors='ignore').sum(numeric_only=True, axis=0)

## Test on sample data

In [9]:
# Sanity check: run 5-fold cross-validated SVD on Surprise's built-in
# MovieLens 100k dataset before trying it on the mastery data.
# SVD, Dataset and cross_validate come from the import cell at the top of the notebook.
SEED = 42
# Seed both RNGs, as the Surprise FAQ recommends, so the fold split is
# repeatable between runs.
random.seed(SEED)
np.random.seed(SEED)

data = Dataset.load_builtin('ml-100k')
algo = SVD(random_state=SEED)
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Dataset ml-100k could not be found. Do you want to download it? [Y/n] y
Trying to download dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to /Users/marcotan/.surprise_data/ml-100k
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9384  0.9378  0.9384  0.9331  0.9374  0.9370  0.0020  
MAE (testset)     0.7378  0.7421  0.7376  0.7390  0.7382  0.7389  0.0016  
Fit time          0.64    0.65    0.64    0.64    0.64    0.64    0.00    
Test time         0.18    0.16    0.12    0.15    0.16    0.15    0.02    


{'test_rmse': array([0.93841427, 0.93779179, 0.93840339, 0.93312829, 0.93739769]),
 'test_mae': array([0.73780342, 0.74208113, 0.73764253, 0.73895569, 0.7381667 ]),
 'fit_time': (0.635577917098999,
  0.6460738182067871,
  0.6441450119018555,
  0.6368138790130615,
  0.636132001876831),
 'test_time': (0.17853474617004395,
  0.15749692916870117,
  0.12292194366455078,
  0.15475797653198242,
  0.1565570831298828)}

## Run SVD cross-validation on the mastery data

In [42]:
# Run 5-fold cross-validated SVD on the mastery ratings file.
# The file holds comma-separated user,item,rating rows with ratings in [0, 1].
SEED = 42
# Seed both RNGs, as the Surprise FAQ recommends, so the fold split is
# repeatable between runs.
random.seed(SEED)
np.random.seed(SEED)

file_path = os.path.expanduser("data/data_surprise_format.csv")
reader = Reader(sep=",", rating_scale=(0,1))
data = Dataset.load_from_file(file_path, reader)

algo = SVD(random_state=SEED)
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.2255  0.2256  0.2251  0.2262  0.2256  0.2256  0.0004  
MAE (testset)     0.1532  0.1527  0.1542  0.1537  0.1534  0.1534  0.0005  
Fit time          0.44    0.45    0.43    0.44    0.42    0.44    0.01    
Test time         0.09    0.16    0.09    0.09    0.13    0.11    0.03    


{'test_rmse': array([0.22551968, 0.22556659, 0.22506162, 0.22620993, 0.22557775]),
 'test_mae': array([0.15319351, 0.1526554 , 0.15415729, 0.15369397, 0.15342994]),
 'fit_time': (0.4442250728607178,
  0.44663500785827637,
  0.43267107009887695,
  0.44251108169555664,
  0.42369508743286133),
 'test_time': (0.09103012084960938,
  0.1619718074798584,
  0.08678889274597168,
  0.08718109130859375,
  0.12565994262695312)}