In [78]:
import ast
import gzip
import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.sparse.linalg import svds
from sklearn.metrics import root_mean_squared_error
from sklearn.neighbors import NearestNeighbors

In [2]:
def readfile(path):
  """Read a gzip-compressed text file holding one Python-literal record per line.

  Each non-blank line is parsed with ``ast.literal_eval``, which accepts only
  literals (dicts, lists, strings, numbers, ...) and never executes code.
  The previous implementation used ``eval`` and would run arbitrary
  expressions found in the data file.

  Parameters
  ----------
  path : str or path-like
      Location of the ``.gz`` file, decoded as UTF-8.

  Returns
  -------
  list
      The parsed records, in file order.

  Raises
  ------
  ValueError, SyntaxError
      If a line is not a valid Python literal.
  """
  values = []
  # The context manager closes the handle even if parsing fails midway.
  with gzip.open(path, 'rt', encoding='utf-8') as g:
    for l in g:
      # Blank lines carry no record; skip them instead of failing to parse.
      if not l.strip():
        continue
      values.append(ast.literal_eval(l))
  return values

In [3]:
# Parse the gzipped user/item dump into a list of per-user records.
users_items = readfile(path='australian_users_items.json.gz')

In [4]:
# Flatten the nested user -> items structure into one (user, item) tuple per owned game.
ownership_rows = [
    (
        user_record['user_id'],
        game['item_id'],
        game['item_name'],
        int(game['playtime_forever']),
    )
    for user_record in users_items
    for game in user_record['items']
]

# Transpose the row tuples back into four parallel column lists.
if ownership_rows:
    user_id, item_id, item_name, play_time = map(list, zip(*ownership_rows))
else:
    user_id, item_id, item_name, play_time = [], [], [], []

In [5]:
# Assemble the four parallel lists into a long-format table: one row per (user, game).
users_data = dict(
    user_id=user_id,
    item_id=item_id,
    item_name=item_name,
    play_time=play_time,
)

users_info = pd.DataFrame(data=users_data)
users_info

Unnamed: 0,user_id,item_id,item_name,play_time
0,76561197970982479,10,Counter-Strike,6
1,76561197970982479,20,Team Fortress Classic,0
2,76561197970982479,30,Day of Defeat,7
3,76561197970982479,40,Deathmatch Classic,0
4,76561197970982479,50,Half-Life: Opposing Force,0
...,...,...,...,...
5153204,76561198329548331,346330,BrainBread 2,0
5153205,76561198329548331,373330,All Is Dust,0
5153206,76561198329548331,388490,One Way To Die: Steam Edition,3
5153207,76561198329548331,521570,You Have 10 Seconds 2,4


In [6]:
# Keep only rows with play_time of at least 120, ordered from most to least played.
users_info_filtered = (
    users_info
    .loc[users_info['play_time'].ge(120)]
    .sort_values('play_time', ascending=False)
)
users_info_filtered

Unnamed: 0,user_id,item_id,item_name,play_time
587715,wolop,4000,Garry's Mod,642773
2499068,Evilutional,212200,Mabinogi,635295
4075729,76561198019826668,42710,Call of Duty: Black Ops - Multiplayer,632452
1495340,76561198039832932,4000,Garry's Mod,613411
1836985,tsunamitad,72200,Universe Sandbox,600068
...,...,...,...,...
1761488,Steamified,262260,Jets'n'Guns Gold,120
2951924,magentaairship,237930,Transistor,120
3544773,76561198063645161,498240,Batman - The Telltale Series,120
1528584,sesepe,302690,Metal Dead,120


In [7]:
# Flag games whose remaining rows all share a single play_time value (this
# includes games with only one row). play_time holds plain integers, so
# "exactly one distinct value" is the same test as comparing every entry to the
# first one, and the built-in aggregation avoids a Python lambda per game.
users_grp = users_info_filtered.groupby('item_name')['play_time'].nunique().eq(1)

In [8]:
# Names of the games flagged True in users_grp.
removed = users_grp.index[users_grp.to_numpy()]

In [9]:
# Discard every row belonging to a game listed in `removed`.
users_info_filtered = users_info_filtered.loc[
    ~users_info_filtered['item_name'].isin(removed)
]

In [10]:
# Draw the game sample from a seeded generator so that a fresh
# Restart & Run All selects the same games (the original draw was unseeded).
rng = np.random.default_rng(42)
item_names = users_info_filtered['item_name'].unique()
# Cap the sample size: asking for 1000 without replacement raises when fewer
# than 1000 distinct games are left after filtering.
keep_names = rng.choice(item_names, size=min(1000, len(item_names)), replace=False)
len(keep_names)

1000

In [11]:
# Restrict the interaction table to the sampled games.
sampled_users = users_info_filtered.loc[
    users_info_filtered['item_name'].isin(keep_names)
]

In [12]:
# sampled_users = users_info_filtered.sample(n=5000, random_state=42)
# sampled_users = sampled_users.sort_values(by='play_time', ascending=False)
# sampled_users

In [13]:
# User x game utility matrix of play_time; pairs with no row become NaN.
# pivot_table's default aggregation (mean) is applied to any repeated (user, game) pair.
um = pd.pivot_table(
    sampled_users,
    values='play_time',
    index='user_id',
    columns='item_name',
)
um

item_name,神明的一天世界(God's One Day World),"10,000,000",1701 A.D.: Gold Edition,1953 - KGB Unleashed,35MM,3DMark 11,688(I) Hunter/Killer,7 Days to Die,8-Bit Armies,8-Bit Hordes,...,Zombie Party,ZombieRush,Zombitatos the end of the Pc master race,bit Dungeon+,dUpLicity ~Beyond the Lies~,eXceed 2nd - Vampire REX,how do you Do It?,iRacing,klocki,丛林守望者（Ranger of the jungle）
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
--000--,,,,,,,,,,,...,,,,,,,,,,
--ace--,,,,,,,,,,,...,,,,,,,,,,
--ionex--,,,,,,,,,,,...,,,,,,,,,,
-2SV-vuLB-Kg,,,,,,,,,,,...,,,,,,,,,,
-404PageNotFound-,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zzbrunozz,,,,,,,,,,,...,,,,,,,,,,
zzeee,,,,,,,,,,,...,,,,,,,,,,
zzoptimuszz,,,,,,,,,,,...,,,,,,,,,,
zzyfo,,,,,,,,,,,...,,,,,,,,,,


In [14]:
# Fill every missing cell in a user's row with that user's mean play_time.
# NOTE(review): the matrix is almost entirely NaN, so after this step each row is
# nearly constant; the item correlations computed later come out close to 1.0.
um_imp = um.apply(lambda row: row.where(row.notna(), row.mean()), axis=1)

In [15]:
# Show the imputed utility matrix (missing cells replaced by each user's row mean).
um_imp

item_name,神明的一天世界(God's One Day World),"10,000,000",1701 A.D.: Gold Edition,1953 - KGB Unleashed,35MM,3DMark 11,688(I) Hunter/Killer,7 Days to Die,8-Bit Armies,8-Bit Hordes,...,Zombie Party,ZombieRush,Zombitatos the end of the Pc master race,bit Dungeon+,dUpLicity ~Beyond the Lies~,eXceed 2nd - Vampire REX,how do you Do It?,iRacing,klocki,丛林守望者（Ranger of the jungle）
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
--000--,2907.333333,2907.333333,2907.333333,2907.333333,2907.333333,2907.333333,2907.333333,2907.333333,2907.333333,2907.333333,...,2907.333333,2907.333333,2907.333333,2907.333333,2907.333333,2907.333333,2907.333333,2907.333333,2907.333333,2907.333333
--ace--,2959.000000,2959.000000,2959.000000,2959.000000,2959.000000,2959.000000,2959.000000,2959.000000,2959.000000,2959.000000,...,2959.000000,2959.000000,2959.000000,2959.000000,2959.000000,2959.000000,2959.000000,2959.000000,2959.000000,2959.000000
--ionex--,183.000000,183.000000,183.000000,183.000000,183.000000,183.000000,183.000000,183.000000,183.000000,183.000000,...,183.000000,183.000000,183.000000,183.000000,183.000000,183.000000,183.000000,183.000000,183.000000,183.000000
-2SV-vuLB-Kg,660.000000,660.000000,660.000000,660.000000,660.000000,660.000000,660.000000,660.000000,660.000000,660.000000,...,660.000000,660.000000,660.000000,660.000000,660.000000,660.000000,660.000000,660.000000,660.000000,660.000000
-404PageNotFound-,1123.500000,1123.500000,1123.500000,1123.500000,1123.500000,1123.500000,1123.500000,1123.500000,1123.500000,1123.500000,...,1123.500000,1123.500000,1123.500000,1123.500000,1123.500000,1123.500000,1123.500000,1123.500000,1123.500000,1123.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zzbrunozz,674.500000,674.500000,674.500000,674.500000,674.500000,674.500000,674.500000,674.500000,674.500000,674.500000,...,674.500000,674.500000,674.500000,674.500000,674.500000,674.500000,674.500000,674.500000,674.500000,674.500000
zzeee,766.000000,766.000000,766.000000,766.000000,766.000000,766.000000,766.000000,766.000000,766.000000,766.000000,...,766.000000,766.000000,766.000000,766.000000,766.000000,766.000000,766.000000,766.000000,766.000000,766.000000
zzoptimuszz,4762.500000,4762.500000,4762.500000,4762.500000,4762.500000,4762.500000,4762.500000,4762.500000,4762.500000,4762.500000,...,4762.500000,4762.500000,4762.500000,4762.500000,4762.500000,4762.500000,4762.500000,4762.500000,4762.500000,4762.500000
zzyfo,383.333333,383.333333,383.333333,383.333333,383.333333,383.333333,383.333333,383.333333,383.333333,383.333333,...,383.333333,383.333333,383.333333,383.333333,383.333333,383.333333,383.333333,383.333333,383.333333,383.333333


In [16]:
# Pairwise Pearson correlation between game columns of the imputed matrix.
um_imp_cor = um_imp.corr(method='pearson')
um_imp_cor

item_name,神明的一天世界(God's One Day World),"10,000,000",1701 A.D.: Gold Edition,1953 - KGB Unleashed,35MM,3DMark 11,688(I) Hunter/Killer,7 Days to Die,8-Bit Armies,8-Bit Hordes,...,Zombie Party,ZombieRush,Zombitatos the end of the Pc master race,bit Dungeon+,dUpLicity ~Beyond the Lies~,eXceed 2nd - Vampire REX,how do you Do It?,iRacing,klocki,丛林守望者（Ranger of the jungle）
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
神明的一天世界(God's One Day World),1.000000,0.999776,0.999925,0.997651,0.999853,0.999989,0.999994,0.962503,0.999795,0.999977,...,0.999994,0.999997,0.999997,0.999997,0.999988,0.999956,0.999995,0.999892,0.999995,0.999986
10000000,0.999776,1.000000,0.999709,0.997437,0.999638,0.999774,0.999779,0.962292,0.999580,0.999762,...,0.999779,0.999783,0.999783,0.999782,0.999763,0.999742,0.999781,0.999682,0.999780,0.999783
1701 A.D.: Gold Edition,0.999925,0.999709,1.000000,0.997581,0.999783,0.999918,0.999924,0.962436,0.999725,0.999907,...,0.999924,0.999927,0.999927,0.999927,0.999912,0.999886,0.999925,0.999822,0.999924,0.999923
1953 - KGB Unleashed,0.997651,0.997437,0.997581,1.000000,0.997510,0.997645,0.997650,0.960247,0.997451,0.997633,...,0.997650,0.997653,0.997653,0.997653,0.997638,0.997612,0.997651,0.997548,0.997650,0.997649
35MM,0.999853,0.999638,0.999783,0.997510,1.000000,0.999847,0.999852,0.962357,0.999653,0.999835,...,0.999852,0.999855,0.999856,0.999855,0.999840,0.999814,0.999854,0.999750,0.999853,0.999851
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
eXceed 2nd - Vampire REX,0.999956,0.999742,0.999886,0.997612,0.999814,0.999949,0.999955,0.962468,0.999756,0.999938,...,0.999955,0.999958,0.999958,0.999958,0.999943,1.000000,0.999956,0.999853,0.999955,0.999954
how do you Do It?,0.999995,0.999781,0.999925,0.997651,0.999854,0.999989,0.999994,0.962505,0.999795,0.999978,...,0.999995,0.999998,0.999998,0.999998,0.999982,0.999956,1.000000,0.999893,0.999995,0.999994
iRacing,0.999892,0.999682,0.999822,0.997548,0.999750,0.999886,0.999891,0.962401,0.999692,0.999874,...,0.999891,0.999894,0.999894,0.999894,0.999879,0.999853,0.999893,1.000000,0.999892,0.999890
klocki,0.999995,0.999780,0.999924,0.997650,0.999853,0.999985,0.999993,0.962503,0.999794,0.999975,...,0.999994,0.999997,0.999997,0.999997,0.999982,0.999955,0.999995,0.999892,1.000000,0.999993


In [17]:
# Fit a 4-nearest-neighbour index over the rows of the game correlation matrix.
# build_model skips the first returned neighbour, leaving three candidates per game.
nn = NearestNeighbors(n_neighbors=4).fit(um_imp_cor)
nn

In [18]:
# Row i holds the positional indices of the 4 nearest rows to game i (no distances).
neighbors = nn.kneighbors(X=um_imp_cor, return_distance=False)

In [19]:
def build_model(uid, sampled, corrmat, neighbors, n):
  """Recommend up to ``n`` games for a user by voting over item neighbours.

  For every game the user has played (``play_time >= 5``), the game's nearest
  neighbours are looked up and each neighbour the user has not played receives
  one vote. The most-voted games are returned.

  Parameters
  ----------
  uid : hashable
      Value matched against ``sampled['user_id']``.
  sampled : pandas.DataFrame
      Interaction table with ``user_id``, ``item_name`` and ``play_time`` columns.
  corrmat : pandas.DataFrame
      Item-by-item matrix; only its index (item names, in row order) is used.
  neighbors : numpy.ndarray
      2-D array of positional neighbour indices, one row per ``corrmat`` row.
  n : int
      Maximum number of recommendations to return.

  Returns
  -------
  pandas.Series
      Vote counts indexed by game name, highest first, truncated to ``n``
      entries. Empty when the user has no qualifying games.
  """
  user_rows = sampled.loc[sampled['user_id'] == uid]
  # Use a set of names: ``name in some_series`` tests the Series *index labels*,
  # not its values, so the original check never excluded already-played games.
  played = set(user_rows['item_name'])
  items_play_time = user_rows.loc[user_rows['play_time'] >= 5, 'item_name']
  best_list = []

  for item in items_play_time:
    idx = corrmat.index.get_loc(item)
    # The first neighbour is skipped -- presumably the query item itself.
    best_list.extend(
      corrmat.index[i] for i in neighbors[idx, 1:] if corrmat.index[i] not in played
    )

  return pd.Series(best_list).value_counts()[:n]

In [20]:
# Distinct user ids in order of first appearance in sampled_users.
unique_user_ids_list = sampled_users['user_id'].drop_duplicates().tolist()
unique_user_ids_list

['poedgirl',
 'Forty-Nine',
 '76561198069163682',
 '756755557153145',
 '76561198032459795',
 'UnethicalPanda',
 'DoctorSpaceman',
 'DodgM8',
 '76561198006084032',
 '76561198073043666',
 'OdetteArt',
 '131312',
 'x-code-x',
 '76561197971366625',
 'ZombieBiscuit',
 '76561198084123358',
 '76561198081997653',
 'ScreamingAngels',
 '76561198120814021',
 'l0se',
 'JinDoritos',
 '76561198041008266',
 '76561198018677971',
 '76561198088447673',
 'DrumEssence',
 'glaze_evas',
 '76561198016650326',
 '76561198070407298',
 'xfactor7115',
 'iSkulz',
 '76561198047697394',
 'suckmynya',
 'jqinnewsteam',
 'rcdarkangel',
 '76561198008565345',
 'yammyganggang',
 'SOLOMAN151',
 '76561198012396712',
 '76561198056801459',
 '76561198007208545',
 '76561198070015725',
 '76561198074699163',
 '76561198017614917',
 '76561198059162959',
 'Noxinator',
 '76561198096678177',
 '76561198086081043',
 '76561198065880238',
 'djoker888',
 '76561197992417379',
 'briggsy195',
 '76561198058536640',
 'shameless_1',
 'Strider95'

In [27]:
# Top-3 neighbour-vote recommendations for one example user.
build_model(
    uid='kzkyus',
    sampled=sampled_users,
    corrmat=um_imp_cor,
    neighbors=neighbors,
    n=3,
)

Lego Harry Potter    2
DayZ                 1
Crusader Kings II    1
Name: count, dtype: int64

In [94]:
# Rank-10 truncated SVD of the imputed utility matrix.
U, sigma, Vt = svds(um_imp.to_numpy(), k=10, random_state=42)
# Expand the singular values into a diagonal matrix for the product below.
sigma = np.diag(sigma)
# Low-rank reconstruction: U * Sigma * V^T.
um_repro = np.matmul(np.matmul(U, sigma), Vt)
# um_repro += um_means.values.reshape(-1,1)

In [95]:
# Re-attach the user / game labels of the imputed matrix to the reconstruction.
um_repro = pd.DataFrame(
    data=um_repro,
    index=um_imp.index,
    columns=um_imp.columns,
)
um_repro

item_name,神明的一天世界(God's One Day World),"10,000,000",1701 A.D.: Gold Edition,1953 - KGB Unleashed,35MM,3DMark 11,688(I) Hunter/Killer,7 Days to Die,8-Bit Armies,8-Bit Hordes,...,Zombie Party,ZombieRush,Zombitatos the end of the Pc master race,bit Dungeon+,dUpLicity ~Beyond the Lies~,eXceed 2nd - Vampire REX,how do you Do It?,iRacing,klocki,丛林守望者（Ranger of the jungle）
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
--000--,2907.322566,2905.975922,2907.138572,2907.842424,2906.953086,2907.301766,2907.243660,2832.626865,2906.901030,2906.983035,...,2907.265727,2907.301118,2907.302271,2907.298391,2907.258625,2907.395743,2907.285093,2907.452472,2907.313474,2907.273059
--ace--,2959.032417,2958.151111,2958.780335,2959.683846,2959.059526,2958.926191,2958.954745,2986.416397,2958.967833,2958.920084,...,2958.966929,2958.984788,2958.987730,2958.983713,2958.977090,2958.995213,2958.976413,2959.090039,2958.997475,2958.925930
--ionex--,183.002005,182.947500,182.986415,183.042293,183.003681,182.995435,182.997201,184.695573,182.998011,182.995058,...,182.997955,182.999059,182.999241,182.998993,182.998583,182.999704,182.998541,183.005568,182.999844,182.995419
-2SV-vuLB-Kg,660.009493,659.805406,659.950755,660.146746,660.015449,659.983413,659.989853,666.134748,659.984823,659.982075,...,659.992616,659.996584,659.997243,659.996316,659.998569,659.997920,659.994654,660.019402,659.999407,659.980496
-404PageNotFound-,1123.578661,1123.111824,1123.491480,1124.028708,1123.257485,1123.539553,1123.517204,1190.157272,1124.349265,1123.396320,...,1123.539861,1123.563509,1123.564996,1123.562203,1123.557210,1121.547167,1123.539919,1124.239358,1123.573410,1123.547840
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zzbrunozz,674.512063,674.118340,674.463879,674.612510,674.375092,674.505888,674.481747,656.781096,674.536872,674.397748,...,674.491512,674.502213,674.502402,674.501356,674.498257,674.524832,674.496104,674.542785,674.505727,674.489526
zzeee,766.012125,765.769445,765.950586,766.147095,766.013541,765.988057,765.993117,767.424215,765.992635,765.977906,...,765.996332,766.001218,766.001900,766.000871,765.998114,765.978878,765.999021,766.032896,766.004323,765.987666
zzoptimuszz,4762.596748,4759.339419,4762.279494,4763.187046,4761.275265,4762.597797,4762.351470,4578.059288,4762.869767,4761.570000,...,4762.437718,4762.529691,4762.529826,4762.522121,4762.496303,4762.739500,4762.476159,4762.857205,4762.556040,4762.441916
zzyfo,383.361386,383.236769,383.307667,383.454858,383.369795,383.328864,383.336064,407.280888,383.560439,383.345349,...,383.341423,383.343542,383.344174,383.343467,383.356836,383.391377,383.341027,383.337317,383.346090,383.321573


In [96]:
def build_svd_model(uid, sampled, um, n):
  """Return the ``n`` highest-scored games for ``uid`` that the user has not played.

  ``um`` is a user-by-game score table; the row labelled ``uid`` is ranked in
  descending order, and games listed for that user in ``sampled`` are excluded.
  The result is a pandas Index of game names (shorter than ``n`` if fewer
  unplayed games exist). Raises ``KeyError`` when ``uid`` is not a row of ``um``.
  """
  # Rank first, then filter, so the relative order of the survivors is unchanged.
  ranked_scores = um.loc[uid].sort_values(ascending=False)

  already_played = sampled.loc[sampled['user_id'] == uid, 'item_name']
  is_new = ~ranked_scores.index.isin(already_played)

  return ranked_scores[is_new].index[:n]

In [97]:
# Top-3 SVD-based recommendations for one example user.
build_svd_model(uid='UnethicalPanda', sampled=sampled_users, um=um_repro, n=3)

Index(['NEKOPARA Vol. 0', 'GooCubelets 2', 'Revolution Ace'], dtype='object', name='item_name')

In [98]:
# Reconstruction error: element-wise RMSE between the imputed matrix and its
# rank-10 SVD approximation (both flattened in row-major order).
rmse = root_mean_squared_error(
    um_imp.to_numpy().ravel(),
    um_repro.to_numpy().ravel(),
)
print(f"RMSE between two ultility matrices: {rmse}")

RMSE between two ultility matrices: 158.32246103302427
