### Processing user-game data

In [1]:
import gzip

def parse(path):
    """Lazily yield one record per line of a gzip-compressed file.

    Every line of the file is passed to ``eval`` and the resulting object
    is yielded, so each line must hold one complete Python expression.

    Args:
        path: Path of the gzip file to read.

    Yields:
        The object produced by evaluating each line, in file order.
    """
    # SECURITY: eval() executes whatever code the file contains. Only run this
    # on trusted dumps; if the lines are plain literals, ast.literal_eval is a
    # safer drop-in.
    # The `with` block closes the gzip handle once the generator is exhausted
    # or closed, instead of leaking it.
    with gzip.open(path, 'r') as g:
        for l in g:
            yield eval(l)

In [2]:
# parse() is a generator function, so df_ui is a single-pass iterator over
# the records of the file, not an in-memory table.
df_ui = parse(path='australian_users_items.json.gz')

In [3]:
# Maps an integer steam id to the list of that user's item records.
ui_dict = dict()

In [4]:
# For every user record keep only the game id (as int) and the total play
# time of each owned item, keyed by the user's integer steam id.
for record in df_ui:
    owned = []
    for item in record['items']:
        owned.append({
            'game_id': int(item['item_id']),
            'play_time': item['playtime_forever'],
        })
    ui_dict[int(record['steam_id'])] = owned

In [5]:
import json

# Persist the user -> items mapping. JSON object keys are always strings, so
# the integer steam ids are written as strings.
with open('ui_dict.json', 'w') as out_file:
    json.dump(ui_dict, out_file)

### Processing game data

In [6]:
# parse() is a generator function, so df_games can be iterated only once.
df_games = parse(path='steam_games.json.gz')

In [7]:
from collections import defaultdict
# Vocabulary of every distinct genre/spec label, in first-seen order.
tags = []
game_id_dict = defaultdict(list)
for game in df_games:
    # Records without an 'id' are ignored entirely.
    if 'id' not in game:
        continue
    # Genres are scanned before specs so the label order stays stable.
    for field in ('genres', 'specs'):
        if field in game:
            for label in game[field]:
                if label not in tags:
                    tags.append(label)

In [8]:
"""Turn game id to 1-hot vectors"""

# Encode every game as a 0/1 vector over `tags`: position k is 1 when the
# game lists tags[k] among its genres or specs (several positions may be set).

# A fresh generator is needed: the previous one was consumed while building
# `tags`.
df_games = parse('steam_games.json.gz')
num_tags = len(tags)

# Label -> column lookup built once, replacing a linear tags.index() scan for
# every label of every game.
# NOTE(review): requires the labels to be hashable (presumably strings) --
# confirm against the data.
tag_index = {tag: pos for pos, tag in enumerate(tags)}

for d in df_games:
    if 'id' in d:
        game_id = int(d['id'])

        vec_1hot = [0] * num_tags
        for field in ('genres', 'specs'):
            if field in d:
                for label in d[field]:
                    vec_1hot[tag_index[label]] = 1

        game_id_dict[game_id] = vec_1hot


In [9]:
# Save the label vocabulary; list order matches the vector columns.
tags_payload = {'tags': tags}
with open('game_tags.json', 'w') as out_file:
    json.dump(tags_payload, out_file)

In [10]:
# Save the game id -> tag vector mapping (integer ids become string keys in
# the JSON output).
with open('game_dict.json', 'w') as out_file:
    json.dump(game_id_dict, out_file)

### Customer Embedding

In [13]:
import numpy as np

# user_history: {user_id: array with one tag vector per owned game that is
#                present in game_id_dict -- shape (T, num_tags) when at least
#                one game matched}
# user_type:    {user_id: play-time-weighted tag profile of length num_tags,
#                scaled to sum to 1 when it is non-zero}
user_history = {}
user_type = {}
for user, owned_games in ui_dict.items():
    # Users with no items get no entry at all.
    if len(owned_games) == 0:
        continue

    history = []
    profile = np.zeros(num_tags)
    total_time = 0

    for game in owned_games:
        # Skip games for which no tag vector was built.
        if game['game_id'] not in game_id_dict:
            continue
        tag_vec = game_id_dict[game['game_id']]
        history.append(tag_vec)
        profile += np.array(tag_vec) * game['play_time']
        total_time += game['play_time']

    user_history[user] = np.array(history)

    # Only normalise when there is play time to divide by. Dividing by a zero
    # total would fill the profile with NaN (0/0), which the `!= 0` check
    # below cannot catch; such users keep an all-zero profile instead.
    if total_time > 0:
        profile /= total_time
        profile_sum = profile.sum()
        if profile_sum != 0:
            profile /= profile_sum

    user_type[user] = profile
        
    


AttributeError: 'dict' object has no attribute 'sum'

In [12]:
# Sanity check: show the profile vector of the first user only.
p = next(iter(user_type), None)
if p is not None:
    print(user_type[p])

[6.78686865e-01 1.48007879e-02 1.04623070e-01 1.14325204e-01
 2.29755025e-01 7.26392199e-01 1.63842546e-02 2.15944584e-01
 6.51773374e-01 1.38855333e-01 1.59957339e-01 7.96260624e-01
 6.30883585e-01 1.89411995e-01 5.21292457e-03 3.35918030e-01
 1.00460348e-01 3.49663173e-01 3.39547488e-03 2.72073307e-04
 0.00000000e+00 0.00000000e+00 0.00000000e+00 3.32076355e-01
 5.34950537e-02 0.00000000e+00 1.18885152e-01 2.55204762e-03
 2.72073307e-04 0.00000000e+00 2.21467672e-03 1.95294220e-02
 3.78219988e-01 2.63366962e-03 4.89731953e-05 7.75136853e-02
 5.49588081e-04 2.23355861e-01 4.48757713e-02 4.37330634e-02
 3.15877110e-02 3.58222599e-01 2.11624060e-01 0.00000000e+00
 0.00000000e+00 0.00000000e+00 5.40500833e-02 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 2.72073307e-04
 2.72073307e-04 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00]
