In [1]:
import numpy as np
import pandas as pd

from scipy.sparse import csr_matrix as sparse_matrix

In [2]:
# Load Data

# Column names are supplied explicitly; with `names=` and no `header=`,
# pandas treats the first row of the file as data. The fifth name ('0') is
# never loaded because `usecols` keeps only the first four columns.
data = pd.read_csv(
    'steam-200k.csv',
    names=['user', 'game', 'behaviour', 'hours', '0'],
    usecols=[0, 1, 2, 3],
)

# Filter only play time

# .copy() makes data_play an independent frame, so later column assignments
# on it cannot raise SettingWithCopyWarning or be confused with `data`.
data_play = data[data['behaviour'] == 'play'].copy()

# Construct R matrix

n_games = data_play['game'].nunique()
print("Number of Games: %s" % n_games)

n_users = data_play['user'].nunique()
print("Number of Users: %s" % n_users)

# Show a bounded preview rather than rendering the whole frame.
data_play.head(10)

Number of Games: 3600
Number of Users: 11350


Unnamed: 0,user,game,behaviour,hours
1,151603712,The Elder Scrolls V Skyrim,play,273.0
3,151603712,Fallout 4,play,87.0
5,151603712,Spore,play,14.9
7,151603712,Fallout New Vegas,play,12.1
9,151603712,Left 4 Dead 2,play,8.9
11,151603712,HuniePop,play,8.5
13,151603712,Path of Exile,play,8.1
15,151603712,Poly Bridge,play,7.5
17,151603712,Left 4 Dead,play,3.3
19,151603712,Team Fortress 2,play,2.8


In [3]:
def create_user_game_matrix(data, user_key='user', game_key='game', hours_key='hours'):
    """Build a dense user x game matrix from long-format play records.

    Parameters
    ----------
    data : pandas.DataFrame or mapping of column name -> 1-D values
        One row per (user, game) record.
    user_key : str
        Column holding the user identifiers.
    game_key : str
        Column holding the game identifiers.
    hours_key : str
        Column holding the values written into the matrix. Defaults to
        'hours', which was previously hard-coded.

    Returns
    -------
    X : numpy.ndarray of shape (n_users, n_games)
        Dense matrix; rows follow the sorted unique users, columns the sorted
        unique games. Repeated (user, game) pairs are summed by the sparse
        constructor; pairs that never occur are 0.
    user_mapper : dict
        user id -> row index.
    game_mapper : dict
        game id -> column index.
    user_inverse_mapper : dict
        row index -> user id.
    game_inverse_mapper : dict
        column index -> game id.
    user_ind : list of int
        Row index of every input record, in input order.
    game_ind : list of int
        Column index of every input record, in input order.
    """
    # A single np.unique call per key yields both the sorted distinct labels
    # and each record's position among them. This replaces four np.unique
    # calls plus a per-row dict lookup, and guarantees that the matrix shape
    # and the mappers are derived from the same set of labels.
    users, user_codes = np.unique(np.asarray(data[user_key]), return_inverse=True)
    games, game_codes = np.unique(np.asarray(data[game_key]), return_inverse=True)
    user_codes = user_codes.ravel()
    game_codes = game_codes.ravel()

    n_users = len(users)
    n_games = len(games)

    user_mapper = dict(zip(users, range(n_users)))
    game_mapper = dict(zip(games, range(n_games)))

    user_inverse_mapper = dict(zip(range(n_users), users))
    game_inverse_mapper = dict(zip(range(n_games), games))

    # Assemble through the sparse constructor, then densify as before.
    X = sparse_matrix(
        (np.asarray(data[hours_key]), (user_codes, game_codes)),
        shape=(n_users, n_games),
    ).toarray()

    # Callers receive plain Python lists of ints, exactly as before.
    user_ind = user_codes.tolist()
    game_ind = game_codes.tolist()

    return X, user_mapper, game_mapper, user_inverse_mapper, game_inverse_mapper, user_ind, game_ind

In [4]:
# Build the user x game play-time matrix (a dense array) together with the
# id <-> index lookup tables and the per-record row/column indices.

(
    user_game_hours,
    user_map,
    game_map,
    user_inv_map,
    game_inv_map,
    user_ind,
    game_ind,
) = create_user_game_matrix(data_play)

In [9]:
# Shape of the transposed matrix: (n_games, n_users).
user_game_hours.T.shape

(3600, 11350)