In [2]:
import random
import pandas as pd
import numpy as np

import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
import implicit
#from sklearn.preprocessing import MinMaxScaler

data_df = pd.read_csv("./steam-200k.csv", sep=',', names=["UserID", "GameID", "Action", "Behavior","Extra"])
# getting rid of purchased but not played games
data_df = data_df.loc[(data_df.Action == "play")]

# First, generate dictionaries for mapping old id to new id for users and games
unique_GameID = data_df['GameID'].unique()
unique_UserID = data_df['UserID'].unique()
j = 0
user_old2new_id_dict = dict()
for u in unique_UserID:
    user_old2new_id_dict[u] = j #map old id to new id
    j += 1
j = 0
game_old2new_id_dict = dict()
for i in unique_GameID:
    game_old2new_id_dict[i] = j #map game_name to numeric id
    j += 1
    
# Then, use the generated dictionaries to reindex UserID and GameID in the data_df
user_list = data_df['UserID'].values
game_list = data_df['GameID'].values
for j in range(len(data_df)):
    user_list[j] = user_old2new_id_dict[user_list[j]]
    game_list[j] = game_old2new_id_dict[game_list[j]]
data_df['UserID'] = user_list
data_df['GameID'] = game_list
#print(data_df.head())

# generate train_df with 70% samples and test_df with 30% samples, and there should have no overlap between them.
train_index = np.random.random(len(data_df)) <= 0.7
train_df = data_df[train_index]
test_df = data_df[~train_index]

# generate train_mat and test_mat
num_user = len(data_df['UserID'].unique())
num_game = len(data_df['GameID'].unique())

train_mat = sparse.coo_matrix((train_df['Behavior'].values, (train_df['UserID'].values, train_df['GameID'].values)), shape=(num_user, num_game)).astype(float)
test_mat = sparse.coo_matrix((test_df['Behavior'].values, (test_df['UserID'].values, test_df['GameID'].values)), shape=(num_user, num_game)).astype(float).toarray()


In [3]:
# Initialize the als model and fit it using the sparse item-user matrix
model = implicit.als.AlternatingLeastSquares(factors=20, regularization=0.1, iterations=20)

# Calculate the confidence by multiplying it by our alpha value.
alpha_val = 15
data_conf = (train_mat * alpha_val).astype('double')

#Fit the model
model.fit(data_conf)




  0%|          | 0/20 [00:00<?, ?it/s]

In [4]:
# Find the 10 most similar to Jay-Z
item_id = 0 #Jay-Z
n_similar = 10

# Use implicit to get similar items.
similar = model.similar_items(item_id, n_similar)

"""# Print the names of our most similar artists
for item in similar:
    idx, score = item
    print(data.artist.loc[data.artist_id == idx].iloc[0])"""
print(similar)
print(num_user)




[(0, 0.9999999), (8053, 0.74098337), (1635, 0.7385016), (4257, 0.7383747), (9966, 0.7174638), (8070, 0.7129957), (11138, 0.6849666), (10120, 0.68157905), (6290, 0.67159104), (3491, 0.6713574)]
11350


In [20]:
# Create a numeric user_id and artist_id column
data_df['UserID'] = data_df['UserID'].astype("category").cat.codes
data_df['GameID'] = data_df['GameID'].astype("category").cat.codes
"""data['UserID'] = data['UserID'].cat.codes
data['GameID'] = data['GameID'].cat.codes"""

# The implicit library expects data as a item-user matrix so we
# create two matricies, one for fitting the model (item-user) 
# and one for recommendations (user-item)
sparse_item_user = sparse.csr_matrix((data_df['Behavior'].astype(float), (data_df['GameID'], data_df['UserID'])))
sparse_user_item = sparse.csr_matrix((data_df['Behavior'].astype(float), (data_df['UserID'], data_df['GameID'])))
#sparse_item_user = sparse.coo_matrix((data_df['Behavior'].values, (train_df['GameID'].values,train_df['UserID'].values)), shape=(num_game,num_user)).astype(float)
#train_mat =        sparse.coo_matrix((train_df['Behavior'].values, (train_df['UserID'].values,train_df['GameID'].values)), shape=(num_user, num_game)).astype(float)

# Initialize the als model and fit it using the sparse item-user matrix
model = implicit.als.AlternatingLeastSquares(factors=20, regularization=0.1, iterations=20)

# Calculate the confidence by multiplying it by our alpha value.
alpha_val = 15
data_conf = (sparse_item_user * alpha_val).astype('double')

#Fit the model
model.fit(data_conf)

# Find the 10 most similar to Elders Scroll
item_id = 0
n_similar = 10

# Use implicit to get similar items.
similar = model.similar_items(item_id, n_similar)
similar

  0%|          | 0/20 [00:00<?, ?it/s]

[(0, 1.0),
 (501, 0.7116198),
 (154, 0.6754619),
 (28, 0.6291469),
 (341, 0.57538116),
 (76, 0.5642129),
 (2155, 0.540547),
 (4, 0.52737325),
 (3496, 0.5214334),
 (485, 0.5196563)]

In [21]:
#similar games
for idx,score in similar:
    print(unique_GameID[idx],score)

The Elder Scrolls V Skyrim 1.0
Arma 3 0.7116198
Borderlands 2 0.6754619
Sid Meier's Civilization V 0.6291469
Terraria 0.57538116
DayZ 0.5642129
The Amazing Spider-Man 2 0.540547
Left 4 Dead 2 0.52737325
NASCAR The Game 2013 0.5214334
Warframe 0.5196563


In [7]:
# Create recommendations for user with id 0
user_id = 0

# Use the implicit recommender.
recommended = model.recommend(user_id, sparse_user_item)

recommended

for idx,score in recommended:
    print(unique_GameID[idx],score)

Unturned 1.2052162
Portal 2 1.0707977
Starbound 0.971685
Borderlands 2 0.971024
Portal 0.9323226
PAYDAY 2 0.87397075
Besiege 0.87112254
Don't Starve 0.8702251
Far Cry 3 0.8502069
Counter-Strike Global Offensive 0.8435215


In [16]:
count = 0
for game in unique_GameID:
    if 'Mort' in game:
        print(game,count)
    count+= 1

Mortal Kombat Komplete Edition 496
Mortal Kombat X 844
Mortal Kombat Kollection 977
Mortal Online 1301
ComaMortuary 2764
Larva Mortus 2811
Post Mortem 3041
Mortal Kombat Legacy II - Ep. 1 Reunited in Macau 3476


In [17]:
print(unique_GameID[496])

Mortal Kombat Komplete Edition
