# Modelling

In [1]:
import pandas as pd
import numpy as np

from scipy import sparse

from lightfm import LightFM
from sklearn.metrics.pairwise import cosine_similarity
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score
from lightfm.cross_validation import random_train_test_split

from resources import *

#Import Warnings
import warnings
warnings.filterwarnings("ignore")



## Load data

In [2]:
# Read the user/item ownership records (first CSV column becomes the index)
# and rename the generic `variable`/`value` columns to `id`/`owned`.
recdata = (
    pd.read_csv('recdata.csv', index_col=0)
    .rename(columns={'variable': 'id', 'value': 'owned'})
)
recdata.head()

Unnamed: 0,id,uid,owned
0,10,0,1.0
1,10,1,1.0
2,10,3,1.0
3,10,4,1.0
4,10,10,1.0


In [3]:
# Read the title/id lookup table for games; the first CSV column is the index.
games = pd.read_csv(
    'gamenames.csv',
    index_col=0,
)
games.head()

Unnamed: 0,title,id
0,Lost Summoner Kitty,761140.0
1,Ironbound,643980.0
2,Real Pool 3D - Poolians,670290.0
3,弹炸人2222,767400.0
4,,773570.0


## Additional Preprocessing

### Create interaction matrix

In [4]:
# Build the interaction frame from the long-format records with the project
# helper: `uid` identifies users, `id` identifies items, `owned` is the value.
interactions = create_interaction_matrix(
    df=recdata,
    user_col='uid',
    item_col='id',
    rating_col='owned',
)
# Display the dimensions of the resulting frame.
interactions.shape

(69277, 8791)

In [5]:
# Preview the first rows of the interaction frame.
interactions.head()

id,10,20,30,40,50,60,70,80,130,220,...,526790,527340,527440,527510,527520,527810,527890,527900,528660,530720
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Check the container type returned by the helper; later cells slice its rows
# and read `.values` from it.
type(interactions)

pandas.core.frame.DataFrame

### Train test split

In [7]:
# Number of users = number of rows in the interaction frame
interactions.shape[0]

69277

In [8]:
# Establish number of users in train/test sets

# Share of users assigned to the training set.
train_fraction = 80 / 100
n_users = len(interactions)

# round() without `ndigits` returns an int, so the counts print as whole
# numbers and can be used directly as slice positions. (round(x, 0) returns a
# float such as 55422.0.)
train_num = round(train_fraction * n_users)
print(f'We desire {train_num} users in our training set.')

# The remaining users form the test set.
test_num = n_users - train_num
print(f'We desire {test_num} users in our test set.')

We desire 55422.0 users in our training set.
We desire 13855.0 users in our test set.


In [9]:
# Define train and test sets
# Derive the split position from `train_num` instead of hard-coding it, so the
# split follows the data if the number of users changes. int() makes this work
# whether `train_num` is an int or a whole-valued float.
split_at = int(train_num)

# Positional row split: the first `split_at` users go to train, the rest to test.
# NOTE(review): this partitions *users*, so test users never appear in the
# training rows — confirm this is the intended evaluation protocol for a
# model that learns one embedding per user.
train = interactions.iloc[:split_at]
test = interactions.iloc[split_at:]

# Sanity check: the two parts must cover every user exactly once.
assert len(train) + len(test) == len(interactions)

### Create user dictionary

In [10]:
# Create user dictionary using helper function
# (built from the full interaction frame, not only the training rows)
user_dict = create_user_dict(interactions=interactions)

### Create item dictionary

In [11]:
# Build the item lookup from the games table with the project helper,
# using `id` as the identifier column and `title` as the name column.
games_dict = create_item_dict(
    df=games,
    id_col='id',
    name_col='title',
)

### Create sparse matrices

In [16]:
# Create sparse matrices for evaluation
train_sparse = sparse.csr_matrix(train.values)

# Pad the test matrix with all-zero rows so that it has as many rows as the
# training matrix. The padding is built as an empty sparse block and stacked
# with sparse.vstack, so no dense zero array is allocated, and `test` itself
# is left untouched (re-running this cell gives the same result).
n_train_rows = train.shape[0]
n_test_rows, n_items = test.shape
n_pad_rows = n_train_rows - n_test_rows
if n_pad_rows < 0:
    # Padding can only add rows; a larger test set cannot be aligned this way.
    raise ValueError(
        f'test has {n_test_rows} rows but train has only {n_train_rows}; '
        'cannot pad test up to the size of train.'
    )

# NOTE(review): after padding, row i of `test_sparse` holds the interactions of
# the i-th *test* user, while row i of `train_sparse` holds the i-th *train*
# user. If the evaluation functions match rows to the model's user indices,
# test metrics compare one user's predictions against a different user's
# items — confirm, and consider an interaction-level split instead.
test_sparse = sparse.vstack(
    [
        sparse.csr_matrix(test.values),
        sparse.csr_matrix((n_pad_rows, n_items)),  # all-zero padding rows
    ],
    format='csr',
)

## Modelling using LightFM

### WARP loss model

In [17]:
# Fit a model on the training users via the project helper, using WARP loss,
# 30 components, 30 epochs and 4 jobs.
mf_model = run_model(
    interactions=train,
    n_components=30,
    loss='warp',
    epoch=30,
    n_jobs=4,
)

In [18]:
# Mean precision@10 of the WARP model on the train and test matrices
train_precision, test_precision = (
    precision_at_k(mf_model, matrix, k=10).mean()
    for matrix in (train_sparse, test_sparse)
)
print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))

Precision: train 0.66, test 0.19.


In [19]:
# Mean AUC of the WARP model on the train and test matrices
train_auc, test_auc = (
    auc_score(mf_model, matrix).mean()
    for matrix in (train_sparse, test_sparse)
)
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

AUC: train 0.99, test 0.94.


### BPR loss model

In [20]:
# Fit a second model on the same training users, identical settings except
# for the loss, which is BPR here.
mf_model_bpr = run_model(
    interactions=train,
    n_components=30,
    loss='bpr',
    epoch=30,
    n_jobs=4,
)

In [21]:
# Mean precision@10 of the BPR model on the train and test matrices
train_precision, test_precision = (
    precision_at_k(mf_model_bpr, matrix, k=10).mean()
    for matrix in (train_sparse, test_sparse)
)
print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))

Precision: train 0.75, test 0.15.


In [22]:
# Mean AUC of the BPR model on the train and test matrices
train_auc, test_auc = (
    auc_score(mf_model_bpr, matrix).mean()
    for matrix in (train_sparse, test_sparse)
)
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

AUC: train 0.97, test 0.73.


## Recommendations

In [23]:
# Ask the WARP model for five recommendations for user 1, printing the
# recommended titles but not the items the user already has.
# NOTE(review): the model was fitted on `train` (a leading slice of the users)
# while the full `interactions` frame is passed here — presumably only user
# ids covered by the training rows are valid; verify against the helper.
rec_list = sample_recommendation_user(
    model=mf_model,
    interactions=interactions,
    user_id=1,
    user_dict=user_dict,
    item_dict=games_dict,
    threshold=0,
    num_items=5,
    show_known=False,
    show_recs=True,
)


 Recommended Items:
1- Worms Crazy Golf
2- Worms Blast
3- Worms Pinball
4- Worms Ultimate Mayhem
5- Borderlands
