In [1]:
import pandas as pd
import joblib
import numpy as np
import scipy.sparse as sparse
import random
import implicit
from sklearn.preprocessing import MinMaxScaler

### Load Dataset

In [11]:
users_df = pd.read_csv("v1/data/users.csv")
items_df = pd.read_csv("v1/data/items.csv")
user_item_interaction_df = pd.read_csv("v1/data/user_item_interactions.csv")

### 1. Lightfm

In [12]:
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score
from lightfm.data import Dataset
from lightfm import LightFM

In [13]:
items_df['brand'] = items_df['brand'].astype('category')
items_df['brand'] = items_df['brand'].cat.codes
items_df['category'] = items_df['category'].astype('category')
items_df['category'] = items_df['category'].cat.codes
items_features = [(row['price'], row['brand'], row['category']) for index, row in items_df.iterrows()]
items_df.head()

Unnamed: 0,id,title,price,brand,category
0,14521,"Oxy 10, 10gm",286.2,321,119
1,12352,Make-Up Studio Brush Cleanser(Ph0900),1785.0,264,57
2,11507,Buddsbuddy Silicone Orthodontic Pacifier (1pc),169.15,57,328
3,12401,Make-Up Studio Cream Blusher,2200.0,264,42
4,14408,Set Wet Hair Gel Cool- 50 Ml,104.0,381,158


In [14]:
users_df['gender'] = users_df['gender'].astype('category')
users_df['gender'] = users_df['gender'].cat.codes
user_features =  [(row['age'], row['gender']) for index, row in users_df.iterrows()]

In [15]:
dataset = Dataset()
dataset.fit(users_df['id'].values, items_df['id'].values,user_features=user_features, item_features=items_features)

In [16]:
interactions, weights = dataset.build_interactions((row['user'], row['item'], row['rating']) for index, row in user_item_interaction_df.iterrows())

In [17]:
train, test = random_train_test_split(interactions)

In [18]:
lightfm_model = LightFM(learning_rate=0.05, loss='warp')
lightfm_model.fit(interactions, epochs=10)

<lightfm.lightfm.LightFM at 0x1364e94f0>

In [19]:
train_precision = precision_at_k(lightfm_model, train, k=10).mean()
test_precision = precision_at_k(lightfm_model, test, k=10).mean()

train_auc = auc_score(lightfm_model, train).mean()
test_auc = auc_score(lightfm_model, test).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

Precision: train 0.09, test 0.04.
AUC: train 0.95, test 0.95.


### 2. Using Implicit

In [20]:
dataset_df = user_item_interaction_df.copy()

In [21]:
dataset_transformed = dataset_df.copy()
dataset_transformed['user'] = dataset_transformed['user'].astype("category")
dataset_transformed['user_id'] = dataset_transformed['user'].cat.codes

sparse_item_user = sparse.csr_matrix((dataset_transformed['rating'].astype(float), (dataset_transformed['item'], dataset_transformed['user_id'])))
sparse_user_item = sparse.csr_matrix((dataset_transformed['rating'].astype(float), (dataset_transformed['user_id'], dataset_transformed['item'])))

In [22]:
model = implicit.als.AlternatingLeastSquares(factors=20, regularization=0.1, iterations=50)
model.fit(interactions)

  0%|          | 0/50 [00:00<?, ?it/s]

In [36]:
from implicit.evaluation import ranking_metrics_at_k
metrics = ranking_metrics_at_k(model, train, test, K=10, num_threads=4)
metrics

  0%|          | 0/14758 [00:00<?, ?it/s]

{'precision': 0.10577496814978471,
 'map': 0.0806387600721724,
 'ndcg': 0.12058011957293072,
 'auc': 0.5695938936430563}

In [None]:
train