# Deployment 

https://github.com/topspinj/recommender-tutorial/blob/master/part-3-implicit-feedback-recommender.ipynb


We only use ALS (efficient)

Input: User Id --> Top 5 activity; (Apply real-time (new data), handle missing users, invalid input)

# Training, get the model (fast) 

## Settings

In [1]:
OUTPUT_CLEANED_FOLDER = "output/"

## Read data

In [2]:
import os 
import pandas as pd 

df_interaction = pd.read_csv(os.path.join(OUTPUT_CLEANED_FOLDER, "user_activity_interaction_deep_cleaned.csv"))
df_user = pd.read_csv(os.path.join(OUTPUT_CLEANED_FOLDER, "user_info_deep_cleaned.csv"))
df_activity = pd.read_csv(os.path.join(OUTPUT_CLEANED_FOLDER, "activity_info_deep_cleaned.csv"))

  df_interaction = pd.read_csv(os.path.join(OUTPUT_CLEANED_FOLDER, "user_activity_interaction_deep_cleaned.csv"))


In [3]:
df_activity["id"]  = df_activity["id"].astype(str)
df_interaction = df_interaction.astype(str)
df_interaction["inter_score"] = 1
df_all = df_interaction.merge(df_activity, left_on=["activity_id"], right_on=["id"])[["user_id", "activity_id", "inter_score", "createdTime"]]
df_all["createdTime"] = pd.to_datetime(df_all["createdTime"])

In [4]:
df_all = df_all.drop_duplicates()

## Preprocessing

In [5]:
user_id_map = {i:v for i, v in enumerate(df_all["user_id"].unique())}
inverse_user_id_map = {v:i for i, v in enumerate(df_all["user_id"].unique())}
activity_id_map = {i:v for i, v in enumerate(df_all["activity_id"].unique())}
inverse_activity_id_map = {v:i for i, v in enumerate(df_all["activity_id"].unique())}

In [6]:
df_all_map = df_all.copy()

df_all_map['user_id'] = df_all_map['user_id'].map(inverse_user_id_map)
df_all_map['activity_id'] = df_all_map['activity_id'].map(inverse_activity_id_map)

In [7]:
print('df_all_map shape: ',df_all_map.shape)
print('number of users: ', len(user_id_map))
print('number of items: ', len(activity_id_map))
print('The sparse matrix is one with shape (%d , %d), with %d non-zero read_times'
      %(len(user_id_map),len(activity_id_map), df_all_map.shape[0]))


df_all_map shape:  (3286982, 4)
number of users:  59272
number of items:  4505
The sparse matrix is one with shape (59272 , 4505), with 3286982 non-zero read_times


## Training Model

In [8]:
import numpy as np

nb_users = len(user_id_map)
nb_articles = len(activity_id_map)
uim = np.zeros((nb_users, nb_articles), dtype=np.float32)
uim[df_all_map["user_id"], df_all_map["activity_id"]] = df_all_map["inter_score"]

In [9]:
uim.shape

(59272, 4505)

In [10]:
from scipy import sparse as sp
from implicit.als import AlternatingLeastSquares

uim = sp.csr_matrix(uim)
print('uim shape :', uim.shape)

model = AlternatingLeastSquares(factors=16, 
                                random_state=42,
                                num_threads=16,
                                alpha=128)

model.fit(user_items=uim, show_progress=True)

  from .autonotebook import tqdm as notebook_tqdm
  check_blas_config()


uim shape : (59272, 4505)


100%|██████████| 15/15 [00:10<00:00,  1.39it/s]


In [11]:
model.user_factors.shape[0], model.item_factors.shape[0]

(59272, 4505)

# Deployment Interface

In [12]:
# Parameters
recommended_topK = 5
considered_most_recent_top = 20

In [None]:
hits = []
ndcgs = []

for idx in tqdm(range(len(test_rating_map))):
    rating = test_rating_map[idx]
    items = negative_test_rating_map[idx][1]
    u = rating[0]
    gtItem = rating[1]
    items.append(gtItem)
    predictions = model.rank_items(u, uim, items)
    ranklist = predictions[0][:topK]
    hr = getHitRatio(ranklist, gtItem)
    ndcg = getNDCG(ranklist, gtItem)
    hits.append(hr)
    ndcgs.append(ndcg)