# Model recommendation with lighfm

### Import libraries

In [233]:
import os
import sys
import itertools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import scipy
import numpy as np
import pandas as pd
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k, recall_at_k
from lightfm.cross_validation import random_train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from lightfm import cross_validation
import scipy.sparse as sp
from scipy import sparse

### Defining variables

In [234]:
import json

with open('config.json', 'r') as f:
    config = json.load(f)

In [235]:
dtype_df_valid = {
"userId" : 'string',
"userType" : 'category',
"history" : 'string',
"timestampHistory" : 'string'
}

dtype_df_train_score = {
"userId" : 'string',
"userType" : 'category',
"history" : 'string',
"score" : 'Float32'
}

In [236]:
K_LIGHTFM_ITEMS = 5
K_POPULAR_ITEMS = 5
K_SAMPLED_ITEMS = 5
LIMIT_N_ROWS = 10000
N_PARTS_DASK = 3
SEED = 42

### Retrieve data

In [237]:
import pandas as pd
# path config

# df_valid = pd.read_csv(config["VALID_DF"],dtype=dtype_df_valid)
df_valid = pd.read_csv(config["VALID_DF"],dtype=dtype_df_valid,nrows=LIMIT_N_ROWS)
df_valid.dropna()

Unnamed: 0,userId,userType,history,timestampHistory
0,e25fbee3a42d45a2914f9b061df3386b2ded2d8cc1f3d4...,Logged,['be89a7da-d9fa-49d4-9fdc-388c27a15bc8'  '01c5...,[1660533136590 1660672113513]
1,d0afad7ea843d86597d822f0df1d39d31a3fea7c39fdee...,Logged,['77901133-aee7-4f7b-afc0-652231d76fe9'],[1660556860253]
2,755062dd39a48809880cf363b04268c3af2c003088cde0...,Logged,['857aa90f-a7ec-410d-ba82-dfa4f85d4e71'],[1660561649242]
3,ec1639851d99586c7f4da928deb49187303aec6e3b8d66...,Logged,['b7b90e18-7613-4ca0-a8fc-fd69addfcd85'  '835f...,[1660533830245 1660540831707 1660542659111 166...
4,a120515626fe5d12b22b7d5a7c5008912cc69284aa26cc...,Logged,['9c764c3a-f9f8-4fb2-b2c4-6331eaeb3dd6'  'b8eb...,[1660548813953 1660572329731 1660594848200]
...,...,...,...,...
9995,eb82748abb8faabca586f788a10887170105475b16e575...,Logged,['2277cab9-798b-4b0e-8f7c-cb2862ad3e4c'],[1660675022675]
9996,432bd2d2f562efcc59a3943b943b8783788c3526801761...,Logged,['35e20551-659a-415f-95fe-7acbb89b69f6'],[1660571720373]
9997,d80b12031cb45593376c1c1776f52091ec24f3b09b671e...,Logged,['5f05a0e2-5e92-47e3-b8c2-8d15ee772db3'],[1660665278986]
9998,1146778da61716481bd5bcfd985c948c52671d3d7a47d7...,Logged,['f52361e4-9206-49c4-8117-2451c0b0c6f1'],[1660677752418]


In [None]:
import pandas as pd

# df_ratings = pd.read_csv(config["DF_TRAIN_SCORES"], dtype=dtype_df_train_score)
df_ratings = pd.read_csv(config["DF_TRAIN_SCORES"], dtype=dtype_df_train_score, nrows=LIMIT_N_ROWS)
df_ratings.drop(columns=["Unnamed: 0","score"],inplace=True)
df_ratings

Unnamed: 0,userId,history,userType
0,fbb963d61eb8149e7f43b1bd905457ba5e106a830ddc27...,80aa7bb2-adce-4a55-9711-912c407927a1,Non-Logged


In [239]:
df_news = pd.read_csv(config["DF_ITEMS_FEATURE"])
df_news.drop(columns=["Unnamed: 0"],inplace=True)
df_news.head(3)

Unnamed: 0,page,age_exp_normalized,ageCategories
0,7371a9b5-5824-4c57-8704-00a74feebe79,0.151439,very-old
1,7a5ea08f-4583-49e2-ba52-a71999443f7b,0.140788,very-old
2,6afc8bbb-4f36-43d5-8a44-a2917df5621a,0.12261,very-old


In [240]:
import pickle

loaded_model = pickle.load(open('artifacts/lightfm_model.pkl', 'rb'))
loaded_user_id_map = pickle.load(open('artifacts/user_id_map.pkl', 'rb'))
loaded_item_id_map_reverse = pickle.load(open('artifacts/item_id_map_reverse.pkl', 'rb'))
loaded_user_feature_map = pickle.load(open('artifacts/user_feature_map.pkl', 'rb'))
loaded_interactions_shape = pickle.load(open('artifacts/interactions_shape.pkl', 'rb'))

loaded_n_users, loaded_n_items = loaded_interactions_shape

### Make predictions to known and unknowm on same recommendation function with pkls

In [241]:
from utils.custom_treat_data_funcs import explode_df_columns

cols_to_explode = ["history", "timestampHistory"]
cols_and_id = cols_to_explode.copy()
cols_and_id.insert(0,"userId")
cols_and_id.append("userType")
cols_and_id = tuple(cols_and_id)
exploded_df = explode_df_columns(df_valid.loc[:,cols_and_id], cols_to_explode)

In [242]:
df_valid

Unnamed: 0,userId,userType,history,timestampHistory
0,e25fbee3a42d45a2914f9b061df3386b2ded2d8cc1f3d4...,Logged,['be89a7da-d9fa-49d4-9fdc-388c27a15bc8'  '01c5...,[1660533136590 1660672113513]
1,d0afad7ea843d86597d822f0df1d39d31a3fea7c39fdee...,Logged,['77901133-aee7-4f7b-afc0-652231d76fe9'],[1660556860253]
2,755062dd39a48809880cf363b04268c3af2c003088cde0...,Logged,['857aa90f-a7ec-410d-ba82-dfa4f85d4e71'],[1660561649242]
3,ec1639851d99586c7f4da928deb49187303aec6e3b8d66...,Logged,['b7b90e18-7613-4ca0-a8fc-fd69addfcd85'  '835f...,[1660533830245 1660540831707 1660542659111 166...
4,a120515626fe5d12b22b7d5a7c5008912cc69284aa26cc...,Logged,['9c764c3a-f9f8-4fb2-b2c4-6331eaeb3dd6'  'b8eb...,[1660548813953 1660572329731 1660594848200]
...,...,...,...,...
9995,eb82748abb8faabca586f788a10887170105475b16e575...,Logged,['2277cab9-798b-4b0e-8f7c-cb2862ad3e4c'],[1660675022675]
9996,432bd2d2f562efcc59a3943b943b8783788c3526801761...,Logged,['35e20551-659a-415f-95fe-7acbb89b69f6'],[1660571720373]
9997,d80b12031cb45593376c1c1776f52091ec24f3b09b671e...,Logged,['5f05a0e2-5e92-47e3-b8c2-8d15ee772db3'],[1660665278986]
9998,1146778da61716481bd5bcfd985c948c52671d3d7a47d7...,Logged,['f52361e4-9206-49c4-8117-2451c0b0c6f1'],[1660677752418]


In [243]:
exploded_df

Unnamed: 0,userId,history,timestampHistory,userType
0,e25fbee3a42d45a2914f9b061df3386b2ded2d8cc1f3d4...,be89a7da-d9fa-49d4-9fdc-388c27a15bc8,1660533136590,Logged
1,e25fbee3a42d45a2914f9b061df3386b2ded2d8cc1f3d4...,01c59ff6-fb82-4258-918f-2910cb2d4c52,1660672113513,Logged
2,d0afad7ea843d86597d822f0df1d39d31a3fea7c39fdee...,77901133-aee7-4f7b-afc0-652231d76fe9,1660556860253,Logged
3,755062dd39a48809880cf363b04268c3af2c003088cde0...,857aa90f-a7ec-410d-ba82-dfa4f85d4e71,1660561649242,Logged
4,ec1639851d99586c7f4da928deb49187303aec6e3b8d66...,b7b90e18-7613-4ca0-a8fc-fd69addfcd85,1660533830245,Logged
...,...,...,...,...
19036,eb82748abb8faabca586f788a10887170105475b16e575...,2277cab9-798b-4b0e-8f7c-cb2862ad3e4c,1660675022675,Logged
19037,432bd2d2f562efcc59a3943b943b8783788c3526801761...,35e20551-659a-415f-95fe-7acbb89b69f6,1660571720373,Logged
19038,d80b12031cb45593376c1c1776f52091ec24f3b09b671e...,5f05a0e2-5e92-47e3-b8c2-8d15ee772db3,1660665278986,Logged
19039,1146778da61716481bd5bcfd985c948c52671d3d7a47d7...,f52361e4-9206-49c4-8117-2451c0b0c6f1,1660677752418,Logged


In [244]:
exploded_df = pd.concat([exploded_df, df_ratings]).reset_index()
exploded_df

Unnamed: 0,index,userId,history,timestampHistory,userType
0,0,e25fbee3a42d45a2914f9b061df3386b2ded2d8cc1f3d4...,be89a7da-d9fa-49d4-9fdc-388c27a15bc8,1660533136590,Logged
1,1,e25fbee3a42d45a2914f9b061df3386b2ded2d8cc1f3d4...,01c59ff6-fb82-4258-918f-2910cb2d4c52,1660672113513,Logged
2,2,d0afad7ea843d86597d822f0df1d39d31a3fea7c39fdee...,77901133-aee7-4f7b-afc0-652231d76fe9,1660556860253,Logged
3,3,755062dd39a48809880cf363b04268c3af2c003088cde0...,857aa90f-a7ec-410d-ba82-dfa4f85d4e71,1660561649242,Logged
4,4,ec1639851d99586c7f4da928deb49187303aec6e3b8d66...,b7b90e18-7613-4ca0-a8fc-fd69addfcd85,1660533830245,Logged
...,...,...,...,...,...
19037,19037,432bd2d2f562efcc59a3943b943b8783788c3526801761...,35e20551-659a-415f-95fe-7acbb89b69f6,1660571720373,Logged
19038,19038,d80b12031cb45593376c1c1776f52091ec24f3b09b671e...,5f05a0e2-5e92-47e3-b8c2-8d15ee772db3,1660665278986,Logged
19039,19039,1146778da61716481bd5bcfd985c948c52671d3d7a47d7...,f52361e4-9206-49c4-8117-2451c0b0c6f1,1660677752418,Logged
19040,19040,0efaf6bce7d8a08922d67dd741f872f0a8c0a6aaea3790...,3ee8ff4c-2dae-4394-8ff6-abd22556e323,1660668190898,Logged


In [245]:
exploded_df["check_hifens"] = exploded_df["history"].str.split('-').apply(lambda x : len(x) == 5).astype("bool")
exploded_df["check_size"] = exploded_df["history"].str.replace(r'-', '', regex=True).apply(lambda x : len(x) == 32).astype("bool")
exploded_df["check_chars"] = exploded_df["history"].str.replace(r'-', '', regex=True).str.replace(r'[a-f0-9]', '', regex=True).apply(lambda x : x == '').astype("bool")
exploded_df["check_history"] = (exploded_df["check_hifens"] & exploded_df["check_size"] & exploded_df["check_chars"]).astype("bool")
exploded_df["check_history"].value_counts()

check_history
True     18469
False      573
Name: count, dtype: int64

In [246]:
exploded_df_valid = exploded_df[~exploded_df["check_history"]==False][["userId","userType","history"]]
exploded_df_valid

Unnamed: 0,userId,userType,history
0,e25fbee3a42d45a2914f9b061df3386b2ded2d8cc1f3d4...,Logged,be89a7da-d9fa-49d4-9fdc-388c27a15bc8
1,e25fbee3a42d45a2914f9b061df3386b2ded2d8cc1f3d4...,Logged,01c59ff6-fb82-4258-918f-2910cb2d4c52
2,d0afad7ea843d86597d822f0df1d39d31a3fea7c39fdee...,Logged,77901133-aee7-4f7b-afc0-652231d76fe9
3,755062dd39a48809880cf363b04268c3af2c003088cde0...,Logged,857aa90f-a7ec-410d-ba82-dfa4f85d4e71
4,ec1639851d99586c7f4da928deb49187303aec6e3b8d66...,Logged,b7b90e18-7613-4ca0-a8fc-fd69addfcd85
...,...,...,...
19037,432bd2d2f562efcc59a3943b943b8783788c3526801761...,Logged,35e20551-659a-415f-95fe-7acbb89b69f6
19038,d80b12031cb45593376c1c1776f52091ec24f3b09b671e...,Logged,5f05a0e2-5e92-47e3-b8c2-8d15ee772db3
19039,1146778da61716481bd5bcfd985c948c52671d3d7a47d7...,Logged,f52361e4-9206-49c4-8117-2451c0b0c6f1
19040,0efaf6bce7d8a08922d67dd741f872f0a8c0a6aaea3790...,Logged,3ee8ff4c-2dae-4394-8ff6-abd22556e323


In [247]:
count_histories = exploded_df_valid.groupby(["history"]).size().sort_values(ascending=False).astype(dtype="UInt16")
count_histories

history
9c764c3a-f9f8-4fb2-b2c4-6331eaeb3dd6    813
4c3d47a1-6f4b-424f-8944-6c227e686c5c    614
eb23272d-8e6c-479d-b972-eabeb5f6f3dd    536
d730c4a6-e8f6-4fde-b73a-afbe148479cd    274
aeab0e46-f1e4-41e9-821b-571255c41f69    246
                                       ... 
70d987ad-a50a-4490-815a-efbeb0ecab2c      1
70e337bb-2d53-4f51-977f-4ee636ea3eb4      1
6fa217fb-92a1-48d1-bf8e-a75369f34aa9      1
6faaf970-bbbf-43cc-b3ec-3ed5d53d6098      1
708f0b7e-3158-403c-b459-a67ab2188eca      1
Length: 6998, dtype: UInt16

In [248]:
import random

random.seed(SEED)
rand_counter = 0

def get_pseudo_random_int():
    global rand_counter
    rand_counter = rand_counter + 1
    if rand_counter > 40:
        rand_counter = 0
    return random.randint(0, rand_counter+10)

In [249]:
top_k_popular_histories = count_histories.iloc[:K_POPULAR_ITEMS]
top_k_popular_histories_list = list(set(top_k_popular_histories.keys()))

out_k_pop_histories = count_histories.iloc[K_POPULAR_ITEMS:]
out_k_pop_weights = out_k_pop_histories.values

random_k_histories = out_k_pop_histories.sample(n=K_SAMPLED_ITEMS, weights=out_k_pop_weights, random_state=get_pseudo_random_int())
random_k_histories_list = list(set(random_k_histories.keys()))

print(random_k_histories_list)
print(top_k_popular_histories_list+random_k_histories_list)

['ef75bac0-7225-4113-b87e-7fd012c283d3', 'ce32e61c-07ae-4d54-8871-d3b59a198238', '82e7b237-e7ea-4cf5-b44c-55385e2cdbb0', '69296266-b1f0-4c12-bda6-59750ade9770', 'e3238ed6-f1d7-469d-830d-e204e2e0823f']
['4c3d47a1-6f4b-424f-8944-6c227e686c5c', 'd730c4a6-e8f6-4fde-b73a-afbe148479cd', '9c764c3a-f9f8-4fb2-b2c4-6331eaeb3dd6', 'aeab0e46-f1e4-41e9-821b-571255c41f69', 'eb23272d-8e6c-479d-b972-eabeb5f6f3dd', 'ef75bac0-7225-4113-b87e-7fd012c283d3', 'ce32e61c-07ae-4d54-8871-d3b59a198238', '82e7b237-e7ea-4cf5-b44c-55385e2cdbb0', '69296266-b1f0-4c12-bda6-59750ade9770', 'e3238ed6-f1d7-469d-830d-e204e2e0823f']


In [250]:
def format_newuser_input(user_feature_map, user_feature_list):
  normalised_val = 1.0 
  target_indices = []
  for feature in user_feature_list:
    try:
        target_indices.append(user_feature_map[feature])
    except KeyError:
        print("new user feature encountered '{}'".format(feature))
        pass
  #print("target indices: {}".format(target_indices))
  new_user_features = np.zeros(len(user_feature_map.keys()))
  for i in target_indices:
    new_user_features[i] = normalised_val
  new_user_features = sparse.csr_matrix(new_user_features)
  return(new_user_features)

In [251]:
def sample_recommendation_by_title(user_hash,df_news,user_feature_list,item_id_map_reverse,user_feature_map,user_id_map,model):
    try:
        user_x = user_id_map[user_hash]
        scores = model.predict(user_x, np.arange(loaded_n_items)) # means predict for all
    except:
        new_user_features = format_newuser_input(user_feature_map, user_feature_list)
        scores = model.predict(0, np.arange(loaded_n_items), user_features=new_user_features)
    
    top_k_indices = np.argsort(-scores)[:K_LIGHTFM_ITEMS]  # Sort scores in descending order and take the top K_ITEMS
    top_k_items_lfm = [item_id_map_reverse[i] for i in top_k_indices]
    
    random_k_histories = out_k_pop_histories.sample(n=K_SAMPLED_ITEMS, weights=out_k_pop_weights, random_state=get_pseudo_random_int())
    random_k_histories_list = list(set(random_k_histories.keys()))

    top_k_items = list(set(top_k_items_lfm + top_k_popular_histories_list + random_k_histories_list))

    print("Top 5 recommended items:")

    for x in top_k_items:
        row = df_news[df_news["page"] == x]
        print("        %s" % row["title"].values[0])


In [252]:
def get_recommended_history_list(user_hash,user_feature_list,item_id_map_reverse,user_feature_map,user_id_map,model):
    """
    This function verifies if the users is known or new, and makes recommendations depending on this verification.
    The top 5 recommendations from the list are returned.
    """
    try:
        user_x = user_id_map[user_hash]
        scores = model.predict(user_x, np.arange(loaded_n_items)) # means predict for all
    except:
        new_user_features = format_newuser_input(user_feature_map, user_feature_list)
        scores = model.predict(0, np.arange(loaded_n_items), user_features=new_user_features)
    
    top_k_indices = np.argsort(-scores)[:K_LIGHTFM_ITEMS]  # Sort scores in descending order and take the top K_ITEMS
    top_k_items_lfm = [item_id_map_reverse[i] for i in top_k_indices]

    random_k_histories = out_k_pop_histories.sample(n=K_SAMPLED_ITEMS, weights=out_k_pop_weights, random_state=get_pseudo_random_int())
    random_k_histories_list = list(set(random_k_histories.keys()))
    
    top_k_items = list(set(top_k_items_lfm + top_k_popular_histories_list + random_k_histories_list))

    return top_k_items

In [253]:
# # Testing for just one user
# user = 38
# user_feature_list = [df_valid["userType"].iloc[user]]
# user_hash = df_valid["userId"].iloc[user]
# validation_history_hashes = df_valid["history"].iloc[user]

# recommeded_histories = get_recommended_history_list(user_hash,user_feature_list,loaded_item_id_map_reverse,loaded_user_feature_map,loaded_user_id_map,loaded_model)
# print(recommeded_histories)
# print(validation_history_hashes)
# print(validation_history_hashes in recommeded_histories)

In [254]:
def count_valid_recommendations(validation_history_hashes, recommeded_histories):
    """
    This function receives 
    * The `validation_history_hashes` (which is a list of histories contained in the "validacao.csv")
    * The `recommeded_histories` (which are the recommended histories/items by the model)
    Then, it verifies how many recommended items match with the validation histories.
    """
    count_valids = 0
    for valid_hist in validation_history_hashes:
        if valid_hist in recommeded_histories:
            count_valids = count_valids+1
    return count_valids

In [255]:
def check_valid_recommendations(validation_history_hash, recommeded_histories):
    """
    Check if validation_history_hash is in recommeded_histories
    """
    # print(validation_history_hash)
    # print(recommeded_histories)
    recommendation_in_validation_flag = False
    if validation_history_hash in recommeded_histories:
        recommendation_in_validation_flag = True
    return recommendation_in_validation_flag
    

In [256]:
# for index, row in df_valid.iterrows():
#     """
#     For each user, get the top 5 recommendations from the model, and also count how many of them are contained within the validation set.
#     Stores the results on the columns `recommended_hists` `matched_recommendations`.
#     """
#     user_feature_list = [row["userType"]]
#     user_hash = row["userId"]
#     validation_history_hashes = row["history"]

#     recommended_hist = get_recommended_history_list(user_hash,user_feature_list,loaded_item_id_map_reverse,loaded_user_feature_map,loaded_user_id_map,loaded_model)
#     num_valid_recommendations = count_valid_recommendations(validation_history_hashes, recommended_hist)

#     row["recommended_hists"] = recommended_hist
#     row["matched_recommendations"] = num_valid_recommendations

In [257]:
# # exploded_df_valid = exploded_df_valid.head(1000)

# exploded_df_valid.loc[:,"recommended_hists"] = exploded_df_valid.apply(lambda x :
#     get_recommended_history_list(
#         x.userId,
#         [x.userType],
#         loaded_item_id_map_reverse,
#         loaded_user_feature_map,
#         loaded_user_id_map,
#         loaded_model),
#         axis=1)

In [258]:
# import swifter

# exploded_df_valid.loc[:,"recommended_hists"] = exploded_df_valid.swifter.apply(lambda x :
#     get_recommended_history_list(
#         x.userId,
#         [x.userType],
#         loaded_item_id_map_reverse,
#         loaded_user_feature_map,
#         loaded_user_id_map,
#         loaded_model),
#         axis=1)

In [259]:
# import multiprocessing
# from joblib import Parallel, delayed

# results = Parallel(n_jobs=multiprocessing.cpu_count())(
#     exploded_df_valid.apply(lambda x :
#         get_recommended_history_list(
#             x.userId,
#             [x.userType],
#             loaded_item_id_map_reverse,
#             loaded_user_feature_map,
#             loaded_user_id_map,
#             loaded_model),
#             axis=1)
#             )
# exploded_df_valid.loc[:,"recommended_hists"] = results

In [260]:
# import dask.dataframe as dd
# from dask.multiprocessing import get

# ddata = dd.from_pandas(exploded_df_valid, npartitions=3)

# def apply_my_func_to_df(df):
#     df.apply(lambda x :
#         get_recommended_history_list(
#             x.userId,
#             [x.userType],
#             loaded_item_id_map_reverse,
#             loaded_user_feature_map,
#             loaded_user_id_map,
#             loaded_model),
#             axis=1)

# result = ddata.map_partitions(apply_my_func_to_df).compute(get=get)

In [261]:
import dask.dataframe as dd
import dask.multiprocessing

ddf_exploded_valid = dd.from_pandas(exploded_df_valid, npartitions=N_PARTS_DASK)

result = ddf_exploded_valid.apply(lambda x :
    get_recommended_history_list(
        x.userId,
        [x.userType],
        loaded_item_id_map_reverse,
        loaded_user_feature_map,
        loaded_user_id_map,
        loaded_model),
        axis=1,
        meta=('string','string')).compute()

exploded_df_valid["recommended_hists"] = result


In [262]:
# list_ex_0 = exploded_df_valid.loc[1,"recommended_hists"]
# list_ex_0.append(exploded_df_valid.loc[1,"history"])
# list_to_check = list(set(list_ex_0))

# test_history = exploded_df_valid.loc[1,"history"]
# print(test_history)
# print()
# check_valid_recommendations(test_history, list_to_check)

In [263]:
exploded_df_valid["matched_recommendations"] = exploded_df_valid.apply(lambda x :
    check_valid_recommendations(x.history, x.recommended_hists),
    axis=1
    )

In [265]:
exploded_df_valid

Unnamed: 0,userId,userType,history,recommended_hists,matched_recommendations
0,e25fbee3a42d45a2914f9b061df3386b2ded2d8cc1f3d4...,Logged,be89a7da-d9fa-49d4-9fdc-388c27a15bc8,"[3b33c0e8-8b98-49f8-953b-20b8111a1c3d, f6603ae...",False
1,e25fbee3a42d45a2914f9b061df3386b2ded2d8cc1f3d4...,Logged,01c59ff6-fb82-4258-918f-2910cb2d4c52,"[b19d8018-1a43-4311-bec1-a6fcec8ac18d, b7cf538...",False
2,d0afad7ea843d86597d822f0df1d39d31a3fea7c39fdee...,Logged,77901133-aee7-4f7b-afc0-652231d76fe9,"[99f4a710-c0c3-463e-909f-2a1665c7f6c7, b7cf538...",False
3,755062dd39a48809880cf363b04268c3af2c003088cde0...,Logged,857aa90f-a7ec-410d-ba82-dfa4f85d4e71,"[be89a7da-d9fa-49d4-9fdc-388c27a15bc8, b7cf538...",False
4,ec1639851d99586c7f4da928deb49187303aec6e3b8d66...,Logged,b7b90e18-7613-4ca0-a8fc-fd69addfcd85,"[279ccbff-f203-4c6d-aa48-83d97f085302, b7b90e1...",True
...,...,...,...,...,...
19037,432bd2d2f562efcc59a3943b943b8783788c3526801761...,Logged,35e20551-659a-415f-95fe-7acbb89b69f6,"[0e462e0e-b7e1-48dd-84c4-07e7fd176a4a, 42a8b32...",False
19038,d80b12031cb45593376c1c1776f52091ec24f3b09b671e...,Logged,5f05a0e2-5e92-47e3-b8c2-8d15ee772db3,"[23bd4838-cf3a-40d5-8b27-7d90c1abe71b, 4c3d47a...",False
19039,1146778da61716481bd5bcfd985c948c52671d3d7a47d7...,Logged,f52361e4-9206-49c4-8117-2451c0b0c6f1,"[279ccbff-f203-4c6d-aa48-83d97f085302, 6a83890...",False
19040,0efaf6bce7d8a08922d67dd741f872f0a8c0a6aaea3790...,Logged,3ee8ff4c-2dae-4394-8ff6-abd22556e323,"[279ccbff-f203-4c6d-aa48-83d97f085302, 9c764c3...",False


In [266]:
# user = 1
# user_feature_list = [exploded_df_valid["userType"].iloc[user]]
# user_hash = exploded_df_valid["userId"].iloc[user]
# validation_history_hashes = exploded_df_valid["history"].iloc[user]

# print(user_hash)
# print(user_feature_list)

# recommeded_histories = get_recommended_history_list(user_hash,user_feature_list,loaded_item_id_map_reverse,loaded_user_feature_map,loaded_user_id_map,loaded_model)
# print(recommeded_histories)
# print(validation_history_hashes)
# print(validation_history_hashes in recommeded_histories)

In [267]:
exploded_df_valid[exploded_df_valid["matched_recommendations"]==True]

Unnamed: 0,userId,userType,history,recommended_hists,matched_recommendations
4,ec1639851d99586c7f4da928deb49187303aec6e3b8d66...,Logged,b7b90e18-7613-4ca0-a8fc-fd69addfcd85,"[279ccbff-f203-4c6d-aa48-83d97f085302, b7b90e1...",True
9,a120515626fe5d12b22b7d5a7c5008912cc69284aa26cc...,Logged,9c764c3a-f9f8-4fb2-b2c4-6331eaeb3dd6,"[b7cf538b-7fbc-42ca-ba46-f003db7efeaa, f9044b6...",True
18,fa639a5d4a68ee851643301355a07aeceaaa2817d80cfd...,Logged,4c3d47a1-6f4b-424f-8944-6c227e686c5c,"[7c2c0c12-6dee-4f32-ac9b-15bc975ebbbc, 20eccc8...",True
21,fa639a5d4a68ee851643301355a07aeceaaa2817d80cfd...,Logged,eb23272d-8e6c-479d-b972-eabeb5f6f3dd,"[99f4a710-c0c3-463e-909f-2a1665c7f6c7, 08945c8...",True
79,a6f020ce4721dc0afa88aaeae40ffe31061e505b56d97e...,Logged,eb23272d-8e6c-479d-b972-eabeb5f6f3dd,"[279ccbff-f203-4c6d-aa48-83d97f085302, eb23272...",True
...,...,...,...,...,...
18944,9b5ae6f7b2dc7f46e97d41836abb1d09f5860990da0bee...,Logged,eb23272d-8e6c-479d-b972-eabeb5f6f3dd,"[b7cf538b-7fbc-42ca-ba46-f003db7efeaa, ef75bac...",True
18959,88fe2098b421b70ce3b71893090cda8c377867a3021272...,Logged,31e64f8d-ca41-4831-8673-98c707014658,"[eb23272d-8e6c-479d-b972-eabeb5f6f3dd, 0e462e0...",True
18982,606b3bd2b2c4420d45fad763af0c8eb00ce58216c6f3c1...,Logged,b19d8018-1a43-4311-bec1-a6fcec8ac18d,"[eb23272d-8e6c-479d-b972-eabeb5f6f3dd, b7cf538...",True
18983,9b3dbdd690b14f5b9673c2ccf4458db672f564eb630530...,Logged,d730c4a6-e8f6-4fde-b73a-afbe148479cd,"[e425b416-93ea-40d2-805e-6184dce981cc, eb23272...",True


In [268]:
exploded_df_valid[exploded_df_valid["matched_recommendations"]==True]["userId"].count()

np.int64(2677)