# Model recommendation with LightFM

### Import libraries

In [1]:
import pandas as pd
import json

### Loading Path Information from the Config (JSON) File

In [2]:
# Load the project configuration from config.json; later cells read file paths
# from it (e.g. config["DF_TRAIN_SCORES"]).
# The encoding is stated explicitly because open() otherwise falls back to the
# platform locale, whereas JSON text is expected to be UTF-8.
with open('config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

### Loading Custom Functions and Artifacts for Model

In [3]:
# import pickle
from utils.custom_data_structs import UserItemData
from utils.model_funcs import count_histories_by_popularity
from utils.model_funcs import get_df_random_histories
from utils.model_funcs import get_dict_most_popular_histories, get_dict_random_histories

### Defining some parameters and dtypes

In [4]:
# Column -> pandas dtype mapping, presumably for the validation data
# (name-based assumption; not referenced by the cells shown in this section).
dtype_df_valid = {
    "userId": "string",
    "userType": "category",
    "history": "string",
    "timestampHistory": "string",
}

# Column -> pandas dtype mapping handed to pd.read_csv when the
# training-score CSV is loaded.
dtype_df_train_score = {
    "userId": "string",
    "userType": "category",
    "history": "string",
    "score": "Float32",
    "historyFreshnessNormalized": "Float32",
}

# Tunable constants; neither is referenced by the cells shown in this section.
LIMIT_N_ROWS = 500_000
N_PARTS_DASK = 3

## READ DATA

### Retrieve Training Score Data (df_merged)

In [5]:
# Read the training-score CSV with the declared dtypes and discard the
# "Unnamed: 0" column in the same expression (presumably the index column
# that was written out with the CSV — TODO confirm).
df_merged = pd.read_csv(
    config["DF_TRAIN_SCORES"],
    dtype=dtype_df_train_score,
).drop(columns=["Unnamed: 0"])
df_merged

Unnamed: 0,userId,history,userType,historyFreshnessNormalized,score
0,fbb963d61eb8149e7f43b1bd905457ba5e106a830ddc27...,80aa7bb2-adce-4a55-9711-912c407927a1,Non-Logged,0.980416,0.704604
1,fbb963d61eb8149e7f43b1bd905457ba5e106a830ddc27...,d9e5f15d-b441-4d8b-bee4-462b106d3916,Non-Logged,0.613061,0.721303
2,17f1083e6079b0f28f7820a6803583d1c1b405c0718b11...,e273dba4-136c-45fb-bdd6-0cc57b13aaf0,Non-Logged,0.880859,0.637834
3,528a8d7a2af73101da8d6709c1ec875b449a5a58749a99...,a0562805-c7d1-4ffd-b622-87c50ae006f4,Non-Logged,0.945895,0.622225
4,2dd18b58a634a4e77181a202cf152df6169dfb3e4230ef...,233f8238-2ce0-470f-a9d5-0e0ac530382a,Non-Logged,0.13293,0.68147
...,...,...,...,...,...
6335310,5889d6ebbf62e6c115e0a280063dc8189cca490cbfea56...,7a349b09-badc-40a9-a194-83d959aeb50c,Non-Logged,0.966442,0.663881
6335311,5889d6ebbf62e6c115e0a280063dc8189cca490cbfea56...,6f344c45-e731-41b4-8c65-9967ebc03096,Non-Logged,0.937478,0.843734
6335312,5889d6ebbf62e6c115e0a280063dc8189cca490cbfea56...,4c586bb4-f71d-4b39-9df8-e38ac3f632a0,Non-Logged,0.939154,0.473613
6335313,5889d6ebbf62e6c115e0a280063dc8189cca490cbfea56...,855d20b7-53f2-4678-a10f-55402d085018,Non-Logged,0.929145,0.669966


## START RECOMMENDATIONS

## Get the most popular items from "df_merged"

In [6]:
# Per-history counts derived from the training-score frame (presumably one row
# per history with a "counts" column, as the describe() output suggests —
# confirm in utils.model_funcs).
df_count_histories = count_histories_by_popularity(df_merged)
# Dict form of the most popular histories; it is pickled to artifacts/ in a later cell.
dict_popular_histories = get_dict_most_popular_histories(df_count_histories)
# Summary statistics of the first ten rows only.
df_count_histories[:10].describe()

Unnamed: 0,historyFreshnessNormalized,counts
count,10.0,10.0
mean,0.963946,14598.3
std,0.022796,2365.938905
min,0.929145,12159.0
25%,0.944652,13081.25
50%,0.966443,13706.5
75%,0.983063,15187.0
max,0.992829,19639.0


## Example of how to use the "recommend_by_weighted_random" function
This adds a "random" factor to the recommendation, thereby adding an "exploration phase" to the algorithm.
* This "guided random" recommendation shows potentially unusual items to the user.
* `df_count_histories`: stores the DataFrame once, so it can be reused for each user later.
* `df_unpopular_histories`: composed of all "history" values not in `top_k_popular`.
* `unpopular_weights`: the more popular histories have a higher chance of being selected.

In [7]:
# Build the frame of histories used for the exploration ("random") recommendations.
# NOTE(review): no random seed is set in the visible cells; if the helper samples,
# results will differ between runs — confirm inside utils.model_funcs.
df_random = get_df_random_histories(df_count_histories)
# Dict form of that frame; it is pickled to artifacts/ in a later cell.
dict_random_histories = get_dict_random_histories(df_random)
# Summary statistics of the selected histories.
df_random.describe()

Unnamed: 0,historyFreshnessNormalized,counts
count,1000.0,1000.0
mean,0.975285,424.594
std,0.013922,364.49992
min,0.950985,16.0
25%,0.96471,77.0
50%,0.976902,336.0
75%,0.987488,687.0
max,1.0,1253.0


In [8]:
import pickle

# Persist both lookup dicts as pickle artifacts.
# Each file is opened in a `with` block so the handle is closed (and the data
# flushed to disk) as soon as the dump finishes, instead of being left open.
with open('artifacts/dict_popular_histories.pkl', 'wb') as popular_file:
    pickle.dump(dict_popular_histories, popular_file)

with open('artifacts/dict_random_histories.pkl', 'wb') as random_file:
    pickle.dump(dict_random_histories, random_file)

In [1]:
import pickle
from utils.model_funcs import read_popular_dict_into_list
from utils.model_funcs import read_random_dict_into_list

In [2]:
# Reload the pickled lookup dicts from the artifacts directory.
# Each file is opened in a `with` block so the handle is closed right after reading.
# NOTE: pickle.load can execute arbitrary code; only load artifacts produced by
# this project, never files from an untrusted source.
with open('artifacts/dict_popular_histories.pkl', 'rb') as popular_file:
    loaded_dict_popular_histories = pickle.load(popular_file)

with open('artifacts/dict_random_histories.pkl', 'rb') as random_file:
    loaded_dict_random_histories = pickle.load(random_file)

In [3]:
# Display the popular-histories dict that was just loaded from the pickle artifact.
loaded_dict_popular_histories

{'history': {0: 'd2593c3d-2347-40d9-948c-b6065e8459a9',
  1: 'f6b5d170-48b9-4f8e-88d4-c84b6668f3bd',
  2: '1f32787b-de2b-49be-8c20-ddaeae34cc22',
  3: '6a83890a-d9e9-4f6b-a6c6-90d031785bbf',
  4: 'f0a78e58-ec7e-494c-9462-fbd6446a9a89',
  5: '4c63d7cd-4902-4ffb-9b94-578b1b2151f0',
  6: '855d20b7-53f2-4678-a10f-55402d085018'},
 'historyFreshnessNormalized': {0: 0.9408344626426697,
  1: 0.9561064839363098,
  2: 0.9928287267684937,
  3: 0.9681763052940369,
  4: 0.9804162383079529,
  5: 0.935804545879364,
  6: 0.9291447997093201},
 'counts': {0: 19639,
  1: 17664,
  2: 15435,
  3: 14443,
  4: 13778,
  5: 13635,
  6: 13550}}

In [4]:
# Display the random-histories dict that was just loaded from the pickle artifact
# (the whole dict is rendered, so the output is long).
loaded_dict_random_histories

{'history': {2003: '947ec775-7b19-4956-9122-8423147a9363',
  20356: '618eff25-1867-48c4-833e-f96dc1cc38eb',
  5493: 'a5d73397-8c94-4452-ac83-a2c58581c617',
  3321: '52305c5a-0485-4117-9c94-b6754267287f',
  1329: '9b4c0708-c1db-454b-992e-fc33f04853c8',
  1112: '5dc6321c-4aad-4c69-bb5f-b0355b43e243',
  11661: '2d8cf4a1-8146-4423-a250-67a00222e4be',
  3345: '4498a685-2b13-430e-b488-284d2c103734',
  4898: '0ceb0236-4c1a-46ca-9bef-147122729c79',
  1042: '3b8cd1b0-4cbf-44a5-9261-7b1dba64fcc2',
  23659: '3c0523d5-bf81-4646-b294-765e095e0541',
  9488: '0c3442f3-b154-47bc-abea-4d4aaf74d69b',
  1475: '6dcc1adc-251d-41c6-b7cc-50f4b00181c5',
  1389: '21e9fd5c-7244-49be-8642-f50a069d102f',
  1394: '2d891155-fbac-4f54-8949-a1d8fb42839f',
  1749: 'f8f8fe7a-1ca9-4c0b-8347-eee307a0c45c',
  2747: 'f4c02139-4b4f-4da6-975d-6ffeb91a3bb6',
  2246: 'a61b04eb-6ada-4d82-843c-4e8d4733b965',
  1708: 'a80614bb-c02c-4837-96ff-cb9239189dcd',
  3446: 'be81f38c-fc94-4d30-90cc-4136b01dc83d',
  1291: '31b4ea4e-3092-457

In [5]:
# Turn the "history" mapping of the popular dict into a plain list and print it.
popular_history_map = loaded_dict_popular_histories["history"]
top_k_popular = read_popular_dict_into_list(popular_history_map)
print(top_k_popular)

['d2593c3d-2347-40d9-948c-b6065e8459a9', 'f6b5d170-48b9-4f8e-88d4-c84b6668f3bd', '1f32787b-de2b-49be-8c20-ddaeae34cc22', '6a83890a-d9e9-4f6b-a6c6-90d031785bbf', 'f0a78e58-ec7e-494c-9462-fbd6446a9a89', '4c63d7cd-4902-4ffb-9b94-578b1b2151f0', '855d20b7-53f2-4678-a10f-55402d085018']


In [6]:
# Turn the "history" mapping of the random dict into a list and print it.
# The printed list is much shorter than the dict, so the helper presumably
# selects a subset — TODO confirm in utils.model_funcs.
random_history_map = loaded_dict_random_histories["history"]
random_k_histories = read_random_dict_into_list(random_history_map)
print(random_k_histories)

['6b9af32b-d168-4acd-a6e3-99665e6ee0c2', 'f82ac580-aa0c-4165-a357-27e12cb4f3cf', 'a9d50956-b9dd-475c-a3d5-20b141c3fcd8', 'd755b265-ef3a-4977-80a4-b75ab44944a0', '9410dadd-0d3b-43de-9186-e934a3da6be3', '1c663751-20d8-4764-bcee-25bbe1a5b287']
