### Import the required libraries and load the dataset

In [1]:
#!pip install LibRecommender
import numpy as np
import pandas as pd
import tensorflow as tf
from libreco.data import split_by_ratio_chrono, DatasetFeat
from libreco.algorithms import DeepFM 

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
# Load the merged MovieLens sample and replace missing feature values:
# 0 for `age`, empty string for the other feature columns.
data = pd.read_csv(
    "../../data/sample_movielens_merged.csv", sep=",", header=0
).fillna(
    value={
        'age': 0,
        'genre1': '',
        'genre2': '',
        'genre3': '',
        'occupation': '',
        'sex': '',
    }
)

# Time-based split: 20% of the interactions are held out as test data
train_data, test_data = split_by_ratio_chrono(data, test_size=0.2)

In [3]:
# Preview the first rows of the loaded (and filled) frame
data.head()

Unnamed: 0,user,item,label,time,sex,age,occupation,genre1,genre2,genre3
0,4617,296,2,964138229,F,25,6,crime,drama,missing
1,1298,208,4,974849526,M,35,6,action,adventure,missing
2,4585,1769,4,964322774,M,35,7,action,thriller,missing
3,3706,1136,5,966376465,M,25,12,comedy,missing,missing
4,2137,1215,3,974640099,F,1,10,action,adventure,comedy


### Building dataset

Before building the final training dataset in the format LibRecommender expects, we need to specify which columns are:

* Sparse columns: Categorical columns

* Dense columns: Numerical columns

* User & Item columns

<b>Note: To use LibRecommender, the first four columns must always be named and ordered as user, item, label, and time, followed by any other feature columns.</b>

In [4]:
# Describe every feature column to LibRecommender.
# Grouped by the entity the feature belongs to:
user_col = ["sex", "age", "occupation"]
item_col = ["genre1", "genre2", "genre3"]
# The same features grouped by type: categorical (sparse) vs numerical (dense)
sparse_col = ["sex", "occupation", "genre1", "genre2", "genre3"]
dense_col = ["age"]

# NOTE: both calls rebind train_data / test_data from the split output to the
# built datasets, so re-run the split cell before re-running this one.
train_data, data_info = DatasetFeat.build_trainset(
    train_data,
    user_col=user_col,
    item_col=item_col,
    sparse_col=sparse_col,
    dense_col=dense_col,
)
test_data = DatasetFeat.build_testset(test_data)

In [5]:
# Display the summary of the built training set (user count, item count, density)
data_info

n_users: 5953, n_items: 3209, data density: 0.4213 %

### Model Training
Time to define our DeepFM model object and call model.fit()

A few points to note about the code:

* The `task` parameter has two options: `rating` or `ranking`
* `hidden_units` configures the DNN part of DeepFM

In [6]:
# DeepFM set up for the ranking task on the dataset described by data_info
model = DeepFM(
    data_info=data_info,
    task="ranking",
    # --- network ---
    embed_size=16,
    hidden_units=(128, 64, 32),  # DNN part of DeepFM
    use_bn=True,
    # --- optimisation ---
    lr=1e-4,
    batch_size=64,
    n_epochs=5,
)

In [7]:
# Train on the built training set; the test set is evaluated with the "loss"
# metric after every epoch (see the log below).
r = model.fit(
    train_data,
    eval_data=test_data,
    metrics=["loss"],
    neg_sampling=True,
    shuffle=True,
    verbose=2,
)

Training start time: [35m2024-06-03 12:00:32[0m
Instructions for updating:
Colocations handled automatically by placer.


  net = tf.layers.batch_normalization(net, training=is_training)
Instructions for updating:
Colocations handled automatically by placer.
  net = tf.layers.batch_normalization(net, training=is_training)


total params: [33m184,712[0m | embedding params: [33m157,391[0m | network params: [33m27,321[0m


train: 100%|██████████| 2516/2516 [00:10<00:00, 238.99it/s]


Epoch 1 elapsed: 10.530s
	 [32mtrain_loss: 0.9472[0m


eval_pointwise: 100%|██████████| 5/5 [00:00<00:00, 20.88it/s]


	 eval log_loss: 0.6380


train: 100%|██████████| 2516/2516 [00:09<00:00, 254.62it/s]


Epoch 2 elapsed: 9.883s
	 [32mtrain_loss: 0.5621[0m


eval_pointwise: 100%|██████████| 5/5 [00:00<00:00, 44.62it/s]


	 eval log_loss: 0.6143


train: 100%|██████████| 2516/2516 [00:10<00:00, 245.90it/s]


Epoch 3 elapsed: 10.234s
	 [32mtrain_loss: 0.5355[0m


eval_pointwise: 100%|██████████| 5/5 [00:00<00:00, 38.43it/s]


	 eval log_loss: 0.6102


train: 100%|██████████| 2516/2516 [00:11<00:00, 224.78it/s]


Epoch 4 elapsed: 11.202s
	 [32mtrain_loss: 0.5216[0m


eval_pointwise: 100%|██████████| 5/5 [00:00<00:00, 34.54it/s]


	 eval log_loss: 0.6117


train: 100%|██████████| 2516/2516 [00:09<00:00, 252.59it/s]


Epoch 5 elapsed: 9.966s
	 [32mtrain_loss: 0.5072[0m


eval_pointwise: 100%|██████████| 5/5 [00:00<00:00, 39.97it/s]


	 eval log_loss: 0.6115


### Using trained model

In [11]:
# Predicted preference score of user 2211 for item 110
model.predict(user=2211, item=110)

0.8990383

In [12]:
# Recommend 7 items for user 2211; the result is a dict keyed by the user id
model.recommend_user(user=2211, n_rec=7)

{2211: array([1198, 1265, 1210, 1266,  608,  527, 2997], dtype=int64)}

In [13]:
# Cold-start prediction: user "ccc" and item "not item" are unknown to the model.
# cold_start="average" still yields a score (presumably derived from averaged
# embeddings; confirm in the LibRecommender docs).
model.predict(user="ccc", item="not item", cold_start="average")

[31mDetect 1 unknown interaction(s), position: [0][0m


0.64209944

In [14]:
# Cold-start recommendation: the user id below is unknown to the model.
# cold_start="popular" presumably falls back to popular items; confirm in the
# LibRecommender docs.
model.recommend_user(user="are we good?", n_rec=7, cold_start="popular")

[31mDetect unknown user: are we good?[0m


{'are we good?': array([ 480, 1573,   50, 1193, 1193, 1393,  480])}

In [17]:
# Predict the preference score for each of the top recommended items.
# The item ids are fetched from recommend_user at run time instead of being
# pasted from an earlier cell's output, so the loop stays in sync with the
# model if it is retrained or the target user changes.
target_user = 2211
top_items = model.recommend_user(user=target_user, n_rec=7)[target_user]
for item_id in top_items:
    print(model.predict(user=target_user, item=item_id))

0.94304967
0.9350213
0.9214089
0.92063814
0.91628814
0.91500586
0.9149309
