In [1]:
# Change into the Recommenders checkout. The location can be overridden through
# the RECOMMENDERS_ROOT environment variable; the default is the original,
# machine-specific path and is kept only for backward compatibility.
import os

RECOMMENDERS_ROOT = os.environ.get("RECOMMENDERS_ROOT", r"C:\Users\saura\Recommenders")
os.chdir(RECOMMENDERS_ROOT)
print(os.getcwd())  # echo the new working directory, as the `cd` magic did

C:\Users\saura\Recommenders


In [2]:
# NOTE(review): in a notebook cell this line runs as a magic inside the
# already-running kernel (the recorded output is only the generic "restart the
# kernel" note). It presumably does not switch the environment this kernel runs
# in; activate `reco_base` before launching Jupyter, or pick its kernel. TODO confirm.
conda activate reco_base


Note: you may need to restart the kernel to use updated packages.


In [5]:
pip install tensorflow==1.15.3

Note: you may need to restart the kernel to use updated packages.


In [1]:
import tensorflow as tf

In [2]:
# Sanity check of the TensorFlow build actually loaded by this kernel.
# NOTE(review): the recorded output below reports 2.2.0 although the install
# cell above pins 1.15.3, so the pinned version was not the one imported in
# that run.
print("Tensorflow version : {}".format(tf.__version__))

Tensorflow version : 2.2.0


In [None]:
# Load libraries.
# The `from __future__` imports must stay the first statements of this cell.

from __future__ import print_function
from __future__ import absolute_import
from __future__ import division

# Set the environment path to find Recommenders.
# NOTE(review): a relative entry is resolved against the kernel's current
# working directory, not against the notebook file; confirm that "../../"
# reaches the Recommenders root from where this notebook is run.
import sys
sys.path.append("../../")

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib inline

# NOTE(review): `pm` is not referenced in the cells visible here; results are
# recorded with scrapbook further down.
import papermill as pm

from reco_utils.recommender.rbm.rbm import RBM
from reco_utils.dataset.python_splitters import numpy_stratified_split
from reco_utils.dataset.sparse import AffinityMatrix


from reco_utils.dataset import movielens
from reco_utils.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k

# For interactive mode only: reload edited modules automatically
%load_ext autoreload
%autoreload 2

# Report the interpreter and library versions used for this run
print("System version: {}".format(sys.version))
print("Pandas version: {}".format(pd.__version__))
print("Tensorflow version : {}".format(tf.__version__))

In [None]:
# Select MovieLens data size: 100k, 1m, 10m, or 20m.
# This value is passed as `size` to movielens.load_pandas_df when the data is loaded.
MOVIELENS_DATA_SIZE = '100k'

In [None]:
# Load the MovieLens ratings as a pandas DataFrame, using the column names
# that the rest of this notebook relies on.
data = movielens.load_pandas_df(
    size=MOVIELENS_DATA_SIZE,
    header=['userID','movieID','rating','timestamp']
)

# Convert ratings to 32-bit integers in order to reduce memory consumption.
# The whole column is assigned (instead of `data.loc[:, 'rating'] = ...`):
# the `.loc` form writes into the existing column, which on recent pandas
# versions keeps the old dtype, so the down-cast would silently not happen.
# NOTE(review): the integer cast truncates any fractional ratings; confirm the
# selected dataset size only contains whole-number ratings.
data['rating'] = data['rating'].astype(np.int32)

data.head()

In [None]:

# Column names handed to AffinityMatrix, so that the same user / item / rating
# names are used across the analysis
header = dict(
    col_user="userID",
    col_item="movieID",
    col_rating="rating",
)

# Instantiate the sparse matrix generator from the ratings DataFrame
am = AffinityMatrix(DF=data, **header)

# Obtain the sparse matrix
X = am.gen_affinity_matrix()

In [None]:
# Split the affinity matrix into a train and a test matrix, relying on the
# splitter's default settings (no ratio or seed is passed here).
Xtr, Xtst = numpy_stratified_split(X)

In [None]:
# Report the shape of each split
for label, matrix in (('train matrix size', Xtr), ('test matrix size', Xtst)):
    print(label, matrix.shape)

In [None]:
import tensorflow as tf
# NOTE(review): `set_random_seed` is expected to be importable from the
# top-level package only on TensorFlow 1.x (confirm against the installed
# version); the name is not referenced by any cell visible here.
from tensorflow import set_random_seed

In [None]:
# Fix TensorFlow's global random seed so that repeated runs are comparable.
# The previous call, `tf.random.set_seed()`, passed no seed and would raise a
# TypeError because the seed argument is required.
# `tf.compat.v1.set_random_seed` is available both on TF 1.15 (the version
# pinned at the top of this notebook) and on TF 2.x.
SEED = 42
tf.compat.v1.set_random_seed(SEED)

In [None]:
# Initialise the RBM model class with the hyper-parameters used for this run
model = RBM(
    hidden_units=600,
    training_epoch=30,
    minibatch_size=60,
    keep_prob=0.9,
    with_metrics=True,
)

In [None]:
# Model fit: both the train and the test matrix are handed to `fit`; its return
# value is kept as the training time reported in the results table.
train_time= model.fit(Xtr, Xtst)

In [None]:

# Number of top-scored elements to be recommended
K = 10

# Model prediction on the test set Xtst. K is passed explicitly so that
# changing it here changes the number of recommendations; previously K was
# defined but never handed to the model, which used its own default.
top_k, test_time = model.recommend_k_items(Xtst, top_k=K)

In [None]:
# Map the sparse matrices back to DataFrames for evaluation: the recommendations
# are mapped with kind='prediction', the test matrix with kind='ratings'.
top_k_df = am.map_back_sparse(top_k, kind = 'prediction')
test_df = am.map_back_sparse(Xtst, kind = 'ratings')

In [None]:
# Preview the first 10 rows of the recommendations DataFrame
top_k_df.head(10)

In [None]:
def ranking_metrics(
    data_size,
    data_true,
    data_pred,
    time_train,
    time_test,
    K
):
    """Evaluate top-K recommendations and summarise them in a one-row table.

    Args:
        data_size: Label stored in the "Dataset" column.
        data_true: Ground-truth data, passed as first argument to every metric.
        data_pred: Predictions, passed as second argument to every metric.
        time_train: Value stored in the "Train time (s)" column.
        time_test: Value stored in the "Test time (s)" column.
        K: Cut-off forwarded to every metric as `k` and stored in the "K" column.

    Returns:
        pandas.DataFrame with a single row (index 0) holding the dataset label,
        K, MAP, nDCG@k, Precision@k, Recall@k and the two timings.
    """
    # Every metric is called with the same column names and relevancy settings.
    shared_kwargs = dict(
        col_user="userID",
        col_item="movieID",
        col_rating="rating",
        col_prediction="prediction",
        relevancy_method="top_k",
        k=K,
    )

    # Metrics are evaluated in the same order as their result columns.
    metric_functions = (map_at_k, ndcg_at_k, precision_at_k, recall_at_k)
    scores = [
        metric(data_true, data_pred, **shared_kwargs)
        for metric in metric_functions
    ]

    summary = {"Dataset": data_size, "K": K}
    summary.update(zip(("MAP", "nDCG@k", "Precision@k", "Recall@k"), scores))
    summary["Train time (s)"] = time_train
    summary["Test time (s)"] = time_test

    return pd.DataFrame(summary, index=[0])

In [None]:
# Evaluate the recommendations. The cut-off and the dataset label are taken
# from the notebook-level settings (K, MOVIELENS_DATA_SIZE) instead of being
# repeated as literals, so the table stays consistent with what was run.
eval_100k = ranking_metrics(
    data_size="mv {}".format(MOVIELENS_DATA_SIZE),
    data_true=test_df,
    data_pred=top_k_df,
    time_train=train_time,
    time_test=test_time,
    K=K,
)

eval_100k

In [None]:
import scrapbook as sb

# Record results with scrapbook for tests: first the four ranking metrics
# (first row of the evaluation table), then the two timings.
for glue_name, column in (
    ("map", 'MAP'),
    ("ndcg", 'nDCG@k'),
    ("precision", 'Precision@k'),
    ("recall", 'Recall@k'),
):
    sb.glue(glue_name, eval_100k[column][0])

sb.glue("train_time", train_time)
sb.glue("test_time", test_time)

In [None]:
import joblib
# Save the model to disk; the file name is relative, so it is written to the
# current working directory.
# NOTE(review): this serialises the whole RBM object. Confirm that it pickles
# cleanly after training (e.g. if it holds a live TensorFlow session), and only
# ever load 'RBM.sav' files that come from a trusted source.
filename = 'RBM.sav'
joblib.dump(model, filename)