##### Copyright 2020 The TensorFlow Authors.

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TensorFlow Recommenders: Quickstart

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/recommenders/quickstart"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/recommenders/blob/main/docs/examples/quickstart.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/recommenders/blob/main/docs/examples/quickstart.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
  <td>
    <a href="https://storage.googleapis.com/tensorflow_docs/recommenders/docs/examples/quickstart.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
  </td>
</table>

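This notebook is adapted from the TFRS quickstart, which builds a simple matrix factorization model using the [MovieLens 100K dataset](https://grouplens.org/datasets/movielens/100k/) with TFRS. The cells below instead train a Keras embedding-based neural network on survey ratings loaded from CSV files, and use that model to recommend titles for a given user.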

### Import TFRS

First, install and import TFRS:

In [None]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets

In [None]:
from typing import Dict, Text

import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

### Read the data

In [None]:
# import libraries

import pandas as pd
import numpy as np
import datetime as dt


# users dataset
users = pd.read_csv('/content/survey_users.csv')
print(users.shape)
users.info()  # info() prints its own report and returns None, so it is not wrapped in print()
# Only the last expression of a cell is rendered automatically; display() (provided
# by the IPython kernel) is needed to show intermediate frames.
display(users.head())

# titles dataset
titles = pd.read_csv('/content/survey_titles.csv')
print(titles.shape)
titles.info()
display(titles.head())

# number of distinct title IDs
print('unique TitleId values:', titles.TitleId.nunique())

# ratings dataset
ratings_df = pd.read_csv('/content/survey_ratings.csv')
print(ratings_df.shape)
ratings_df.info()
ratings_df.head()


In [None]:
# Locations of the three survey CSV files (users, titles, ratings)
userPath = '/content/survey_users.csv'
titlePath = '/content/survey_titles.csv'
ratingsPath = '/content/survey_ratings.csv'

In [None]:
# Join every rating with its title metadata for exploratory analysis.
# An outer join keeps unmatched rows from both frames, so a title without ratings
# (or a rating whose TitleId is missing from `titles`) survives with NaN in the
# columns that come from the other frame.
merged_df = ratings_df.merge(titles, how='outer', on='TitleId')
merged_df

In [None]:
# Remove the 'Unnamed: 0' column (presumably an index column that was written into
# one of the CSV files - TODO confirm). Rebinding the name instead of using
# inplace=True, together with errors='ignore', keeps this cell idempotent:
# re-running it after the column is already gone no longer raises KeyError.
merged_df = merged_df.drop(columns='Unnamed: 0', errors='ignore')

# Count the missing values in every column
merged_df.isnull().sum()

In [None]:
# Summarise the merged frame: distinct users, distinct titles, and the rating values that occur
n_unique_users = merged_df['UserId'].nunique()
n_unique_titles = merged_df['TitleId'].nunique()
rating_values = merged_df['Rating'].unique()

print('unique users: ', n_unique_users)
print('unique titles: ', n_unique_titles)
print('unique ratings', rating_values)

In [None]:
# Show merged_df again; as the last expression of the cell it is rendered as the cell output
merged_df

In [None]:
# Give every distinct UserId a consecutive integer (0, 1, 2, ...), numbered in the
# order the IDs are returned by unique(), and store it in a new UserSeqId column
user_ids = merged_df['UserId'].unique()
user_id_map = dict(zip(user_ids, range(len(user_ids))))
merged_df['UserSeqId'] = merged_df['UserId'].map(user_id_map)


In [None]:
# Give every distinct TitleId a consecutive integer (0, 1, 2, ...), numbered in the
# order the IDs are returned by unique(), and store it in a new TitleSeqId column
title_ids = merged_df['TitleId'].unique()
title_id_map = dict(zip(title_ids, range(len(title_ids))))
merged_df['TitleSeqId'] = merged_df['TitleId'].map(title_id_map)

In [None]:
from sklearn.model_selection import train_test_split
# split the data into training and testing sets
# merged_df is the result of an outer merge, so it may contain rows without a rating
# or without an ID. A single NaN target turns the MSE loss into NaN, so only rows
# that have both model inputs and the target are kept. When nothing is missing this
# filter removes no rows and the split is the same as before.
model_rows = merged_df.dropna(subset=['UserSeqId', 'TitleSeqId', 'Rating'])
train_data, test_data = train_test_split(model_rows, test_size=0.2, random_state=42)

In [None]:
# Display merged_df, which at this point also carries the UserSeqId and TitleSeqId columns
merged_df

In [None]:
# Define the model architecture: a user embedding and a title embedding are
# concatenated and passed through a stack of dense layers that predicts the rating.
num_users = len(user_ids)
num_titles = len(title_ids)
embedding_size = 100 # latent features dimensions

# prepare the inputs for the neural network (one sequential ID per sample)
user_input = tf.keras.Input(shape=(1,))
title_input = tf.keras.Input(shape=(1,))

# define the input embeddings
user_embedding = tf.keras.layers.Embedding(num_users, embedding_size)(user_input)
title_embedding = tf.keras.layers.Embedding(num_titles, embedding_size)(title_input)

# flatten the embeddings to drop the length-1 sequence axis
user_flatten = tf.keras.layers.Flatten()(user_embedding)
title_flatten = tf.keras.layers.Flatten()(title_embedding)

# combine both embeddings into a single feature vector
concatenated = tf.keras.layers.Concatenate()([user_flatten, title_flatten])

# define dense layer 1 and dropout layer 1
dense1 = tf.keras.layers.Dense(64, activation='relu')(concatenated)
dropout1 = tf.keras.layers.Dropout(0.2)(dense1)

# define dense layer 2 and dropout layer 2
dense2 = tf.keras.layers.Dense(32, activation='relu')(dropout1)
dropout2 = tf.keras.layers.Dropout(0.2)(dense2)

# define dense layer 3 and dropout layer 3
dense3 = tf.keras.layers.Dense(16, activation='relu')(dropout2)
dropout3 = tf.keras.layers.Dropout(0.2)(dense3)

# define dense layer 4 and dropout layer 4
dense4 = tf.keras.layers.Dense(8, activation='relu')(dropout3)
dropout4 = tf.keras.layers.Dropout(0.2)(dense4)

# define dense layer 5 and output layer
dense5 = tf.keras.layers.Dense(4, activation='relu')(dropout4)
# Linear output for the regression target. A ReLU output unit emits 0 with zero
# gradient whenever its pre-activation is negative, so an unlucky initialization
# can leave the whole network stuck predicting 0.
output = tf.keras.layers.Dense(1, activation='linear')(dense5)

# instantiate the model
model = tf.keras.Model(inputs=[user_input, title_input], outputs=output)

# compile the model (no extra metrics, so evaluate() returns only the MSE loss)
model.compile(loss='mean_squared_error', optimizer='adam')

# train the model; the test split doubles as the validation set during fitting
model_hist = model.fit([train_data['UserSeqId'], train_data['TitleSeqId']], train_data['Rating'],
          batch_size=32, epochs=50, validation_data=([test_data['UserSeqId'], test_data['TitleSeqId']], test_data['Rating']))

# evaluate the model on the test split
mse = model.evaluate([test_data['UserSeqId'], test_data['TitleSeqId']], test_data['Rating'])
print('Mean Squared Error:', mse)

In [None]:
import os
import pickle
# Where the trained network is persisted: one folder holding two artefacts
model_folder_path = 'Models/NN_Current_Iter/'
model_architecture_path = 'Models/NN_Current_Iter/neural_net_architecture_curr.pkl'
model_weights_path = 'Models/NN_Current_Iter/neural_net_weights_curr.pkl'

# Make sure the target folder exists (no error if it is already there)
os.makedirs(model_folder_path, exist_ok=True)

# The architecture is exported as a JSON string, which is then pickled to disk
with open(model_architecture_path, 'wb') as architecture_file:
    pickle.dump(model.to_json(), architecture_file)

# The weights are stored separately from the architecture.
# NOTE(review): newer Keras releases only accept weight paths ending in
# `.weights.h5` - confirm this `.pkl` path works with the installed version.
model.save_weights(model_weights_path)

In [None]:
# Files to restore the network from (same paths the save cell writes to)
model_architecture_path = 'Models/NN_Current_Iter/neural_net_architecture_curr.pkl'
model_weights_path = 'Models/NN_Current_Iter/neural_net_weights_curr.pkl'

# Read back the pickled JSON description of the architecture.
# NOTE: pickle.load can execute arbitrary code - only load files you created yourself.
with open(model_architecture_path, 'rb') as architecture_file:
    loaded_model_architecture = pickle.load(architecture_file)

# Rebuild the (untrained) network from its JSON description, then restore the weights
loaded_model = tf.keras.models.model_from_json(loaded_model_architecture)
loaded_model.load_weights(model_weights_path)

In [None]:
# define function to calculate overall precision, recall and f1scores
def precision_recall_f1(actual_ratings, predicted_ratings, threshold=3):
    """Compute precision, recall and F1 after binarising ratings at ``threshold``.

    A rating counts as "positive" when it is ``>= threshold``. The metrics are
    computed directly with numpy, so the function does not depend on imports made
    in a later cell.

    Args:
        actual_ratings: Array-like of true ratings.
        predicted_ratings: Array-like of predicted ratings, same length as
            ``actual_ratings``.
        threshold: Cut-off at or above which a rating is treated as positive.

    Returns:
        Tuple ``(precision, recall, f1_score)`` of floats. A metric whose
        denominator is zero (no predicted positives, no actual positives, or
        precision + recall == 0) is reported as 0.0 instead of dividing by zero.

    Raises:
        ValueError: If the two inputs do not contain the same number of ratings.
    """
    # convert the ratings and predictions to binary form based on the threshold
    actual_positive = np.asarray(actual_ratings).ravel() >= threshold
    predicted_positive = np.asarray(predicted_ratings).ravel() >= threshold

    if actual_positive.shape != predicted_positive.shape:
        raise ValueError(
            "actual_ratings and predicted_ratings must have the same number of elements"
        )

    true_positives = int(np.count_nonzero(actual_positive & predicted_positive))
    predicted_count = int(np.count_nonzero(predicted_positive))
    actual_count = int(np.count_nonzero(actual_positive))

    # Calculate precision, recall, and F1 score, guarding every division by zero
    precision = true_positives / predicted_count if predicted_count else 0.0
    recall = true_positives / actual_count if actual_count else 0.0
    denominator = precision + recall
    f1_score = (2 * precision * recall) / denominator if denominator else 0.0

    return precision, recall, f1_score

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import precision_score, recall_score
import math

# Predict a rating for every (user, title) pair of the test split
test_inputs = [test_data['UserSeqId'], test_data['TitleSeqId']]
predictions_nn = loaded_model.predict(test_inputs).flatten()

# Regression errors between the true and the predicted ratings
mse = mean_squared_error(test_data['Rating'], predictions_nn)
rmse = math.sqrt(mse)
mae = mean_absolute_error(test_data['Rating'], predictions_nn)

# Classification-style metrics on the thresholded ratings
precision, recall, f1 = precision_recall_f1(np.array(test_data['Rating']), predictions_nn)

# Report the error metrics unformatted and the classification metrics with two decimals
for metric_label, metric_value in (('MSE:', mse), ('RMSE:', rmse), ('MAE:', mae)):
    print(metric_label, metric_value)
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 score: {f1:.2f}")

In [None]:
# define function to get top_n recommendations above a threshold ratings
def get_top_recommendations(model, user_id, n, thres=0):
    """Return up to ``n`` unseen titles with the highest predicted rating for a user.

    Relies on the notebook-level ``user_id_map``, ``title_ids`` and ``merged_df``.

    Args:
        model: Model whose ``predict([user_seq_ids, title_seq_ids])`` returns one
            predicted rating per (user, title) pair.
        user_id: Original ``UserId`` value; must be a key of ``user_id_map``.
        n: Maximum number of recommendations to return.
        thres: Minimum predicted rating a title needs in order to be recommended.

    Returns:
        List of ``(title_name, predicted_rating)`` tuples ordered from highest to
        lowest predicted rating; the rating is a plain Python float.

    Raises:
        KeyError: If ``user_id`` is not present in ``user_id_map``.
    """
    # get the user's sequential ID
    try:
        user_seq_id = user_id_map[user_id]
    except KeyError:
        raise KeyError(f"Unknown user_id {user_id!r}: not present in user_id_map") from None

    # get the inputs for the model: this user paired with every known title
    num_titles = len(title_ids)
    title_seq_ids = np.arange(num_titles)
    user_seq_ids = np.repeat(user_seq_id, num_titles)

    # get the predictions from the neural network
    predictions = model.predict([user_seq_ids, title_seq_ids])

    # create a DataFrame with title IDs and predicted ratings
    recommendations_df = pd.DataFrame({'TitleSeqId': title_seq_ids, 'PredictedRating': predictions.flatten()})

    # remove the recommendations that are already seen by the user
    seen_title_ids = merged_df.loc[merged_df['UserSeqId'] == user_seq_id, 'TitleSeqId'].values
    recommendations_df = recommendations_df[~recommendations_df['TitleSeqId'].isin(seen_title_ids)]

    # Sort the recommendations by predicted rating in descending order and select the top N titles
    top_recommendations = recommendations_df.sort_values(by='PredictedRating', ascending=False)
    top_recommendations = top_recommendations[top_recommendations['PredictedRating'] >= thres].head(n)

    # Resolve all title names with one lookup table instead of re-filtering merged_df
    # for every recommendation; the first row per TitleSeqId supplies the name.
    is_recommended = merged_df['TitleSeqId'].isin(top_recommendations['TitleSeqId'])
    name_by_seq_id = (
        merged_df.loc[is_recommended, ['TitleSeqId', 'TitleName']]
        .drop_duplicates(subset='TitleSeqId')
        .set_index('TitleSeqId')['TitleName']
    )
    title_names = top_recommendations['TitleSeqId'].map(name_by_seq_id)

    # pair each title name with its predicted rating
    return [
        (title_name, float(predicted_rating))
        for title_name, predicted_rating in zip(title_names, top_recommendations['PredictedRating'])
    ]

In [None]:
# Choose the user and how many titles to suggest, then print one
# (title, predicted rating) tuple per line
user_id = 35
top_n = 10

recommendations = get_top_recommendations(loaded_model, user_id, top_n)
for recommendation in recommendations:
    print(recommendation)