In [3]:
# import libraries

import os
import pprint
import pickle
from typing import Dict, Text
import numpy as np
import tensorflow as tf
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import keras
import tensorflow_recommenders as tfrs
from tensorflow.keras.layers import Embedding, Concatenate, Dense, Input

# Load the preprocessed ratings table from disk into a DataFrame.
df = pd.read_csv("Preprocessed_data.csv")
# Bare expression: its value is only shown by a notebook when it is the last
# statement of the cell, so here it has no visible effect.
df.head()

# Drop the 'Unnamed: 0' column (presumably a leftover index column written by
# an earlier to_csv call -- confirm against the preprocessing step).
df = df.drop('Unnamed: 0', axis=1)
df.head()

# Print column names, non-null counts and dtypes of the dataset.
df.info()
df.shape

# Keep only the columns needed by the model and the web app.
cleaned_data = df[["user_id", "book_title", "rating", "img_l", "book_author"]]

# Write this subset to disk; it is read back later for the
# cosine-similarity recommender.
cleaned_data.to_csv("filtered_df.csv", index=False)

# Rebind cleaned_data to the model columns only (no img_l) and cast them to
# dtypes that map onto TensorFlow types: ids, titles and authors become
# strings, ratings become float32.
cleaned_data = df[["user_id", "book_title", "rating", "book_author"]].astype({"user_id": np.str_, 
                                                                               "book_title": np.str_, 
                                                                               "rating": np.float32, 
                                                                               "book_author": np.str_}
)

# Build a tf.data.Dataset with one element per DataFrame row. Each element is
# a 4-tuple (user_id, book_title, rating, book_author).
ratings_dataset = tf.data.Dataset.from_tensor_slices((tf.cast(cleaned_data['user_id'], tf.string),
                                                      tf.cast(cleaned_data['book_title'], tf.string),
                                                      tf.cast(cleaned_data['rating'], tf.float32),
                                                      tf.cast(cleaned_data['book_author'], tf.string)
))

# Turn each positional tuple into a dict keyed by feature name, which is the
# form the model's call/compute_loss methods index into.
ratings = ratings_dataset.map(lambda x0, x1, x2, x3: {
    "user_id": x0,
    "book_title": x1,
    "rating": x2,
    "book_author": x3
})

# Sanity check: pretty-print the first element of the dataset.
for x in ratings.take(1).as_numpy_iterator():
  pprint.pprint(x)

# Split the dataset for training and testing.
# The seed is fixed and reshuffle_each_iteration is False so that the shuffled
# order is the same every time the dataset is iterated; take/skip below then
# select non-overlapping slices of that order.
# NOTE(review): the shuffle buffer holds 100_000 elements while df.info()
# reports about 1.03 million rows, so the 100_000 examples selected below are
# drawn from the early part of the file rather than uniformly from all rows --
# confirm whether this subsampling is intended.
tf.random.set_seed(1990)
shuffled = ratings.shuffle(100_000, seed=1990, reshuffle_each_iteration=False)

# First 75_000 shuffled examples for training, the next 25_000 for testing.
train = shuffled.take(75_000)
test = shuffled.skip(75_000).take(25_000)

# Collect the distinct book titles and user ids over the *full* ratings
# dataset (not just the train/test subset). Batching in chunks of 1_000_000
# yields a few large tensors that are concatenated below.
book_titles = ratings.batch(1_000_000).map(lambda x: x["book_title"])
user_ids = ratings.batch(1_000_000).map(lambda x: x["user_id"])

# np.unique returns the sorted distinct values; these arrays are used as the
# StringLookup vocabularies of the ranking model.
unique_book_titles = np.unique(np.concatenate(list(book_titles)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

# Persist both vocabularies to disk as pickle files.
with open("unique_book_titles.pkl", "wb") as f:
    pickle.dump(unique_book_titles, f)

with open("unique_user_ids.pkl", "wb") as f:
    pickle.dump(unique_user_ids, f)
    

# Building the Model Architecture
class RankingModel(tf.keras.Model):
    """Score a (user id, book title) pair with a predicted rating.

    Both inputs are looked up in their vocabulary, embedded into 32-dimensional
    vectors, concatenated, and fed through a small stack of dense layers that
    ends in a single output unit.
    """

    def __init__(self):
        super().__init__()
        embedding_dimension = 32

        # User tower: raw id string -> vocabulary index -> embedding vector.
        # The embedding table has one row more than the vocabulary size.
        user_lookup = tf.keras.layers.StringLookup(
            vocabulary=unique_user_ids, mask_token=None)
        user_table = tf.keras.layers.Embedding(
            len(unique_user_ids) + 1, embedding_dimension)
        self.user_embeddings = tf.keras.Sequential([user_lookup, user_table])

        # Book tower: same structure, keyed by book title.
        book_lookup = tf.keras.layers.StringLookup(
            vocabulary=unique_book_titles, mask_token=None)
        book_table = tf.keras.layers.Embedding(
            len(unique_book_titles) + 1, embedding_dimension)
        self.books_embeddings = tf.keras.Sequential([book_lookup, book_table])

        # Prediction head: two ReLU hidden layers, then one linear unit that
        # emits the rating estimate.
        hidden_wide = tf.keras.layers.Dense(256, activation="relu")
        hidden_narrow = tf.keras.layers.Dense(64, activation="relu")
        rating_output = tf.keras.layers.Dense(1)
        self.ratings = tf.keras.Sequential(
            [hidden_wide, hidden_narrow, rating_output])

    def call(self, inputs):
        """Return the predicted rating for each (user_id, book_title) pair.

        ``inputs`` is a 2-tuple ``(user_id, book_title)``.
        """
        user_id, book_title = inputs

        # Join the two embeddings along axis 1 so the dense head sees them as
        # one feature vector per example.
        joined = tf.concat(
            [self.user_embeddings(user_id), self.books_embeddings(book_title)],
            axis=1,
        )
        return self.ratings(joined)
    
# Reference https://www.tensorflow.org/recommenders/examples/basic_ranking
# Reference https://medium.com/@hamza.emra/introduction-to-recommendation-systems-with-tensorflow-recommenders-a116e5e5a940

# Ranking task shared by the model below: mean squared error is the training
# loss and root mean squared error is the metric that gets reported.
task = tfrs.tasks.Ranking(
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.RootMeanSquaredError()],
)

# using TensorFlow libraries to build model
class BookModel(tfrs.models.Model):
    """TFRS model that pairs ``RankingModel`` with the module-level ``task``."""

    def __init__(self):
        super().__init__()
        self.ranking_model: tf.keras.Model = RankingModel()
        self.task: tf.keras.layers.Layer = task

    def call(self, features: Dict[str, tf.Tensor]) -> tf.Tensor:
        """Return rating predictions for the user/book pairs in ``features``.

        Only the ``"user_id"`` and ``"book_title"`` entries are read.
        """
        user_and_book = (features["user_id"], features["book_title"])
        return self.ranking_model(user_and_book)

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        """Compute the task loss for one batch of features.

        The ``"rating"`` entry is removed from ``features`` (the dict is
        modified in place) and used as the label; the remaining entries are
        passed through the model to obtain predictions.
        """
        true_ratings = features.pop("rating")
        predicted_ratings = self(features)

        # The task object evaluates the loss and updates its metrics.
        return self.task(labels=true_ratings, predictions=predicted_ratings)
    
# Instantiate the model and compile it with the Adagrad optimizer.
model = BookModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

# Shuffle, batch and cache the training and test splits.
# NOTE(review): take(100_000) is applied after batch(256), so it limits the
# number of batches, not examples; with 75_000 training examples there are far
# fewer than 100_000 batches, so it appears to have no effect -- confirm.
# NOTE(review): cache() comes after shuffle(), so presumably the order produced
# on the first pass is reused on later epochs -- confirm this is intended.
train_data = train.shuffle(len(train)).batch(256).cache().take(100_000)
test_data = test.batch(256).cache()

# Train for 25 epochs, reporting validation metrics on the test split.
model.fit(train_data, epochs=25, validation_data=test_data)

# Evaluate the trained model on the test split; metrics come back as a dict.
model.evaluate(test_data, return_dict=True)

# Spot check: predict ratings of the first 15 vocabulary titles for user "15".
# Titles in unique_book_titles are bytes, hence the decode for the dict key.
test_ratings = {}
for book_title in unique_book_titles[:15]:
      test_ratings[book_title.decode("utf-8")] = model({
      "user_id": np.array(["15"]),
      "book_title": np.array([book_title])
  })

# Print the titles from highest to lowest predicted rating. Each score is the
# model output for a single example (printed as a 1x1 array).
for title, score in sorted(test_ratings.items(), key=lambda x: x[1], reverse=True):
  print(f"{title}: {score}")

# Disabled here: the same save/load check is executed at the end of the cell.
# # save the model
# tf.saved_model.save(model, "export")

# #loading the model to confirm functionality
# loaded = tf.saved_model.load("export")

# loaded({"user_id": np.array(["15"]), 
#         "book_title":np.array(["Dark Justice"]), 
#         "book_author":np.array(["Richard Bruce Wright"])
# })



# Item-based recommender built on cosine similarity between books.
filtered_data = pd.read_csv('filtered_df.csv')
df = filtered_data.copy()

# Step 1: flag users who have more than 200 ratings and keep their ids.
x = df.groupby('user_id')['rating'].count() > 200
similar_users = x[x].index

# Step 2: restrict the ratings to those users.
filtered_rating = df[df['user_id'].isin(similar_users)]

# Step 3: among those ratings, flag books with at least 50 ratings.
y = filtered_rating.groupby('book_title')['rating'].count() >= 50
famous_books = y[y].index

# Step 4: restrict the ratings to those books.
final_ratings = filtered_rating[filtered_rating['book_title'].isin(famous_books)]

# Book-by-user matrix of ratings; a missing (book, user) pair becomes 0.
pt = final_ratings.pivot_table(
    index='book_title', columns='user_id', values='rating'
).fillna(0)
print(pt.head())

# Pairwise cosine similarity between the rows (books) of the matrix.
similarity_scores = cosine_similarity(pt)

def recommend(book_title, pt, similarity_scores, df):
    """Return up to seven books most similar to ``book_title``.

    Args:
        book_title: Title to find neighbours for; must appear in ``pt.index``.
        pt: Pivot table whose index holds the book titles, in the same order
            as the rows/columns of ``similarity_scores``.
        similarity_scores: Square matrix where entry ``[i][j]`` is the
            similarity between book ``i`` and book ``j``.
        df: Ratings DataFrame providing the ``book_title`` and
            ``book_author`` columns used to label each recommendation.

    Returns:
        A list of ``"<title> by <author>"`` strings, most similar first.

    Raises:
        IndexError: If ``book_title`` is not in ``pt.index``, or a
            recommended title has no row in ``df``.
    """
    # Locate the row of the requested book in the pivot table.
    matches = np.where(pt.index == book_title)[0]
    if len(matches) == 0:
        raise IndexError(f"book title {book_title!r} is not in the pivot table")
    index = matches[0]

    # Rank every *other* book by similarity. The queried book is excluded by
    # position instead of by dropping the first sorted entry: the first entry
    # is only guaranteed to be the book itself when no other book ties with
    # its self-similarity (e.g. identical or all-zero rating rows).
    candidates = (
        (i, score)
        for i, score in enumerate(similarity_scores[index])
        if i != index
    )
    similar_items = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:7]

    # Label each recommended title with its author (first matching row).
    recommended_books = []
    for i, _ in similar_items:
        title = pt.index[i]
        authors = df.loc[df['book_title'] == title, 'book_author']
        recommended_books.append(f"{title} by {authors.iloc[0]}")

    return recommended_books

# Ask the cosine-similarity recommender for books similar to the given title.
# The title must be present in pt.index. The returned list is not stored or
# printed here.
recommend("Year of Wonders", pt, similarity_scores, df)

# Export the trained ranking model to the "export" directory as a SavedModel.
tf.saved_model.save(model, "export")

# Reload the export and run one prediction to confirm it works.
loaded = tf.saved_model.load("export")

# NOTE(review): "book_author" is supplied although BookModel.call only reads
# "user_id" and "book_title"; presumably the exported signature was traced
# from training features that include it -- confirm before removing.
loaded({"user_id": np.array(["15"]), 
        "book_title":np.array(["Dark Justice"]), 
        "book_author":np.array(["Richard Bruce Wright"])
})

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1031175 entries, 0 to 1031174
Data columns (total 18 columns):
 #   Column               Non-Null Count    Dtype  
---  ------               --------------    -----  
 0   user_id              1031175 non-null  int64  
 1   location             1031175 non-null  object 
 2   age                  1031175 non-null  float64
 3   isbn                 1031175 non-null  object 
 4   rating               1031175 non-null  int64  
 5   book_title           1031175 non-null  object 
 6   book_author          1031175 non-null  object 
 7   year_of_publication  1031175 non-null  float64
 8   publisher            1031175 non-null  object 
 9   img_s                1031175 non-null  object 
 10  img_m                1031175 non-null  object 
 11  img_l                1031175 non-null  object 
 12  Summary              1031175 non-null  object 
 13  Language             1031175 non-null  object 
 14  Category             1031175 non-null  object 
 15

Epoch 25/25
 Beyond IBM: Leadership Marketing and Finance for the 1990s: [[7.3275423]]
 Goosebumps Monster Edition 1: Welcome to Dead House, Stay Out of the Basement, and Say Cheese and Die!: [[6.6860013]]
 Dark Justice: [[6.5988684]]
 Final Fantasy Anthology: Official Strategy Guide (Brady Games): [[6.581585]]
 Flight of Fancy: American Heiresses (Zebra Ballad Romance): [[6.4841614]]
 Always Have Popsicles: [[6.481626]]
 God's Little Promise Book: [[6.4058623]]
 Apple Magic (The Collector's series): [[6.386955]]
 Deceived: [[6.356638]]
 Garfield Bigger and Better (Garfield (Numbered Paperback)): [[6.2628455]]
 Ask Lily (Young Women of Faith: Lily Series, Book 5): [[6.1573334]]
 Earth Prayers From around the World: 365 Prayers, Poems, and Invocations for Honoring the Earth: [[5.837229]]
 A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America): [[5.833956]]
 Good Wives: Image and Reality in the Lives of Women in Northern New England, 165



INFO:tensorflow:Assets written to: export\assets


INFO:tensorflow:Assets written to: export\assets


<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[6.072784]], dtype=float32)>