In [1]:
# import libraries

import os
import pprint
import pickle
from typing import Dict, Text
import numpy as np
import tensorflow as tf
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import keras
import tensorflow_recommenders as tfrs
from tensorflow.keras.layers import Embedding, Concatenate, Dense, Input

In [2]:
# read the csv to memory
df = pd.read_csv("Preprocessed_data.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,user_id,location,age,isbn,rating,book_title,book_author,year_of_publication,publisher,img_s,img_m,img_l,Summary,Language,Category,city,state,country
0,0,2,"stockton, california, usa",18.0,195153448,0,Classical Mythology,Mark P. O. Morford,2002.0,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,Provides an introduction to classical myths pl...,en,['Social Science'],stockton,california,usa
1,1,8,"timmins, ontario, canada",34.7439,2005018,5,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,"In a small town in Canada, Clara Callan reluct...",en,['Actresses'],timmins,ontario,canada
2,2,11400,"ottawa, ontario, canada",49.0,2005018,0,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,"In a small town in Canada, Clara Callan reluct...",en,['Actresses'],ottawa,ontario,canada
3,3,11676,"n/a, n/a, n/a",34.7439,2005018,8,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,"In a small town in Canada, Clara Callan reluct...",en,['Actresses'],,,
4,4,41385,"sudbury, ontario, canada",34.7439,2005018,0,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,"In a small town in Canada, Clara Callan reluct...",en,['Actresses'],sudbury,ontario,canada


In [3]:
# Drop 'Unnamed' column
df = df.drop('Unnamed: 0', axis=1)
df.head()

# view information about the dataset
df.info()
df.shape

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1031175 entries, 0 to 1031174
Data columns (total 18 columns):
 #   Column               Non-Null Count    Dtype  
---  ------               --------------    -----  
 0   user_id              1031175 non-null  int64  
 1   location             1031175 non-null  object 
 2   age                  1031175 non-null  float64
 3   isbn                 1031175 non-null  object 
 4   rating               1031175 non-null  int64  
 5   book_title           1031175 non-null  object 
 6   book_author          1031175 non-null  object 
 7   year_of_publication  1031175 non-null  float64
 8   publisher            1031175 non-null  object 
 9   img_s                1031175 non-null  object 
 10  img_m                1031175 non-null  object 
 11  img_l                1031175 non-null  object 
 12  Summary              1031175 non-null  object 
 13  Language             1031175 non-null  object 
 14  Category             1031175 non-null  object 
 15

(1031175, 18)

### Data Preprocessing

In [4]:
# extracting the required column for the model and web app
cleaned_data = df[["user_id", "book_title", "rating", "img_l", "book_author"]]

# save the new dataset to memory
cleaned_data.to_csv("filtered_df.csv", index=False)

In [5]:
# Convert the datatypes to TensorFlow datatypes
cleaned_data = df[["user_id", "book_title", "rating", "book_author"]].astype({"user_id": np.str_, 
                                                                               "book_title": np.str_, 
                                                                               "rating": np.float32, 
                                                                               "book_author": np.str_}
)

In [6]:
#The tf.data.Dataset API allows for writing descriptive and efficient input pipelines.
ratings_dataset = tf.data.Dataset.from_tensor_slices((tf.cast(cleaned_data['user_id'], tf.string),
                                                      tf.cast(cleaned_data['book_title'], tf.string),
                                                      tf.cast(cleaned_data['rating'], tf.float32),
                                                      tf.cast(cleaned_data['book_author'], tf.string)
))

In [7]:
# assign names to the TensorFlow datatypes
ratings = ratings_dataset.map(lambda x0, x1, x2, x3: {
    "user_id": x0,
    "book_title": x1,
    "rating": x2,
    "book_author": x3
})

for x in ratings.take(1).as_numpy_iterator():
  pprint.pprint(x)


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
{'book_author': b'Mark P. O. Morford',
 'book_title': b'Classical Mythology',
 'rating': 0.0,
 'user_id': b'2'}


In [8]:
# split the dataset for training and testing
tf.random.set_seed(1990)
shuffled = ratings.shuffle(100_000, seed=1990, reshuffle_each_iteration=False)

train = shuffled.take(75_000)
test = shuffled.skip(75_000).take(25_000)


### Getting the Unique Data

In [9]:
# get the unique data 
book_titles = ratings.batch(1_000_000).map(lambda x: x["book_title"])
user_ids = ratings.batch(1_000_000).map(lambda x: x["user_id"])

unique_book_titles = np.unique(np.concatenate(list(book_titles)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))


### Saving the unique IDs and Book Titles for App Dev

In [10]:
# save the unique data to memory
with open("unique_book_titles.pkl", "wb") as f:
    pickle.dump(unique_book_titles, f)
    
with open("unique_user_ids.pkl", "wb") as f:
    pickle.dump(unique_user_ids, f)
    


### Two Tower Recommenders

In [11]:
# Building the Model Architechture
class RankingModel(tf.keras.Model):

    def __init__(self):
        super().__init__()
        embedding_dimension = 32

        # Compute embeddings for users.
        self.user_embeddings = tf.keras.Sequential([
          tf.keras.layers.StringLookup(
            vocabulary=unique_user_ids, mask_token=None),
          tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
        ])

        # Compute embeddings for books.
        self.books_embeddings = tf.keras.Sequential([
          tf.keras.layers.StringLookup(
            vocabulary=unique_book_titles, mask_token=None),
          tf.keras.layers.Embedding(len(unique_book_titles) + 1, embedding_dimension)
        ])

        # Compute predictions.
        self.ratings = tf.keras.Sequential([
          # Learn multiple dense layers.
          tf.keras.layers.Dense(256, activation="relu"),
          tf.keras.layers.Dense(64, activation="relu"),
          # Make rating predictions in the final layer.
          tf.keras.layers.Dense(1)
      ])
    
    def call(self, inputs):

        user_id, book_title = inputs

        user_embedding = self.user_embeddings(user_id)
        book_embedding = self.books_embeddings(book_title)
        
        return self.ratings(tf.concat([user_embedding, book_embedding], axis=1))

    

# Reference https://www.tensorflow.org/recommenders/examples/basic_ranking
# Reference https://medium.com/@hamza.emra/introduction-to-recommendation-systems-with-tensorflow-recommenders-a116e5e5a940


### Model Loss and Metric

In [12]:
# load the loss function metric computation
task = tfrs.tasks.Ranking(
  loss = tf.keras.losses.MeanSquaredError(),
  metrics=[tf.keras.metrics.RootMeanSquaredError()]
)


### Full Model

In [13]:
# using TensorFlow libraries to build model
class BookModel(tfrs.models.Model):

    def __init__(self):
        super().__init__()
        self.ranking_model: tf.keras.Model = RankingModel()
        self.task: tf.keras.layers.Layer = task

    def call(self, features: Dict[str, tf.Tensor]) -> tf.Tensor:
        return self.ranking_model(
        (features["user_id"], features["book_title"]))

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        labels = features.pop("rating")
    
        rating_predictions = self(features)

        # The task computes the loss and the metrics.
        return self.task(labels=labels, predictions=rating_predictions)
    

### Model Training and Fitting

In [14]:
# fitting and training
model = BookModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

train_data = train.shuffle(len(train)).batch(256).cache().take(100_000)
test_data = test.batch(256).cache()

model.fit(train_data, epochs=25, validation_data=test_data)


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x2172f11c8e0>

### Model Evaluation

In [15]:
# Evaluating the model
model.evaluate(test_data, return_dict=True)




{'root_mean_squared_error': 4.694451332092285,
 'loss': 24.672855377197266,
 'regularization_loss': 0,
 'total_loss': 24.672855377197266}

### Model Testing

In [16]:
# model testing
test_ratings = {}
for book_title in unique_book_titles[:15]:
      test_ratings[book_title.decode("utf-8")] = model({
      "user_id": np.array(["15"]),
      "book_title": np.array([book_title])
  })

for title, score in sorted(test_ratings.items(), key=lambda x: x[1], reverse=True):
  print(f"{title}: {score}")

 A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America): [[5.160743]]
 Beyond IBM: Leadership Marketing and Finance for the 1990s: [[5.123688]]
 Apple Magic (The Collector's series): [[4.7389765]]
 Deceived: [[4.6452193]]
 Goosebumps Monster Edition 1: Welcome to Dead House, Stay Out of the Basement, and Say Cheese and Die!: [[4.520405]]
 Earth Prayers From around the World: 365 Prayers, Poems, and Invocations for Honoring the Earth: [[4.3941865]]
 Dark Justice: [[4.1910596]]
 Garfield Bigger and Better (Garfield (Numbered Paperback)): [[4.0861483]]
 Always Have Popsicles: [[3.9559033]]
 Flight of Fancy: American Heiresses (Zebra Ballad Romance): [[3.9290657]]
 Ask Lily (Young Women of Faith: Lily Series, Book 5): [[3.867418]]
 Final Fantasy Anthology: Official Strategy Guide (Brady Games): [[3.8005188]]
 Good Wives: Image and Reality in the Lives of Women in Northern New England, 1650-1750: [[3.4033701]]
 Clifford Visita El Hospital (

### Save Model for App Development

In [22]:
# save the model
tf.saved_model.save(model, "saved_index")

#loading the model to confirm functionality
loaded = tf.saved_model.load("saved_index")

loaded({"user_id": np.array(["15"]), 
        "book_title":np.array(["Dark Justice"]), 
        "book_author":np.array(["Richard Bruce Wright"])
})



INFO:tensorflow:Assets written to: export\assets


INFO:tensorflow:Assets written to: export\assets


<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[5.6822557]], dtype=float32)>

### Cosine Similarity

In [17]:
# using cosine similarity
filtered_data = pd.read_csv('filtered_df.csv')
df = filtered_data.copy()

In [18]:
# Step 1: Identify users with more than 200 ratings
x = df.groupby('user_id').count()['rating'] > 200
similar_users = x[x].index

In [19]:
# Step 2: Filter ratings data to include only ratings from similar users
filtered_rating = df[df['user_id'].isin(similar_users)]


In [20]:
# Step 3: Identify books with 50 or more ratings
y = filtered_rating.groupby('book_title').count()['rating'] >= 50
famous_books = y[y].index


In [21]:
# Step 4: Filter ratings data to include only ratings for famous books
final_ratings = filtered_rating[filtered_rating['book_title'].isin(famous_books)]


In [22]:
#table
pt = final_ratings.pivot_table(index='book_title', columns='user_id', values='rating')

pt.fillna(0,inplace=True)
print(pt.head())

user_id              254     2276    2766    2977    3363    4017    4385    \
book_title                                                                    
1984                    9.0     0.0     0.0     0.0     0.0     0.0     0.0   
1st to Die: A Novel     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
2nd Chance              0.0    10.0     0.0     0.0     0.0     0.0     0.0   
4 Blondes               0.0     0.0     0.0     0.0     0.0     0.0     0.0   
A Bend in the Road      0.0     0.0     7.0     0.0     0.0     0.0     0.0   

user_id              6251    6323    6543    ...  271705  273979  274004  \
book_title                                   ...                           
1984                    0.0     0.0     0.0  ...    10.0     0.0     0.0   
1st to Die: A Novel     0.0     0.0     9.0  ...     0.0     0.0     0.0   
2nd Chance              0.0     0.0     0.0  ...     0.0     0.0     0.0   
4 Blondes               0.0     0.0     0.0  ...     0.0     0.0  

### Calculating the Similarity Score

In [28]:
# Calculate similarity scores using cosine similarity
similarity_scores = cosine_similarity(pt)

def recommend(book_title, pt, similarity_scores, df):
    
    # Find index of the input book
    index = np.where(pt.index == book_title)[0][0]

    # Sort similar items by similarity score and select top recommendations
    similar_items = sorted(
        ((i, score) for i, score in enumerate(similarity_scores[index])),
        key=lambda x: x[1],
        reverse=True
    )[1:8]  # Only considering the top 7 similar items

    # Initialize a list to store recommended books
    recommended_books = []

    # Loop through the similar items and gather book information for recommendations
    for i, _ in similar_items:
        # Filter the DataFrame to get information about the recommended book
        temp_df = df[df['book_title'] == pt.index[i]]
        book_info = temp_df.drop_duplicates('book_title')[['book_title', "book_author"]].values[0]
        recommended_books.append(f"{book_info[0]} by {book_info[1]}")

    # Return the list of recommended books
    return recommended_books

# input the book to recommend
recommend("The Mists of Avalon", pt, similarity_scores, df)


["Harry Potter and the Sorcerer's Stone (Book 1) by J. K. Rowling",
 'Year of Wonders by Geraldine Brooks',
 'Harry Potter and the Chamber of Secrets (Book 2) by J. K. Rowling',
 'Dragonfly in Amber by DIANA GABALDON',
 'Harry Potter and the Order of the Phoenix (Book 5) by J. K. Rowling',
 'The Valley of Horses by JEAN M. AUEL',
 'The Hundred Secret Senses by Amy Tan']

In [1]:
import streamlit as st
import pickle
import pandas as pd
import tensorflow as tf
import numpy as np

# from functions import *


st.set_page_config(layout="centered")
with open("style.css") as f:
    st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)

st.title("📚Book Recommendation Web App")
st.markdown("#")
st.markdown("#")


st.sidebar.markdown(f" ## :gear: Recommendation Settings")
st.sidebar.markdown("---")
no_of_rec = int(st.sidebar.slider("Select Number of Book Recommendations", 1, 50, 10))
n_cols = st.sidebar.number_input("Select Number of columns", 5)
n_cols = int(n_cols)


@st.cache_resource
def load_data():
    df = pd.read_csv("filtered_df.csv")

    book_titles = pickle.load(open("unique_book_titles.pkl", "rb"))
    user_ids = pickle.load(open("unique_user_ids.pkl", "rb"))

    decoded_titles = [title.decode("utf-8") for title in book_titles]
    decoded_user_ids = [user.decode("utf-8") for user in user_ids]

    # Load model
    loaded_model = tf.saved_model.load("export")

    return decoded_titles, decoded_user_ids, loaded_model, df


unique_book_titles, unique_user_ids, rec_model, df = load_data()


def recommend_books(user_id, top_k):
    recommendations = []
    ratings = {}

    for book_title in unique_book_titles[:top_k]:
        ratings[book_title] = rec_model(
            {"user_id": np.array([user_id]), "book_title": np.array([book_title])}
        )

    for title, score in sorted(ratings.items(), key=lambda x: x[1], reverse=True):
        top_books = {}
        top_books["title"] = title
        top_books["score"] = f"{score[0][0]: .2f}"
        recommendations.append(top_books)

    return recommendations


def image_cover(df, book_name):
    link = df[df["book_title"] == book_name]["img_l"].values

    if len(link) > 1:
        return link[1]
    else:
        return link[0]


def get_user(df, id):
    # books = ""
    user_data = df[df["user_id"] == id][:5]
    books = user_data["book_title"].values
    rating = user_data["rating"].values
    authors = user_data["book_author"].values

    return books, rating, authors


user_id = st.selectbox("Select a user", unique_user_ids)
rec_btn = st.button("Recommend Books")
st.markdown("#")
st.markdown("#")


plc_holder = st.container()


if rec_btn:
    with plc_holder:
        st.markdown(f"#### These are some of the books user {user_id} has read")
        st.markdown("---")
        books, ratings, authors = get_user(df, int(user_id))

        n_rows = int(1 + 3 // 3)
        rows = [st.columns(n_cols) for _ in range(3)]
        cols = [column for row in rows for column in row]

        for col, title, rating, author in zip(cols, books, ratings, authors):
            col.write(f" :blue[Title]: {title[:15]}...")
            col.write(f" :blue[Rating]: {rating}")
            col.write(f" :blue[Author]: {author}")
            col.image(image_cover(df, title))
    st.markdown("---")

    # RECOMMENDATION SIDE
    st.subheader(f"Top {no_of_rec} Ranked Book Recommendations for user {user_id}")
    st.markdown("---")

    top_rec = recommend_books(user_id, no_of_rec)

    covers = []
    titles = []
    scores = []

    for rec in top_rec:
        covers.append(image_cover(df, rec["title"]))
        titles.append(rec["title"])
        scores.append(rec["score"])

    n_rows = int(1 + no_of_rec // n_cols)
    rows = [st.columns(n_cols) for _ in range(n_cols)]
    cols = [column for row in rows for column in row]

    for col, poster, title, score in zip(cols, covers, titles, scores):
        col.markdown(f"###### :blue[Title]: {title[:15]}...")
        col.write(f" :blue[Rank]: {score}")

        col.image(poster)

2023-09-25 14:44:17.160 
  command:

    streamlit run C:\Users\User\anaconda3\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [30]:
from tensorflow.keras.layers import Embedding, Concatenate, Dense, Flatten
from tensorflow.keras.models import Model
from sklearn.preprocessing import LabelEncoder

# Create label encoders for categorical features
user_encoder = LabelEncoder()
user_ids = cleaned_data["user_id"].values
encoded_user_ids = user_encoder.fit_transform(user_ids)

book_title_encoder = LabelEncoder()
book_titles = cleaned_data["book_title"].values
encoded_book_titles = book_title_encoder.fit_transform(book_titles)

book_author_encoder = LabelEncoder()
book_authors = cleaned_data["book_author"].values
encoded_book_authors = book_author_encoder.fit_transform(book_authors)

# Create embeddings for text features
embedding_dim = 50

user_embedding = Embedding(input_dim=len(user_encoder.classes_), output_dim=embedding_dim)(encoded_user_ids)
book_title_embedding = Embedding(input_dim=len(book_title_encoder.classes_), output_dim=embedding_dim)(encoded_book_titles)
book_author_embedding = Embedding(input_dim=len(book_author_encoder.classes_), output_dim=embedding_dim)(encoded_book_authors)

# Combine embeddings and other user features
user_features = Flatten()(user_embedding)
book_features = Concatenate()([book_title_embedding, book_author_embedding])
book_features = Flatten()(book_features)

# Concatenate user and book features
concatenated = Concatenate()([user_features, book_features])

# Add dense layers for prediction
dense_layer = Dense(64, activation="relu")(concatenated)
output = Dense(1)(dense_layer)

# Create and compile the model
model = Model(inputs=[encoded_user_ids, encoded_book_titles, encoded_book_authors], outputs=output)
model.compile(optimizer="adam", loss="mean_squared_error")

# Train the model
model.fit([encoded_user_ids, encoded_book_titles, encoded_book_authors], cleaned_data["rating"], epochs=10, batch_size=64)

# Make predictions for recommendations
user_id_for_recommendation = user_encoder.transform(["100002"])
all_book_titles = book_title_encoder.classes_
all_book_authors = book_author_encoder.classes_

predictions = model.predict([
    np.array([user_id_for_recommendation] * len(all_book_titles)), 
    np.arange(len(all_book_titles)), 
    np.arange(len(all_book_authors))
])

# Recommend books with highest predicted ratings
recommended_books_indices = predictions.argsort()[-10:][::-1]
recommended_books = all_book_titles[recommended_books_indices]

print("Recommended Books:", recommended_books)


ValueError: Found unexpected instance while processing input tensors for keras functional model. Expecting KerasTensor which is from tf.keras.Input() or output from keras layer call(). Got: [36135 84771  5022 ... 65492 65492 65492]