## Building and Evaluating Deep Learning Based Book Recommendation System

**Event: Strata Conference, San Francisco, 2019**  

In this notebook, we will build and evaluate a deep-learning-based book recommendation system.

### Environment Setup

#### Installing Required Packages

In [None]:
!pip install pandas --user

#### Restart Kernel

In [None]:
# Restart the kernel so packages installed in the previous cell can be imported.
# `IPython.display` is the public import location for HTML (the `core` module is internal).
# NOTE(review): the script relies on the front end exposing a `Jupyter.notebook`
# JavaScript object - confirm it works in the notebook UI you are using.
from IPython.display import HTML
HTML("<script>Jupyter.notebook.kernel.restart()</script>")

#### Import Libraries

In [None]:
# utility packages
import os
import warnings
from datetime import datetime
import shutil

# data processing and visualization packages
import numpy as np
import pandas as pd


# tensorflow packages
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from tensorflow.keras.models import Model

# ignore warnings 
warnings.filterwarnings('ignore')

In [None]:
# Show the installed TensorFlow version; the cells below use TF 1.x APIs
# such as `tf.contrib` and `tf.FixedLenFeature`.
print(tf.__version__)

## Import data

In [None]:
# Load the ratings table (one row per rating) from the local data directory.
rating_dataset = pd.read_csv("data/ratings.csv")

In [None]:
# Preview the first rows to check the column names and order.
rating_dataset.head()

In [None]:
# Total number of rating rows.
print("Number of ratings record : ", len(rating_dataset))
# Distinct users and books; dropna=False counts a missing id as its own value,
# exactly as len(unique()) would.
n_users = rating_dataset["user_id"].nunique(dropna=False)
n_items = rating_dataset["book_id"].nunique(dropna=False)
print("Number of unique users : ", n_users)
print("Number of unique items : ", n_items)

In [None]:
# Load the book metadata table from the local data directory.
book_dataset = pd.read_csv("data/books.csv")

In [None]:
# Preview the first rows of the book metadata.
book_dataset.head()

In [None]:
# Column names, dtypes and non-null counts of the book metadata.
book_dataset.info()

#### Merged Dataset

In [None]:
# Attach book metadata to every rating. The left join keeps all rating rows,
# including any whose `book_id` has no matching `id` in the book table.
dataset = rating_dataset.merge(
    book_dataset,
    how="left",
    left_on="book_id",
    right_on="id",
)

In [None]:
# Preview the merged ratings + book metadata table.
dataset.head()

In [None]:
# Vocabulary sizes for the embedding tables, derived from the data rather than
# hard-coded. The models allocate `n + 1` embedding rows, so `n` has to be at
# least the largest id that is ever looked up (not the number of distinct ids).
n_users = int(rating_dataset["user_id"].max())
# Prediction later scores ids taken from the book table's `id` column as well,
# so the item vocabulary covers both tables.
n_items = int(max(rating_dataset["book_id"].max(), book_dataset["id"].max()))

## Simple Matrix Factorization Based Model



### Explicit feedback: supervised ratings prediction

For each pair of (user, item) try to predict the rating the user would give to the item.

This is the classical setup for building recommender systems from offline data with an explicit supervision signal.


### Predicting ratings as a regression problem

The code below implements the following architecture:

<img src="images/01_matrix_factorization.png" style="width: 600px;" />

#### Helper Functions

In [None]:
def parser(item_id, user_id, rating):
    """
    Turn one parsed CSV record into a `(features, label)` pair.

    The features dict is keyed by the names of the model's input layers
    ('User-Input', 'Item-Input'); the rating is returned unchanged as the label.

    NOTE(review): the positional order (item, user, rating) has to match the
    column order of the CSV being read - confirm against the file header.
    """
    features = {}
    features['User-Input'] = user_id
    features['Item-Input'] = item_id
    return features, rating


def train_input_fn(csv_path, batch_size=1024, buffer_size=1024):
    """
    Build the training input pipeline and return its next batch.

    Args:
        csv_path: passed to `CsvDataset` as `filenames`; the header line is
            skipped and columns 0-2 are read as int32.
        batch_size: number of records per batch.
        buffer_size: size of the shuffle buffer, in records.

    Returns:
        `(features, labels)` tensors for one batch, where `features` is the
        dict produced by `parser`. The dataset is not repeated, so it yields
        a single pass over the file.
    """
    dataset = (
        tf.data.experimental.CsvDataset(
            filenames=csv_path,
            record_defaults=[tf.int32, tf.int32, tf.int32],
            select_cols=[0, 1, 2],
            field_delim=",",
            header=True)
        .map(parser)
        .shuffle(buffer_size=buffer_size)
        .batch(batch_size)
        # After `batch`, each dataset element is a whole batch, so the prefetch
        # buffer is counted in batches. Keeping one batch ready is enough to
        # overlap input processing with the training step; `batch_size` here
        # would buffer that many full batches.
        .prefetch(1)
    )
    iterator = dataset.make_one_shot_iterator()
    batch_feats, batch_labels = iterator.get_next()
    return batch_feats, batch_labels

def eval_input_fn(csv_path, batch_size=1000):
    """
    Build the evaluation input pipeline and return its next batch.

    Reads columns 0-2 of the CSV as int32 (skipping the header line), maps each
    record through `parser` and batches the result. Records are neither
    shuffled nor repeated.

    Returns:
        `(features, labels)` tensors for one batch.
    """
    records = tf.data.experimental.CsvDataset(
        filenames=csv_path,
        record_defaults=[tf.int32] * 3,
        select_cols=[0, 1, 2],
        field_delim=",",
        header=True,
    )
    batches = records.map(parser).batch(batch_size)
    features, labels = batches.make_one_shot_iterator().get_next()
    return features, labels


#### Model Estimator ( Simple Model )

In [None]:
def get_estimator(tf_embedding_size, tf_model_dir):
    """
    Build the matrix-factorisation model and wrap it as an Estimator.

    Each id is looked up in its own embedding table and the predicted rating is
    the dot product of the item and user vectors. Table sizes come from the
    notebook-level `n_items` / `n_users`.

    Args:
        tf_embedding_size: width of both embedding vectors.
        tf_model_dir: directory handed to the Estimator as `model_dir`.

    Returns:
        The Estimator created from the compiled Keras model.
    """
    def embedding_branch(input_name, embedding_name, flatten_name, vocab_size):
        # Input -> Embedding -> Flatten for a single id column.
        ids = Input(shape=[1], name=input_name)
        looked_up = Embedding(vocab_size, tf_embedding_size, name=embedding_name)(ids)
        return ids, Flatten(name=flatten_name)(looked_up)

    # Book branch first, then user branch.
    book_ids, book_vec = embedding_branch(
        "Item-Input", "Item-Embedding", "Flatten-Items", n_items+1)
    reader_ids, reader_vec = embedding_branch(
        "User-Input", "User-Embedding", "Flatten-Users", n_users+1)

    # Predicted rating = <item vector, user vector>.
    score = Dot(name="Dot-Product", axes=1)([book_vec, reader_vec])
    mf_model = Model([reader_ids, book_ids], score)
    mf_model.compile('adam', 'mean_squared_error')
    mf_model.summary()
    return tf.keras.estimator.model_to_estimator(
        keras_model=mf_model,
        model_dir=tf_model_dir,
    )

#### Prepare data and model

In [None]:
# Paths and hyper-parameters for the matrix-factorisation run.
tf_model_dir = "/tmp/model_1/"
tf_data_dir = "data/ratings.csv"
tf_batch_size = 1024
tf_train_steps = 200
tf_embedding_size = 10

# Training spec: shuffled batches, stopping once `tf_train_steps` steps have run.
train_spec = tf.estimator.TrainSpec(
    input_fn=lambda: train_input_fn(
        tf_data_dir,
        batch_size=tf_batch_size,
        buffer_size=tf_batch_size,
    ),
    max_steps=tf_train_steps,
)

# NOTE(review): evaluation reads the same CSV as training and only a single
# batch (steps=1), so the reported loss is not a held-out estimate.
eval_spec = tf.estimator.EvalSpec(
    input_fn=lambda: eval_input_fn(tf_data_dir, batch_size=tf_batch_size),
    steps=1,
    throttle_secs=1,
    start_delay_secs=1,
)

# Build the Keras model and wrap it as an Estimator.
estimator = get_estimator(tf_embedding_size, tf_model_dir)

#### Train Estimator

In [None]:
# Train and evaluate the estimator using the train/eval specs defined in the previous cell.
print("Train and evaluate")
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
print("Training done")

## A Deep recommender model

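We can use deep learning models with multiple layers (for example, fully connected and dropout layers) for the recommendation system. The model below concatenates the item and user embeddings and passes them through fully connected layers.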

<img src="images/02_deep_recsys.png" style="width: 600px;" />



In [None]:
def get_estimator(tf_embedding_size, tf_model_dir):
    """
    Build the deep recommender model and wrap it as an Estimator.

    Item and user ids are embedded separately, the two vectors are concatenated
    and passed through Dense(128, relu) -> Dense(32, relu) -> Dense(1).
    Table sizes come from the notebook-level `n_items` / `n_users`.

    NOTE: this definition reuses the name `get_estimator` and therefore
    replaces the matrix-factorisation builder defined earlier in the notebook.
    NOTE(review): the Dense layers are not named explicitly; a later cell reads
    the prediction under the key 'dense_2', presumably the auto-generated name
    of the final layer - keep the layer creation order unchanged.

    Args:
        tf_embedding_size: width of both embedding vectors.
        tf_model_dir: directory handed to the Estimator as `model_dir`.

    Returns:
        The Estimator created from the compiled Keras model.
    """
    # Book branch: id -> embedding -> flat vector.
    book_ids = Input(shape=[1], name="Item-Input")
    book_lookup = Embedding(n_items+1, tf_embedding_size, name="Item-Embedding")(book_ids)
    book_vec = Flatten(name="Flatten-Items")(book_lookup)

    # User branch: id -> embedding -> flat vector.
    reader_ids = Input(shape=[1], name="User-Input")
    reader_lookup = Embedding(n_users+1, tf_embedding_size, name="User-Embedding")(reader_ids)
    reader_vec = Flatten(name="Flatten-Users")(reader_lookup)

    # Joint representation followed by the fully connected stack.
    hidden = Concatenate()([book_vec, reader_vec])
    for units in (128, 32):
        hidden = Dense(units, activation='relu')(hidden)
    rating = Dense(1)(hidden)

    # Assemble, compile and convert.
    deep_model = Model([reader_ids, book_ids], rating)
    deep_model.compile('adam', 'mean_squared_error')
    deep_model.summary()
    return tf.keras.estimator.model_to_estimator(
        keras_model=deep_model,
        model_dir=tf_model_dir,
    )

#### Prepare data and model

In [None]:
# Paths and hyper-parameters for the deep recommender run.
tf_model_dir = "/tmp/model_2/"
tf_data_dir = "data/ratings.csv"
tf_batch_size = 1024
tf_train_steps = 200
tf_embedding_size = 10

# Training spec: shuffled batches, stopping once `tf_train_steps` steps have run.
train_spec = tf.estimator.TrainSpec(
    input_fn=lambda: train_input_fn(
        tf_data_dir,
        batch_size=tf_batch_size,
        buffer_size=tf_batch_size,
    ),
    max_steps=tf_train_steps,
)

# NOTE(review): evaluation reads the same CSV as training and only a single
# batch (steps=1), so the reported loss is not a held-out estimate.
eval_spec = tf.estimator.EvalSpec(
    input_fn=lambda: eval_input_fn(tf_data_dir, batch_size=tf_batch_size),
    steps=1,
    throttle_secs=1,
    start_delay_secs=1,
)

# Build the deep Keras model and wrap it as an Estimator.
estimator = get_estimator(tf_embedding_size, tf_model_dir)

#### Train Estimator

In [None]:
# Train and evaluate the deep model using the train/eval specs defined in the previous cell.
print("Train and evaluate")
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
print("Training done")

### Exporting Model

In [None]:
# Export the trained estimator as a SavedModel that parses serialized
# tf.Example protos: one float feature of shape [1] per model input.
tf_export_dir = '/tmp/export/'
feature_spec = {
    input_name: tf.FixedLenFeature(shape=[1], dtype=np.float32)
    for input_name in ('User-Input', 'Item-Input')
}
print("Export saved model")
serving_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
# `export_dir` is the path of the directory that was actually written.
export_dir = estimator.export_savedmodel(
    tf_export_dir,
    serving_input_receiver_fn=serving_fn,
)

print("Done exporting the model")

In [None]:
# List the contents of the export base directory.
!ls /tmp/export/

##### Inspect Model

In [None]:
# Print the signatures of the exported model.
# NOTE(review): the `*` glob matches every entry under /tmp/export/; if more than
# one export is present the command may not receive a single directory - confirm.
!saved_model_cli show --dir /tmp/export/* --all

### Making Predictions

In [None]:
# Load the model written by the export cell instead of a hard-coded timestamp
# directory, which only exists for the run that originally produced it.
# `export_dir` may be returned as bytes, so it is normalised to text first.
predict_fn = tf.contrib.predictor.from_saved_model(
    export_dir.decode() if isinstance(export_dir, bytes) else export_dir
)

In [None]:
# creating data for prediction

# All candidate item ids, de-duplicated and in ascending order so the position
# of each score is reproducible. Missing ids (ratings without a metadata match
# after the left merge) are dropped rather than fed to the model.
item_data = np.sort(dataset["id"].dropna().unique())

# The model scores (user, item) pairs, so repeat the user id once per item.
user_to_predict = 1  # User ID
user_data = np.full(len(item_data), user_to_predict)

In [None]:
# One row per (user, item) pair to score, with columns named after the model inputs.
inputs = pd.DataFrame(
    data={'User-Input': user_data, 'Item-Input': item_data},
    columns=['User-Input', 'Item-Input'],
)

inputs.head()


In [None]:
# Convert input data into serialized Example strings.
examples = []
for index, row in inputs.iterrows():
    # `Series.items()` is used because `iteritems()` is no longer available in
    # newer pandas releases. Values are cast to float explicitly, since every
    # feature is written to a FloatList.
    feature = {
        col: tf.train.Feature(float_list=tf.train.FloatList(value=[float(value)]))
        for col, value in row.items()
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    examples.append(example.SerializeToString())

# Score every (user, item) pair in a single call.
predictions = predict_fn({'examples': examples})



In [None]:
outputs = predict_fn({'examples': examples})
# The output is looked up by layer name. If that name is absent and the model
# exposes exactly one output, use that output instead of failing on the key.
output_key = 'dense_2'  # output name
if output_key not in outputs and len(outputs) == 1:
    output_key = next(iter(outputs))
pred = outputs[output_key].flatten()
print(-np.sort(-pred)[:10])
# top 10 items
# argsort returns positions within `pred`; row i of the inputs was built from
# item_data[i], so the positions are mapped back to the actual book ids.
top_positions = (-pred).argsort()[:10]
recommended_item_ids = item_data[top_positions]
print(recommended_item_ids)

In [None]:
# Column names, dtypes and non-null counts of the merged table.
dataset.info()

#### Books Rated By User

In [None]:
# Title and cover thumbnail of every book the selected user has rated.
dataset.loc[
    dataset["user_id"] == user_to_predict,
    ["original_title", "small_image_url"],
]

### Books Recommended

In [None]:
# Title and cover thumbnail of the recommended books (rows appear in book-table order).
book_dataset.loc[
    book_dataset["id"].isin(recommended_item_ids),
    ["original_title", "small_image_url"],
]