# Book recommender using TensorFlow

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the three semicolon-separated, latin-1 encoded tables.
# Forward slashes keep these relative paths valid on Linux/macOS as well as on
# Windows (the previous 'datasets\\...' form only resolved on Windows).
# NOTE(review): the warning echoed under this cell points at the Users.csv read;
# presumably a mixed-dtype column - confirm and pin dtypes if `user` gets used.
rating = pd.read_csv('datasets/Ratings.csv', sep=';', encoding="latin-1")
user = pd.read_csv('datasets/Users.csv', sep=';', encoding="latin-1")
book = pd.read_csv('datasets/Books.csv', sep=';', encoding="latin-1")
# Attach book metadata to every rating via the shared ISBN column.
book_rating = pd.merge(rating, book, on='ISBN')
book_rating.head()

  user = pd.read_csv('datasets\\Users.csv', sep=';', encoding="latin-1")


Unnamed: 0,User-ID,ISBN,Rating,Title,Author,Year,Publisher
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
1,2313,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
3,8680,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
4,10314,034545104X,9,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books


In [3]:
# Book metadata columns that the recommender does not use.
cols = ['Year', 'Publisher', 'Author']
# Re-bind instead of mutating in place, and tolerate already-removed columns,
# so re-running this cell no longer raises KeyError.
book_rating = book_rating.drop(columns=cols, errors='ignore')
book_rating.head()

Unnamed: 0,User-ID,ISBN,Rating,Title
0,276725,034545104X,0,Flesh Tones: A Novel
1,2313,034545104X,5,Flesh Tones: A Novel
2,6543,034545104X,0,Flesh Tones: A Novel
3,8680,034545104X,5,Flesh Tones: A Novel
4,10314,034545104X,9,Flesh Tones: A Novel


In [4]:
# Number of (non-null) ratings recorded for each title.
rating_count = (
    book_rating
    .groupby('Title')['Rating']
    .count()
    .reset_index(name='RatingCount_book')
)
rating_count.head()

Unnamed: 0,Title,RatingCount_book
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1


In [5]:
# Keep only the titles that have at least `threshold` ratings.
threshold = 25
rating_count = rating_count.loc[rating_count['RatingCount_book'] >= threshold]
rating_count.head()

Unnamed: 0,Title,RatingCount_book
75,'Salem's Lot,47
203,10 Lb. Penalty,61
422,101 Dalmatians,37
673,"14,000 Things to Be Happy About",28
697,16 Lighthouse Road,65


In [7]:
# Pull in the individual ratings for every title that survived the count filter.
user_rating = rating_count.merge(book_rating, on='Title', how='left')
user_rating.head()

Unnamed: 0,Title,RatingCount_book,User-ID,ISBN,Rating
0,'Salem's Lot,47,8936,067103975X,0
1,'Salem's Lot,47,172245,067103975X,0
2,'Salem's Lot,47,189835,067103975X,5
3,'Salem's Lot,47,9226,0451168089,0
4,'Salem's Lot,47,33283,0451168089,10


In [8]:
# Number of (non-null) ratings each user gave among the retained titles.
user_count = (
    user_rating
    .groupby('User-ID')['Rating']
    .count()
    .reset_index(name='RatingCount_user')
)
user_count.head()

Unnamed: 0,User-ID,RatingCount_user
0,8,2
1,9,2
2,10,1
3,14,1
4,16,2


In [9]:
# Keep only the users that have at least `threshold` ratings.
# Note: this re-uses (and overwrites) the `threshold` name from the book filter.
threshold = 20
user_count = user_count.loc[user_count['RatingCount_user'] >= threshold]
user_count.head()

Unnamed: 0,User-ID,RatingCount_user
52,243,68
54,254,139
89,487,21
96,507,61
115,638,51


In [10]:
# Inner join: only ratings made by the retained users remain.
combined = pd.merge(user_rating, user_count, on='User-ID', how='inner')
combined.head()

Unnamed: 0,Title,RatingCount_book,User-ID,ISBN,Rating,RatingCount_user
0,'Salem's Lot,47,8936,067103975X,0,177
1,1st to Die: A Novel,509,8936,0446610038,0,177
2,A Case of Need,236,8936,0451210638,0,177
3,A Perfect Stranger,54,8936,0440168724,0,177
4,Accident,126,8936,0440217547,0,177


In [11]:
# (rows, columns) of the table left after both count filters were applied.
combined.shape

(263467, 6)

In [12]:
# Report the size of the user x book problem that will be modelled.
print("Number of unique books:{}".format(combined['Title'].nunique()))
print("Number of unique users:{}".format(combined['User-ID'].nunique()))

Number of unique books:5850
Number of unique users:3192


In [13]:
# Min-max scale the ratings (MinMaxScaler's default target range is [0, 1]).
scaler = MinMaxScaler()
combined['Rating'] = combined['Rating'].values.astype(float)
# Build the scaled frame on `combined`'s own index. Column assignment aligns on
# index labels, so a fresh 0..n-1 index would only be correct while `combined`
# still has a pristine RangeIndex, and would silently misassign values (or
# produce NaN) if this cell ran after rows had been dropped.
rating_scaled = pd.DataFrame(
    scaler.fit_transform(combined['Rating'].values.reshape(-1, 1)),
    index=combined.index,
)
combined['Rating'] = rating_scaled[0]

In [16]:
# pivot() needs unique (User-ID, Title) pairs, so keep the first row of each pair.
combined = combined.drop_duplicates(subset=['User-ID', 'Title'])

# One row per user, one column per title; pairs without a rating become 0.
user_book_matrix = (
    combined
    .pivot(index='User-ID', columns='Title', values='Rating')
    .fillna(0)
)

# Remember the row/column labels before the frame is reduced to a bare array.
users = user_book_matrix.index.tolist()
books = user_book_matrix.columns.tolist()

user_book_matrix = user_book_matrix.to_numpy()

In [17]:
# Re-bind `tf` to the TensorFlow 1.x compatibility API and switch off v2
# behaviour: the cells below build a graph with tf.placeholder, run it in a
# tf.Session and optimise it with a tf.train optimizer.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
# Alternative left by the author (not executed):
# tf.compat.v1.disable_eager_execution()

Instructions for updating:
non-resource variables are not supported in the long term


In [19]:
# Fix the graph-level seed before any random op is created so the weight
# initialisation is repeatable after Restart & Run All.
tf.set_random_seed(42)

# Autoencoder layout: num_input -> num_hidden_1 -> num_hidden_2 -> num_hidden_1 -> num_input.
num_input = combined['Title'].nunique()  # one input unit per distinct title
num_hidden_1 = 10
num_hidden_2 = 5

# A batch of user rows; the batch dimension is left open.
X = tf.placeholder(tf.float64, [None, num_input])


def _normal_variable(shape):
    """Return a float64 tf.Variable initialised with tf.random_normal of `shape`."""
    return tf.Variable(tf.random_normal(shape, dtype=tf.float64))


# Weight matrices, keyed by the layer that consumes them.
weights = {
    'encoder_h1': _normal_variable([num_input, num_hidden_1]),
    'encoder_h2': _normal_variable([num_hidden_1, num_hidden_2]),
    'decoder_h1': _normal_variable([num_hidden_2, num_hidden_1]),
    'decoder_h2': _normal_variable([num_hidden_1, num_input]),
}

# Bias vectors, one per layer output.
biases = {
    'encoder_b1': _normal_variable([num_hidden_1]),
    'encoder_b2': _normal_variable([num_hidden_2]),
    'decoder_b1': _normal_variable([num_hidden_1]),
    'decoder_b2': _normal_variable([num_input]),
}


In [20]:
def encoder(x):
    """Pass `x` through the two sigmoid encoder layers and return the result.

    Each layer computes sigmoid(input @ W + b) with the 'encoder_h*' matrices
    from `weights` and the 'encoder_b*' vectors from `biases`.
    """
    activation = x
    for w_key, b_key in (('encoder_h1', 'encoder_b1'), ('encoder_h2', 'encoder_b2')):
        pre_activation = tf.add(tf.matmul(activation, weights[w_key]), biases[b_key])
        activation = tf.nn.sigmoid(pre_activation)
    return activation

def decoder(x):
    """Pass `x` through the two sigmoid decoder layers and return the result.

    Each layer computes sigmoid(input @ W + b) with the 'decoder_h*' matrices
    from `weights` and the 'decoder_b*' vectors from `biases`.
    """
    activation = x
    for w_key, b_key in (('decoder_h1', 'decoder_b1'), ('decoder_h2', 'decoder_b2')):
        pre_activation = tf.add(tf.matmul(activation, weights[w_key]), biases[b_key])
        activation = tf.nn.sigmoid(pre_activation)
    return activation

In [21]:
# Wire the autoencoder: input -> encoded representation -> reconstruction.
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)

# The reconstruction is compared against the input itself.
y_true, y_pred = X, decoder_op

In [22]:
# Reconstruction error between the input and the decoder output.
loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred)
# RMSProp step that minimises the reconstruction error.
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.03).minimize(loss)

# Streaming precision metric fed through two int32 placeholders.
# NOTE(review): none of the cells shown here ever runs `pre` / `pre_op`.
eval_x = tf.placeholder(tf.int32)
eval_y = tf.placeholder(tf.int32)
pre, pre_op = tf.metrics.precision(labels=eval_x, predictions=eval_y)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [23]:
# Ops that initialise the graph's global variables (the weights and biases) and
# its local variables (presumably the counters behind tf.metrics.precision).
# Both are run at the start of the training session.
init = tf.global_variables_initializer()
local_init = tf.local_variables_initializer()
# Empty frame; the training cell rebuilds `pred_data` from the predictions.
pred_data = pd.DataFrame()

In [25]:
# Train the autoencoder on mini-batches of user rows, then reconstruct the full
# user x book matrix to obtain a predicted (scaled) rating for every pair.
with tf.Session() as session:
    epochs = 100
    batch_size = 35

    session.run(init)
    session.run(local_init)

    # At least one batch, so a matrix with fewer than `batch_size` rows does not
    # make np.array_split fail on zero sections.
    num_batches = max(1, user_book_matrix.shape[0] // batch_size)
    # Keep the batches under their own name: `user_book_matrix` stays a single
    # ndarray, so an interrupted run leaves the notebook in a re-runnable state.
    batches = np.array_split(user_book_matrix, num_batches)

    for i in range(epochs):

        avg_cost = 0
        for batch in batches:
            _, l = session.run([optimizer, loss], feed_dict={X: batch})
            avg_cost += l

        # Mean of the per-batch losses for this epoch.
        avg_cost /= num_batches

        print("epoch: {} Loss: {}".format(i + 1, avg_cost))

    preds = session.run(decoder_op, feed_dict={X: user_book_matrix})

# Everything below is plain pandas and does not need the session.
# Label rows/columns up front so stack() yields (User-ID, Title) pairs directly,
# instead of mapping millions of positional indices back through Python lambdas.
pred_data = pd.DataFrame(preds, index=users, columns=books)
pred_data = pred_data.stack().reset_index(name='Rating')
pred_data.columns = ['User-ID', 'Title', 'Rating']

# Drop every (user, title) pair that already appears in `combined`, so only
# books the user has no recorded rating for can be recommended.
keys = ['User-ID', 'Title']
index_1 = pred_data.set_index(keys).index
index_2 = combined.set_index(keys).index

top_ten_ranked = pred_data[~index_1.isin(index_2)]
# Highest predicted rating first within each user, then keep ten rows per user.
top_ten_ranked = top_ten_ranked.sort_values(['User-ID', 'Rating'], ascending=[True, False])
top_ten_ranked = top_ten_ranked.groupby('User-ID').head(10)

epoch: 1 Loss: 0.3330082863896758
epoch: 2 Loss: 0.2834988310114368
epoch: 3 Loss: 0.06891370060147492
epoch: 4 Loss: 0.004131296721215432
epoch: 5 Loss: 0.0035203018374127017
epoch: 6 Loss: 0.003364562090220196
epoch: 7 Loss: 0.0033105375887416714
epoch: 8 Loss: 0.003279232164169406
epoch: 9 Loss: 0.003258676910363547
epoch: 10 Loss: 0.003139110174603187
epoch: 11 Loss: 0.0028949335363314867
epoch: 12 Loss: 0.0028867129233901153
epoch: 13 Loss: 0.0028807780805176924
epoch: 14 Loss: 0.0028760817917677893
epoch: 15 Loss: 0.0028722748482743134
epoch: 16 Loss: 0.0028691348187359317
epoch: 17 Loss: 0.002866508112964476
epoch: 18 Loss: 0.002864284383307037
epoch: 19 Loss: 0.0028623822145164013
epoch: 20 Loss: 0.0028607401316269087
epoch: 21 Loss: 0.0028593110145775824
epoch: 22 Loss: 0.0028580579405220654
epoch: 23 Loss: 0.0028569519169280655
epoch: 24 Loss: 0.0028559696901548693
epoch: 25 Loss: 0.0028550924648848043
epoch: 26 Loss: 0.002854305053395884
epoch: 27 Loss: 0.002853594858526856


In [26]:
# Recommendations produced for one example user.
top_ten_ranked[top_ten_ranked['User-ID'] == 278582]

Unnamed: 0,User-ID,Title,Rating
18660405,278582,The Lovely Bones: A Novel,0.074696
18659952,278582,The Da Vinci Code,0.062631
18660710,278582,The Secret Life of Bees,0.047807
18657487,278582,Harry Potter and the Chamber of Secrets (Book 2),0.045059
18657491,278582,Harry Potter and the Prisoner of Azkaban (Book 3),0.044297
18656352,278582,Bridget Jones's Diary,0.041779
18658056,278582,Life of Pi,0.041695
18657493,278582,Harry Potter and the Sorcerer's Stone (Harry P...,0.039219
18660634,278582,The Red Tent (Bestselling Backlist),0.03846
18661051,278582,To Kill a Mockingbird,0.037768


In [28]:
# The same user's recorded ratings, highest first, for comparison.
book_rating[book_rating['User-ID'] == 278582].sort_values('Rating', ascending=False)

Unnamed: 0,User-ID,ISBN,Rating,Title
174885,278582,0226848620,10,Chinese Bell Murders (Judge Dee Mysteries)
176582,278582,157566254X,10,"Skin Deep, Blood Red"
40008,278582,0441478123,10,The Left Hand of Darkness (Remembering Tomorrow)
174861,278582,0061044725,10,Search the Shadows
58156,278582,0451202503,10,The Songcatcher: A Ballad Novel
64570,278582,1400034779,10,The No. 1 Ladies' Detective Agency (Today Show...
175958,278582,0345350499,10,The Mists of Avalon
176314,278582,0449223558,9,Murdering Mr. Monti: A Merry Little Tale of Se...
174877,278582,0140277471,9,Blanche Cleans Up
176438,278582,0515136557,8,The Cat Who Brought Down the House


In [32]:
# Save the ranked recommendations (columns User-ID, Title, Rating) to a CSV in
# the working directory; the DataFrame index is not written.
top_ten_ranked.to_csv('user_recommendations.csv', index=False)