# Book recommender using TensorFlow

In [6]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

In [7]:
# Load the three ';'-separated, latin-1 encoded tables. Forward slashes are
# used in the paths because they resolve on Windows, Linux and macOS alike,
# whereas the previous 'datasets\\...' form only resolves on Windows.
rating = pd.read_csv('datasets/Ratings.csv', sep=';', encoding="latin-1")
user = pd.read_csv('datasets/Users.csv', sep=';', encoding="latin-1")
book = pd.read_csv('datasets/Books.csv', sep=';', encoding="latin-1")

# Attach the book columns to every rating row; ISBN is the shared key.
book_rating = pd.merge(rating, book, on='ISBN')
book_rating.head()

  user = pd.read_csv('datasets\\Users.csv', sep=';', encoding="latin-1")


Unnamed: 0,User-ID,ISBN,Rating,Title,Author,Year,Publisher
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
1,2313,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
3,8680,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
4,10314,034545104X,9,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books


In [8]:
# Remove the book columns that the rest of the notebook never reads.
# Rebinding the name (instead of inplace=True) together with errors='ignore'
# lets this cell be re-run without a KeyError once the columns are gone.
cols = ['Year', 'Publisher', 'Author']
book_rating = book_rating.drop(columns=cols, errors='ignore')
book_rating.head()

Unnamed: 0,User-ID,ISBN,Rating,Title
0,276725,034545104X,0,Flesh Tones: A Novel
1,2313,034545104X,5,Flesh Tones: A Novel
2,6543,034545104X,0,Flesh Tones: A Novel
3,8680,034545104X,5,Flesh Tones: A Novel
4,10314,034545104X,9,Flesh Tones: A Novel


In [9]:
# Number of (non-null) ratings per title. Grouping is by Title, so rows are
# pooled regardless of their ISBN.
rating_count = (
    book_rating
    .groupby(['Title'])['Rating']
    .count()
    .rename('RatingCount_book')
    .reset_index()
)
rating_count.head()

Unnamed: 0,Title,RatingCount_book
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1


In [10]:
# Keep only the titles that received at least `threshold` ratings.
threshold = 25
rating_count = rating_count[rating_count['RatingCount_book'] >= threshold]
rating_count.head()

Unnamed: 0,Title,RatingCount_book
75,'Salem's Lot,47
203,10 Lb. Penalty,61
422,101 Dalmatians,37
673,"14,000 Things to Be Happy About",28
697,16 Lighthouse Road,65


In [11]:
# Bring back the individual rating rows for the titles that survived the
# filter (left join from the filtered title list).
user_rating = rating_count.merge(book_rating, on='Title', how='left')
user_rating.head()

Unnamed: 0,Title,RatingCount_book,User-ID,ISBN,Rating
0,'Salem's Lot,47,8936,067103975X,0
1,'Salem's Lot,47,172245,067103975X,0
2,'Salem's Lot,47,189835,067103975X,5
3,'Salem's Lot,47,9226,0451168089,0
4,'Salem's Lot,47,33283,0451168089,10


In [12]:
# Number of (non-null) ratings each user contributed to the retained titles.
user_count = (
    user_rating
    .groupby(['User-ID'])['Rating']
    .count()
    .rename('RatingCount_user')
    .reset_index()
)
user_count.head()

Unnamed: 0,User-ID,RatingCount_user
0,8,2
1,9,2
2,10,1
3,14,1
4,16,2


In [13]:
# Keep only the users with at least `threshold` ratings.
# Note: this reuses (and overwrites) the name `threshold` from the book filter.
threshold = 20
user_count = user_count[user_count['RatingCount_user'] >= threshold]
user_count.head()

Unnamed: 0,User-ID,RatingCount_user
52,243,68
54,254,139
89,487,21
96,507,61
115,638,51


In [14]:
# Restrict the rating rows to the retained users; the inner join also
# appends each user's RatingCount_user.
combined = pd.merge(user_rating, user_count, on='User-ID', how='inner')
combined.head()

Unnamed: 0,Title,RatingCount_book,User-ID,ISBN,Rating,RatingCount_user
0,'Salem's Lot,47,8936,067103975X,0,177
1,1st to Die: A Novel,509,8936,0446610038,0,177
2,A Case of Need,236,8936,0451210638,0,177
3,A Perfect Stranger,54,8936,0440168724,0,177
4,Accident,126,8936,0440217547,0,177


In [15]:
# (rows, columns) of the filtered rating table.
combined.shape

(263467, 6)

In [16]:
# Size of the user and book vocabularies after filtering.
unique_books = combined['Title'].nunique()
unique_users = combined['User-ID'].nunique()
print(f"Number of unique books:{unique_books}")
print(f"Number of unique users:{unique_users}")

Number of unique books:5850
Number of unique users:3192


In [17]:
# Rescale ratings into [0, 1], the same range as the sigmoid output of the
# decoder's last layer.
scaler = MinMaxScaler()
combined['Rating'] = combined['Rating'].values.astype(float)

# Give the scaled frame combined's own index: assigning a DataFrame/Series to
# a column aligns on index labels, so a default 0..n-1 index would only be
# correct while `combined` happens to have a RangeIndex (NaNs otherwise).
rating_scaled = pd.DataFrame(
    scaler.fit_transform(combined['Rating'].values.reshape(-1, 1)),
    index=combined.index,
)
# The frame was built without column names, so its single column is labelled 0.
combined['Rating'] = rating_scaled[0]

In [18]:
# pivot() needs unique (User-ID, Title) pairs; drop_duplicates keeps the
# first row of every repeated pair.
combined = combined.drop_duplicates(subset=['User-ID', 'Title'])

# Users as rows, titles as columns; pairs without a rating become 0.
user_book_matrix = (
    combined
    .pivot(index='User-ID', columns='Title', values='Rating')
    .fillna(0)
)

# Remember the row/column labels before reducing the table to a bare array.
users = user_book_matrix.index.tolist()
books = user_book_matrix.columns.tolist()

user_book_matrix = user_book_matrix.to_numpy()

In [19]:
# Switch to the TF1 compatibility API: the cells below use placeholders and
# sessions. Note that this rebinds the name `tf`, which the import cell at
# the top of the notebook bound to plain `tensorflow`.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
# Alternative kept for reference: tf.compat.v1.disable_eager_execution()

Instructions for updating:
non-resource variables are not supported in the long term


In [20]:
# Graph-level seed: without it the random initial weights differ on every
# run. With it, a fresh kernel + Run All yields the same initial weights
# (re-running this cell in a live kernel adds new ops to the graph and will
# still draw different values).
SEED = 42
tf.set_random_seed(SEED)

# Layer widths: one input unit per title, squeezed down to a 5-unit code.
num_input = combined['Title'].nunique()
num_hidden_1 = 10
num_hidden_2 = 5

# Batch of user rows; the batch dimension is left open.
X = tf.placeholder(tf.float64, [None, num_input])


def _normal_variable(shape):
    """Return a float64 tf.Variable initialised with tf.random_normal(shape)."""
    return tf.Variable(tf.random_normal(shape, dtype=tf.float64))


# Weight matrices, keyed by the layer they belong to.
weights = {
    'encoder_h1': _normal_variable([num_input, num_hidden_1]),
    'encoder_h2': _normal_variable([num_hidden_1, num_hidden_2]),
    'decoder_h1': _normal_variable([num_hidden_2, num_hidden_1]),
    'decoder_h2': _normal_variable([num_hidden_1, num_input]),
}

# Bias vectors, one per layer, sized to that layer's output width.
biases = {
    'encoder_b1': _normal_variable([num_hidden_1]),
    'encoder_b2': _normal_variable([num_hidden_2]),
    'decoder_b1': _normal_variable([num_hidden_1]),
    'decoder_b2': _normal_variable([num_input]),
}


In [21]:
def encoder(x):
    """Pass `x` through the two sigmoid encoder layers.

    Uses the module-level `weights` / `biases` dicts: first
    'encoder_h1'/'encoder_b1', then 'encoder_h2'/'encoder_b2'.
    Returns the activation of the second layer.
    """
    hidden = x
    for w_key, b_key in (('encoder_h1', 'encoder_b1'),
                         ('encoder_h2', 'encoder_b2')):
        pre_activation = tf.add(tf.matmul(hidden, weights[w_key]), biases[b_key])
        hidden = tf.nn.sigmoid(pre_activation)
    return hidden

def decoder(x):
    """Pass `x` through the two sigmoid decoder layers.

    Uses the module-level `weights` / `biases` dicts: first
    'decoder_h1'/'decoder_b1', then 'decoder_h2'/'decoder_b2'.
    Returns the activation of the second layer.
    """
    hidden = x
    for w_key, b_key in (('decoder_h1', 'decoder_b1'),
                         ('decoder_h2', 'decoder_b2')):
        pre_activation = tf.add(tf.matmul(hidden, weights[w_key]), biases[b_key])
        hidden = tf.nn.sigmoid(pre_activation)
    return hidden

In [22]:
# Wire the autoencoder: input -> encoder -> decoder.
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)

# The training target is the input itself; the prediction is the decoder output.
y_true, y_pred = X, decoder_op

In [23]:
# Mean squared error between the input and the decoder output, minimised
# with RMSProp.
loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred)
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.03).minimize(loss)

# Precision metric fed through two int32 placeholders. Mind the naming:
# eval_x is passed as the labels and eval_y as the predictions.
eval_x = tf.placeholder(tf.int32)
eval_y = tf.placeholder(tf.int32)
pre, pre_op = tf.metrics.precision(labels=eval_x, predictions=eval_y)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [24]:
# Initialiser ops, run at the start of the training session.
init, local_init = (
    tf.global_variables_initializer(),
    tf.local_variables_initializer(),
)
# Empty placeholder frame; the training cell assigns the real predictions.
pred_data = pd.DataFrame()

In [25]:
# --- Training ---------------------------------------------------------------
epochs = 100
batch_size = 35

# At least one batch even when there are fewer users than batch_size
# (np.array_split rejects 0 sections).
num_batches = max(1, user_book_matrix.shape[0] // batch_size)

# Keep the batches under their own name so the global user_book_matrix is
# never replaced by a list; an interrupted run can then simply be re-run.
batches = np.array_split(user_book_matrix, num_batches)

with tf.Session() as session:
    session.run(init)
    session.run(local_init)

    for i in range(epochs):
        avg_cost = 0
        for batch in batches:
            _, batch_loss = session.run([optimizer, loss], feed_dict={X: batch})
            avg_cost += batch_loss

        # array_split returns exactly num_batches pieces, so this is the mean
        # of the per-batch losses.
        avg_cost /= num_batches

        print("epoch: {} Loss: {}".format(i + 1, avg_cost))

    # Run every user's full rating row through the trained network.
    preds = session.run(decoder_op, feed_dict={X: user_book_matrix})

# --- Ranking (plain pandas; the session is no longer needed) ----------------
# Label rows and columns up front so that stacking yields the
# (User-ID, Title, Rating) long format directly, instead of translating every
# one of the users x books positions through a Python lambda.
pred_data = (
    pd.DataFrame(preds, index=users, columns=books)
    .rename_axis(index='User-ID', columns='Title')
    .stack()
    .reset_index(name='Rating')
)

keys = ['User-ID', 'Title']
index_1 = pred_data.set_index(keys).index
index_2 = combined.set_index(keys).index

# Discard (user, title) pairs already present in `combined`, then keep the
# ten highest predicted ratings per user.
top_ten_ranked = pred_data[~index_1.isin(index_2)]
top_ten_ranked = top_ten_ranked.sort_values(['User-ID', 'Rating'], ascending=[True, False])
top_ten_ranked = top_ten_ranked.groupby('User-ID').head(10)

epoch: 1 Loss: 0.36475486414773123
epoch: 2 Loss: 0.3026807881318606
epoch: 3 Loss: 0.07298528950579547
epoch: 4 Loss: 0.0034013047793900572
epoch: 5 Loss: 0.003140218457422004
epoch: 6 Loss: 0.0029561906607801117
epoch: 7 Loss: 0.0029269718260575947
epoch: 8 Loss: 0.002909530059096741
epoch: 9 Loss: 0.00289765869108653
epoch: 10 Loss: 0.002792376728093886
epoch: 11 Loss: 0.0027137084193732384
epoch: 12 Loss: 0.0027087745832402137
epoch: 13 Loss: 0.0027048607883913504
epoch: 14 Loss: 0.002701682343076055
epoch: 15 Loss: 0.002699093985279183
epoch: 16 Loss: 0.0026969435179585613
epoch: 17 Loss: 0.0026951131045020054
epoch: 18 Loss: 0.002693533390801359
epoch: 19 Loss: 0.002692155206094977
epoch: 20 Loss: 0.0026909405925914973
epoch: 21 Loss: 0.002689859226018518
epoch: 22 Loss: 0.002688885826341153
epoch: 23 Loss: 0.002687999799561042
epoch: 24 Loss: 0.002687189305856169
epoch: 25 Loss: 0.0026864598474496014
epoch: 26 Loss: 0.0026858165670841276
epoch: 27 Loss: 0.002685240307403217
epoc

In [31]:
# Recommendations stored for user 243.
top_ten_ranked[top_ten_ranked['User-ID'].eq(243)]

Unnamed: 0,User-ID,Title,Rating
0,243,The Da Vinci Code,0.063406
1,243,The Secret Life of Bees,0.047143
2,243,Harry Potter and the Chamber of Secrets (Book 2),0.04574
3,243,Harry Potter and the Prisoner of Azkaban (Book 3),0.043863
4,243,Life of Pi,0.041753
5,243,Bridget Jones's Diary,0.041351
6,243,Harry Potter and the Sorcerer's Stone (Harry P...,0.039205
7,243,The Red Tent (Bestselling Backlist),0.038323
8,243,To Kill a Mockingbird,0.038177
9,243,Harry Potter and the Goblet of Fire (Book 4),0.037303


In [27]:
# Ratings given by user 278582, highest first.
(
    book_rating
    .loc[book_rating['User-ID'].eq(278582)]
    .sort_values(by=['Rating'], ascending=False)
)

Unnamed: 0,User-ID,ISBN,Rating,Title
174885,278582,0226848620,10,Chinese Bell Murders (Judge Dee Mysteries)
176582,278582,157566254X,10,"Skin Deep, Blood Red"
40008,278582,0441478123,10,The Left Hand of Darkness (Remembering Tomorrow)
174861,278582,0061044725,10,Search the Shadows
58156,278582,0451202503,10,The Songcatcher: A Ballad Novel
64570,278582,1400034779,10,The No. 1 Ladies' Detective Agency (Today Show...
175958,278582,0345350499,10,The Mists of Avalon
176314,278582,0449223558,9,Murdering Mr. Monti: A Merry Little Tale of Se...
174877,278582,0140277471,9,Blanche Cleans Up
176438,278582,0515136557,8,The Cat Who Brought Down the House


In [28]:
# Save the per-user recommendations to CSV in the working directory.
# index=False leaves the DataFrame index out of the file.
top_ten_ranked.to_csv('user_recommendations.csv', index=False)

In [29]:
# Load the saved recommendations back, replacing the in-memory table.
# The file was written without an index, so the reloaded frame gets a new
# default index.
top_ten_ranked = pd.read_csv('user_recommendations.csv')

In [30]:
# Unique user IDs present in the recommendation table (returned as an array).
top_ten_ranked['User-ID'].unique()

array([   243,    254,    487, ..., 278582, 278633, 278843], dtype=int64)