# Book recommender using TensorFlow

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

In [2]:
rating = pd.read_csv('datasets\\Ratings.csv', sep=';', encoding="latin-1")
user = pd.read_csv('datasets\\Users.csv', sep=';', encoding="latin-1")
book = pd.read_csv('datasets\\Books.csv', sep=';', encoding="latin-1")
book_rating = pd.merge(rating, book, on='ISBN')
book_rating.head()

  user = pd.read_csv('datasets\\Users.csv', sep=';', encoding="latin-1")


Unnamed: 0,User-ID,ISBN,Rating,Title,Author,Year,Publisher
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
1,2313,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
3,8680,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
4,10314,034545104X,9,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books


In [3]:
cols = ['Year', 'Publisher', 'Author']
book_rating.drop(cols, axis=1, inplace=True)
book_rating.head()

Unnamed: 0,User-ID,ISBN,Rating,Title
0,276725,034545104X,0,Flesh Tones: A Novel
1,2313,034545104X,5,Flesh Tones: A Novel
2,6543,034545104X,0,Flesh Tones: A Novel
3,8680,034545104X,5,Flesh Tones: A Novel
4,10314,034545104X,9,Flesh Tones: A Novel


In [4]:
rating_count = (book_rating.groupby(by=['Title'])['Rating'].count().
                reset_index().rename(columns={"Rating":"RatingCount_book"})
                [['Title', 'RatingCount_book']])
rating_count.head()

Unnamed: 0,Title,RatingCount_book
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1


In [5]:
threshold = 25
rating_count = rating_count.query('RatingCount_book >= @threshold')
rating_count.head()

Unnamed: 0,Title,RatingCount_book
75,'Salem's Lot,47
203,10 Lb. Penalty,61
422,101 Dalmatians,37
673,"14,000 Things to Be Happy About",28
697,16 Lighthouse Road,65


In [6]:
user_rating = pd.merge(rating_count, book_rating, left_on='Title', 
                       right_on='Title', how='left')
user_rating.head()

Unnamed: 0,Title,RatingCount_book,User-ID,ISBN,Rating
0,'Salem's Lot,47,8936,067103975X,0
1,'Salem's Lot,47,172245,067103975X,0
2,'Salem's Lot,47,189835,067103975X,5
3,'Salem's Lot,47,9226,0451168089,0
4,'Salem's Lot,47,33283,0451168089,10


In [7]:
user_count = (user_rating.groupby(by=['User-ID'])['Rating'].count().
              reset_index().reset_index().
              rename(columns={'Rating':'RatingCount_user'})
              [['User-ID','RatingCount_user']])
user_count.head()

Unnamed: 0,User-ID,RatingCount_user
0,8,2
1,9,2
2,10,1
3,14,1
4,16,2


In [8]:
threshold = 20
user_count = user_count.query('RatingCount_user >= @threshold')
user_count.head()

Unnamed: 0,User-ID,RatingCount_user
52,243,68
54,254,139
89,487,21
96,507,61
115,638,51


In [9]:
combined = user_rating.merge(user_count, left_on='User-ID',right_on='User-ID',
                             how='right')
combined.head()

Unnamed: 0,Title,RatingCount_book,User-ID,ISBN,Rating,RatingCount_user
0,2nd Chance,356,243,0446612790,0,68
1,A Confederacy of Dunces,81,243,0517122707,0,68
2,A Map of the World,327,243,0385720106,7,68
3,A Monk Swimming,37,243,0786863986,5,68
4,A Painted House,838,243,044023722X,7,68


In [10]:
combined.shape

(263467, 6)

In [11]:
print(f"Number of unique books:{combined['Title'].nunique()}")
print(f"Number of unique users:{combined['User-ID'].nunique()}")

Number of unique books:5850
Number of unique users:3192


In [12]:
scaler = MinMaxScaler()
combined['Rating'] = combined['Rating'].values.astype(float)
combined.head()

Unnamed: 0,Title,RatingCount_book,User-ID,ISBN,Rating,RatingCount_user
0,2nd Chance,356,243,0446612790,0.0,68
1,A Confederacy of Dunces,81,243,0517122707,0.0,68
2,A Map of the World,327,243,0385720106,7.0,68
3,A Monk Swimming,37,243,0786863986,5.0,68
4,A Painted House,838,243,044023722X,7.0,68


In [13]:
rating_scaled = pd.DataFrame(scaler.fit_transform(combined['Rating'].values.reshape(-1,1)))
rating_scaled.head()

Unnamed: 0,0
0,0.0
1,0.0
2,0.7
3,0.5
4,0.7


In [45]:
combined = combined.drop_duplicates(['User-ID','Title'])
combined.head()

Unnamed: 0,Title,RatingCount_book,User-ID,ISBN,Rating,RatingCount_user
0,2nd Chance,356,243,0446612790,0.0,68
1,A Confederacy of Dunces,81,243,0517122707,0.0,68
2,A Map of the World,327,243,0385720106,0.7,68
3,A Monk Swimming,37,243,0786863986,0.5,68
4,A Painted House,838,243,044023722X,0.7,68


In [46]:
user_book_matrix = combined.pivot(index='User-ID', columns='Title',values='Rating')
user_book_matrix.head()

Title,'Salem's Lot,10 Lb. Penalty,101 Dalmatians,"14,000 Things to Be Happy About",16 Lighthouse Road,1984,1st to Die: A Novel,2001: A Space Odyssey,2010: Odyssey Two,204 Rosewood Lane,...,Zlata's Diary: A Child's Life in Sarajevo,Zodiac: The Eco-Thriller,Zombies of the Gene Pool,Zoya,ZwÃ?ÃÂ¶lf.,"\"" Lamb to the Slaughter and Other Stories (Penguin 60s S.)","\""O\"" Is for Outlaw","\""Surely You're Joking, Mr. Feynman!\"": Adventures of a Curious Character",e,stardust
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
243,,,,,,,,,,,...,,,,,,,,,,
254,,,,,,0.9,,,,,...,,0.0,,,,,,,,0.0
487,,,,,,,,,,,...,,,,,,,,,,
507,,,0.0,,,,0.0,,,,...,,,,,,,,,,
638,,,,,,,,,,,...,,,,,,,,,,


In [47]:
user_book_matrix = user_book_matrix.fillna(0, inplace=False)
user_book_matrix = user_book_matrix.values
user_book_matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [48]:
import tensorflow.compat.v1 as tf
tf.compat.v1.disable_eager_execution()

In [49]:
num_input = combined['Title'].nunique()
num_hidden_1 = 10
num_hidden_2 = 5

X = tf.placeholder(tf.float64, [None, num_input])

weights = {
    'encoder_h1': tf.Variable(tf.random_normal([num_input, num_hidden_1], dtype=tf.float64)),
    'encoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_hidden_2], dtype=tf.float64)),
    'decoder_h1': tf.Variable(tf.random_normal([num_hidden_2, num_hidden_1], dtype=tf.float64)),
    'decoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_input], dtype=tf.float64))
}

biases = {
    'encoder_b1': tf.Variable(tf.random_normal([num_hidden_1], dtype=tf.float64)),
    'encoder_b2': tf.Variable(tf.random_normal([num_hidden_2], dtype=tf.float64)),
    'decoder_b1': tf.Variable(tf.random_normal([num_hidden_1], dtype=tf.float64)),
    'decoder_b2': tf.Variable(tf.random_normal([num_input], dtype=tf.float64)),
}

In [50]:
def encoder(x):
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']), biases['encoder_b1']))
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']), biases['encoder_b2']))
    return layer_2

def decoder(x):
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']), biases['decoder_b1']))
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']), biases['decoder_b2']))
    return layer_2

In [51]:
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)

y_pred = decoder_op
y_true = X

In [52]:
loss = tf.losses.mean_squared_error(y_true, y_pred)
optimizer = tf.train.RMSPropOptimizer(0.03).minimize(loss)
eval_x = tf.placeholder(tf.int32, )
eval_y = tf.placeholder(tf.int32, )
pre, pre_op = tf.metrics.precision(labels=eval_x, predictions=eval_y)

In [53]:
init = tf.global_variables_initializer()
local_init = tf.local_variables_initializer()
pred_data = pd.DataFrame()

In [55]:
with tf.compat.v1.Session() as session:
    epochs = 100
    batch_size = 35

    session.run(init)
    session.run(local_init)

    num_batches = int(user_book_matrix.shape[0] / batch_size)
    user_book_matrix = np.array_split(user_book_matrix, num_batches)
    
    for i in range(epochs):

        avg_cost = 0
        for batch in user_book_matrix:
            _, l = session.run([optimizer, loss], feed_dict={X: batch})
            avg_cost += l

        avg_cost /= num_batches

        print("epoch: {} Loss: {}".format(i + 1, avg_cost))

    user_book_matrix = np.concatenate(user_book_matrix, axis=0)

    preds = session.run(decoder_op, feed_dict={X: user_book_matrix})

    pred_data = pred_data.append(preds)

    pred_data = pred_data.stack().reset_index(name='Rating')
    pred_data.columns = ['User-ID', 'Title', 'Rating']
    pred_data['User-ID'] = pred_data['User-ID'].map(lambda value: users[value])
    pred_data['Title'] = pred_data['Title'].map(lambda value: books[value])
    
    keys = ['User-ID', 'Book-Title']
    index_1 = pred_data.set_index(keys).index
    index_2 = combined.set_index(keys).index

    top_ten_ranked = pred_data[~index_1.isin(index_2)]
    top_ten_ranked = top_ten_ranked.sort_values(['User-ID', 'Book-Rating'], ascending=[True, False])
    top_ten_ranked = top_ten_ranked.groupby('User-ID').head(10)

epoch: 1 Loss: 0.3350597138588245
epoch: 2 Loss: 0.288353388944825
epoch: 3 Loss: 0.07644591967646892
epoch: 4 Loss: 0.0038316688873897215
epoch: 5 Loss: 0.0033487269327371985
epoch: 6 Loss: 0.0031905323701941378
epoch: 7 Loss: 0.002923780306406155
epoch: 8 Loss: 0.0027468128751574473
epoch: 9 Loss: 0.0027237682587948147
epoch: 10 Loss: 0.00271572893521247
epoch: 11 Loss: 0.002709796342609839
epoch: 12 Loss: 0.0027052378358708305
epoch: 13 Loss: 0.002701629430393351
epoch: 14 Loss: 0.002698705769604543
epoch: 15 Loss: 0.002696291280009753
epoch: 16 Loss: 0.002694265030396099
epoch: 17 Loss: 0.0026925415834309634
epoch: 18 Loss: 0.00269105988375556
epoch: 19 Loss: 0.002689776012090618
epoch: 20 Loss: 0.002688656774717946
epoch: 21 Loss: 0.0026876747500724518
epoch: 22 Loss: 0.0026868065617016547
epoch: 23 Loss: 0.0026860333362512374
epoch: 24 Loss: 0.0026853402697859883
epoch: 25 Loss: 0.0026847157156565686
epoch: 26 Loss: 0.002684150198167497
epoch: 27 Loss: 0.0026836358931857152
epoch

AttributeError: 'DataFrame' object has no attribute 'append'

In [None]:
top_ten_ranked.loc[top_ten_ranked['User-ID'] == 278582]

In [None]:
book_rating.loc[book_rating['User-ID'] == 278582].sort_values(by=['Book-Rating'], ascending=False)
