# Book Recommender System in Tensorflow

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import preprocessing
from sklearn.metrics import precision_score
from sklearn.model_selection import train_test_split
from sqlalchemy import create_engine

In [2]:
k = 10

epochs = 10
display_step = 10

learning_rate = 0.3

batch_size = 25

### Reading Dataset and splitting it in a training set and a test set

In [3]:
sql = 'SELECT user_id, book_id, rating, date_created FROM public."Reviews"'

engine = create_engine('postgresql://ece651_ml:TVL3MV0mguz0DOhLbbm2@localhost:5432/ece651')

# Reading dataset

df = pd.pandas.read_sql(sql, engine)

y = df.date_created
df = df.drop('date_created', axis=1)

df.columns = ['user', 'book', 'rating']

X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.2)

train_data = X_train
test_data = X_test

num_books = df.book.nunique()
num_users = df.user.nunique()

print("USERS: {} BOOKS: {}".format(num_users, num_books))
print(df.head(5))

USERS: 147 BOOKS: 40
   user  book  rating
0  2292   360       5
1  2293   360       5
2  2294   360       5
3  2297   655       4
4  2295   360       5


### Loading training set with three columns: user, book and ratings

In [4]:
# Normalize in [0, 1]

u = df['user'].values.astype(float)

user_min = u.min()
user_range = u.max() - u.min()

min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(u.reshape(-1,1))
df_normalized = pd.DataFrame(x_scaled)
df['user'] = df_normalized


b = df['book'].values.astype(float)

book_min = b.min()
book_range = b.max() - b.min()

min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(b.reshape(-1,1))
df_normalized = pd.DataFrame(x_scaled)
df['book'] = df_normalized

r = df['rating'].values.astype(float)

rating_min = r.min()
rating_range = r.max() - r.min()

min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(r.reshape(-1,1))
df_normalized = pd.DataFrame(x_scaled)
df['rating'] = df_normalized

### Convert DataFrame in user-item matrix

In [5]:
matrix = df.pivot(index='user', columns='book', values='rating')
matrix.fillna(0, inplace=True)

### Users and items ordered as they are in matrix

In [6]:
users = matrix.index.tolist()
books = matrix.columns.tolist()

matrix = matrix.values

print("Matrix shape: {}".format(matrix.shape))

Matrix shape: (147, 40)


### Network Parameters

In [7]:
num_input = num_books   # num of items
num_hidden_1 = 10       # 1st layer num features
num_hidden_2 = 5        # 2nd layer num features (the latent dim)

X = tf.placeholder(tf.float64, [None, num_input])

weights = {
    'encoder_h1': tf.Variable(tf.random_normal([num_input, num_hidden_1], dtype=tf.float64)),
    'encoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_hidden_2], dtype=tf.float64)),
    'decoder_h1': tf.Variable(tf.random_normal([num_hidden_2, num_hidden_1], dtype=tf.float64)),
    'decoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_input], dtype=tf.float64)),
}

biases = {
    'encoder_b1': tf.Variable(tf.random_normal([num_hidden_1], dtype=tf.float64)),
    'encoder_b2': tf.Variable(tf.random_normal([num_hidden_2], dtype=tf.float64)),
    'decoder_b1': tf.Variable(tf.random_normal([num_hidden_1], dtype=tf.float64)),
    'decoder_b2': tf.Variable(tf.random_normal([num_input], dtype=tf.float64)),
}

### Building the encoder

In [8]:
def encoder(x):
    # Encoder Hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']), biases['encoder_b1']))
    # Encoder Hidden layer with sigmoid activation #2
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']), biases['encoder_b2']))
    return layer_2

### Building the decoder

In [9]:
def decoder(x):
    # Decoder Hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']), biases['decoder_b1']))
    # Decoder Hidden layer with sigmoid activation #2
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']), biases['decoder_b2']))
    return layer_2

### Construct model

In [10]:
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)

### Prediction

In [11]:
y_pred = decoder_op

### Targets are the input data.

In [12]:
y_true = X

### Define loss and optimizer, minimize the squared error

In [13]:
loss = tf.losses.mean_squared_error(y_true, y_pred)
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)

predictions = pd.DataFrame()

### Define evaluation metrics

In [14]:
eval_x = tf.placeholder(tf.int32, )
eval_y = tf.placeholder(tf.int32, )
pre, pre_op = tf.metrics.precision(labels=eval_x, predictions=eval_y)

### Initialize the variables

In [15]:
init = tf.global_variables_initializer()
local_init = tf.local_variables_initializer()

### Train the Model

In [16]:
with tf.Session() as session:
    session.run(init)
    session.run(local_init)

    num_batches = int(matrix.shape[0] / batch_size)
    matrix = np.array_split(matrix, num_batches)

    for i in range(epochs):

        avg_cost = 0

        for batch in matrix:
            _, l = session.run([optimizer, loss], feed_dict={X: batch})
            avg_cost += l

        avg_cost /= num_batches

        print("Epoch: {} Loss: {}".format(i + 1, avg_cost))

        # if i % display_step == 0 or i == 1:
        #     print('Step %i: Minibatch Loss: %f' % (i, l))

    print("Predictions...")

    matrix = np.concatenate(matrix, axis=0)

    preds = session.run(decoder_op, feed_dict={X: matrix})

    # print(matrix)
    # print(preds)
    
    predictions = predictions.append(pd.DataFrame(preds))

    predictions = predictions.stack().reset_index(name='rating')
    predictions.columns = ['user', 'book', 'rating']
    predictions['user'] = predictions['user'].map(lambda value: users[value])
    predictions['book'] = predictions['book'].map(lambda value: books[value])

    print(predictions)

    keys = ['user', 'book']
    i1 = predictions.set_index(keys).index
    i2 = df.set_index(keys).index

    recs = predictions[~i1.isin(i2)]
    recs = recs.sort_values(['user', 'rating'], ascending=[True, False])
    recs = recs.groupby('user').head(k)
    recs.to_csv('prediction.csv', sep=',', index=False, header=False)

Epoch: 1 Loss: 0.32607042193412783
Epoch: 2 Loss: 0.32056357264518737
Epoch: 3 Loss: 0.3134516477584839
Epoch: 4 Loss: 0.30433675050735476
Epoch: 5 Loss: 0.2928016185760498
Epoch: 6 Loss: 0.27848378419876096
Epoch: 7 Loss: 0.26117937862873075
Epoch: 8 Loss: 0.24092041552066804
Epoch: 9 Loss: 0.21784095466136932
Epoch: 10 Loss: 0.19158402383327483
Predictions...
      user      book    rating
0      0.0  0.000000  0.240523
1      0.0  0.300000  0.280023
2      0.0  0.316667  0.937551
3      0.0  0.319231  0.128938
4      0.0  0.323077  0.707076
5      0.0  0.325641  0.847956
6      0.0  0.328205  0.037666
7      0.0  0.379487  0.216074
8      0.0  0.678205  0.186799
9      0.0  0.679487  0.989310
10     0.0  0.687179  0.182303
11     0.0  0.688462  0.081031
12     0.0  0.689744  0.168084
13     0.0  0.697436  0.013617
14     0.0  0.698718  0.935491
15     0.0  0.701282  0.240274
16     0.0  0.702564  0.119312
17     0.0  0.703846  0.056903
18     0.0  0.706410  0.322204
19     0.0  0.83

In [17]:
predictions['user'] = df['user'] * user_range + user_min
predictions['book'] = df['book'] * book_range + book_min

pred = predictions.sort_values(['user', 'rating'], ascending=[True, False])

In [18]:
pred.loc[pred['user'] == 2380].head(10)

Unnamed: 0,user,book,rating
144,2380.0,670.0,0.301436
195,2380.0,677.0,0.277618
127,2380.0,382.0,0.216074
146,2380.0,662.0,0.099414
110,2380.0,375.0,0.030696


In [19]:
pred.loc[pred['user'] == 2380]['book'].head(10)

144    670.0
195    677.0
127    382.0
146    662.0
110    375.0
Name: book, dtype: float64