In [11]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import os
import warnings 
import tensorflow.keras as tf
from sklearn.model_selection import train_test_split
warnings.filterwarnings('ignore')

In [12]:
# Load the three CSV tables used in this notebook (paths are relative to the
# notebook's working directory).
ratings = pd.read_csv("data/ratings.csv")
tags = pd.read_csv("data/book_tags.csv")
books = pd.read_csv("data/books.csv")

# Largest user id present in the ratings table, shown as the cell output.
# (The un-displayed mid-cell `ratings.head()` and the commented-out
# `books.head()` were dead code and have been removed.)
ratings["user_id"].max()

53424

In [13]:
# Hold out 20% of the ratings rows for validation.
# A fixed random_state makes the shuffle — and therefore every downstream
# loss value — reproducible across Restart & Run All.
SPLIT_SEED = 42
X_train, X_test = train_test_split(
    ratings, test_size=0.2, shuffle=True, random_state=SPLIT_SEED
)

In [14]:
# Count the distinct book ids and user ids in the ratings table; the
# embedding tables in the following cells are sized from these counts.
# NOTE(review): sizing by count is only safe if no id exceeds the count
# (i.e. ids have no gaps) — TODO confirm for this dataset.
num_unique_books = ratings["book_id"].nunique()
num_unique_users = ratings["user_id"].nunique()

In [15]:
# Book branch of the network.
# The input layer accepts one book id per example.
book_inputs = tf.layers.Input(shape=(1,))

# Width of each embedding vector (can be made larger if a bigger embedding is
# wanted); the user branch reuses this value.
size_of_embedding_layer = 15

# Embedding table with (num_unique_books + 1) rows and
# size_of_embedding_layer columns, applied to the input via the functional API.
book_embedding_table = tf.layers.Embedding(num_unique_books + 1, size_of_embedding_layer)
embed_books = book_embedding_table(book_inputs)

# Flatten the per-example lookup result into a plain vector.
books_out = tf.layers.Flatten()(embed_books)

In [16]:
# User branch: the same Input -> Embedding -> Flatten pipeline as the book
# branch, with the table sized from the number of distinct users.
user_inputs = tf.layers.Input(shape=(1,))
user_embedding_table = tf.layers.Embedding(num_unique_users + 1, size_of_embedding_layer)
embed_users = user_embedding_table(user_inputs)
inputs_out = tf.layers.Flatten()(embed_users)

In [17]:
# Join the flattened book and user vectors into one feature vector per example.
branch_outputs = [books_out, inputs_out]
conc_layer = tf.layers.Concatenate()(branch_outputs)

# Show the shape of the merged tensor as the cell output.
conc_layer.get_shape()

TensorShape([None, 30])

In [18]:
# Hidden fully connected layer stacked on the concatenated embeddings.
num_nodes = 128  # do not tweak this parameter
activ = 'relu'
hidden_layer = tf.layers.Dense(units=num_nodes, activation=activ)
x = hidden_layer(conc_layer)
x

<KerasTensor: shape=(None, 128) dtype=float32 (created by layer 'dense')>

In [19]:
# Output ("recommendation") layer: a single unit producing the prediction.
# Bind only `x_out` here. The previous chained assignment (`x_out = x = ...`)
# also overwrote `x`, so re-running this cell stacked another 1-unit layer on
# top of the previous output instead of on the hidden layer.
# NOTE(review): `activ` is ReLU, which clamps the prediction at zero — confirm
# that is intended for a rating regressor rather than a linear output.
x_out = tf.layers.Dense(1, activation=activ)(x)
x_out

<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'dense_1')>

In [20]:
# Assemble the functional model: two id inputs (book, then user) feeding one
# prediction output.
model = tf.Model(inputs=[book_inputs, user_inputs], outputs=x_out)
model

<tensorflow.python.keras.engine.functional.Functional at 0x7fddb02d8790>

In [21]:
# Compile the model with the Adam optimizer and a mean-squared-error
# objective, then print the layer-by-layer summary.
# NOTE(review): 0.1 is far above Adam's documented default learning rate —
# confirm this is intentional.
lr = 0.1
loss = 'mean_squared_error'
opt = tf.optimizers.Adam(learning_rate=lr)
model.compile(loss=loss, optimizer=opt)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 1, 15)        150015      input_1[0][0]                    
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1, 15)        801375      input_2[0][0]                    
______________________________________________________________________________________________

In [23]:
# Fit the model on the training split and score it on the held-out split
# after every epoch. Inputs are passed in the same order the model was built
# with: [book ids, user ids].
batch_size = 64
epochs = 5

train_features = [X_train.book_id, X_train.user_id]
validation = ([X_test.book_id, X_test.user_id], X_test.rating)

hist = model.fit(
    train_features,
    X_train.rating,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=validation,
)

Epoch 1/5
  922/12272 [=>............................] - ETA: 44s - loss: 5.1576

KeyboardInterrupt: 

In [24]:
# Plot the per-epoch training and validation loss recorded in `hist`
# (the object returned by `model.fit`).
train_loss = hist.history['loss']
val_loss = hist.history['val_loss']

# Use an explicit figure/axes pair rather than implicit pyplot state, and
# label both axes so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(train_loss, color='r', label='Training Loss')
ax.plot(val_loss, color='b', label='Validation Loss')
ax.set_title("Loss: Training and Validation Losses")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss (mean squared error)")
ax.legend()
plt.show()

NameError: name 'hist' is not defined

In [25]:
# Persist the model under the path 'model'.
model.save('model')

# Pull the weight matrix out of the layer named 'embedding' so it can be
# visualised, and show its shape.
# NOTE(review): 'embedding' is an auto-generated layer name (no name= was
# passed when the layer was created); it may differ if the model-building
# cells are re-run in the same kernel — confirm it still refers to the book
# embedding.
book_embedding = model.get_layer(name='embedding')
book_embedding_weights = book_embedding.get_weights()[0]
book_embedding_weights.shape

INFO:tensorflow:Assets written to: model/assets


(10001, 15)

In [26]:
# Work on a copy of the books table re-indexed by its "book_id" column, so
# the original `books` frame stays untouched; titles are read from it later.
vis_books = books.copy().set_index("book_id")

In [27]:
# Export the learned book embeddings as two tab-separated files, one row per
# book id found in the ratings: vecs.tsv (embedding values) and meta.tsv
# (book title).
book_ids = list(ratings.book_id.unique())

# Map each rated book id to its title.
# NOTE(review): the lookup is positional (row i-1 of the books table), not by
# the "book_id" index label — presumably the table's row order matches the
# ids used in ratings; verify against the data before switching to .loc.
titles = vis_books['title']
dict_map = {i: titles.iloc[i - 1] for i in book_ids}

# `with` guarantees both files are closed even if a write fails part-way, and
# the explicit UTF-8 encoding keeps non-ASCII titles from depending on the
# platform's default encoding.
with open('vecs.tsv', 'w', encoding='utf-8') as out_v, \
        open('meta.tsv', 'w', encoding='utf-8') as out_m:
    for i in book_ids:
        book = dict_map[i]
        # Row i of the embedding matrix is the vector for book id i.
        embeddings = book_embedding_weights[i]
        out_m.write(book + "\n")
        out_v.write('\t'.join([str(x) for x in embeddings]) + "\n")

In [28]:
# Display the model object (its repr) as the cell output.
model

<tensorflow.python.keras.engine.functional.Functional at 0x7fddb02d8790>

In [34]:
# Collect the distinct book ids from the ratings, both as a plain list and as
# a NumPy array, and display the array.
book_ids = list(ratings["book_id"].unique())
book_arr = np.asarray(book_ids)
book_arr

array([    1,     2,     3, ...,  9998,  9999, 10000])

In [35]:
# Array holding the constant 100 once per entry in `book_ids`.
# NOTE(review): presumably 100 is a user id paired with every book — confirm.
user = np.array([100] * len(book_ids))