In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd

In [2]:
import pandas as pd
col_list = ["U_ID", "P_ID", "RATING"]
dataset = pd.read_csv('Dataset/Ciao/rating.csv',usecols=col_list, sep='\t',dtype='int')

In [3]:
#to get the unique user ids from dataset
user_ids = dataset['U_ID'].values
unique_ids = np.unique(user_ids)
id_dict = {id: counter for counter, id in enumerate(unique_ids)}
#enumerate(unique_ids)
dataset['U_ID'] = dataset['U_ID'].apply(lambda x: id_dict[x])

In [4]:
#to get the unique product ids from dataset
product_ids = dataset['P_ID'].values
unique_ids = np.unique(product_ids)
#create a dictionary of users with key value from 0 to #users
id_dict = {id: counter for counter, id in enumerate(unique_ids)}
dataset['P_ID'] = dataset['P_ID'].apply(lambda x: id_dict[x])

In [5]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(dataset, test_size=0.2, random_state=42)
n_users = len(dataset.U_ID.unique())
n_products = len(dataset.P_ID.unique())

In [6]:
from keras.layers import Input, Embedding, Flatten, Dot, Dense
from keras.models import Model

Using TensorFlow backend.


In [7]:
#Input() is used to instantiate a Keras tensor.
user_input = Input(shape=[1], name="User-Input")
#Embedding(input_dim,output_dim), here latent_factor for user is consider to be 5 
user_embedding = Embedding(n_users+1, 5, name="User-Embedding")(user_input)
#Flattening a tensor means to remove all of the dimensions except for one. This is exactly what the Flatten layer do.
user_vec = Flatten(name="Flatten-Users")(user_embedding)

Instructions for updating:
Colocations handled automatically by placer.


In [8]:
product_input = Input(shape=[1], name="Product-Input")
product_embedding = Embedding(n_products+1, 5, name="Product-Embedding")(product_input)
product_vec = Flatten(name="Flatten-Products")(product_embedding)

In [9]:
#Layer that computes a dot product between samples in two tensors.
prod = Dot(name="Dot-Product", axes=1)([product_vec, user_vec])
#Model groups layers into an object with training and inference features.Model(input,output)
model = Model([user_input, product_input], prod)
#Configures the model for training.compile(optimizer, loss, metrics)
model.compile('adam', loss='mean_squared_error', metrics=['mae', 'mse'])

In [10]:
#Trains the model for a fixed number of epochs (iterations on a dataset).
history = model.fit([train.U_ID, train.P_ID], train.RATING, epochs=10, verbose=1)
model.save('regression_model.h5')

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [7]:
import matplotlib.pyplot as plt
pd.Series(history.history['loss']).plot(logy=True)
plt.xlabel("Epoch")
plt.ylabel("Training Error")

NameError: name 'history' is not defined

In [13]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Product-Input (InputLayer)      (None, 1)            0                                            
__________________________________________________________________________________________________
User-Input (InputLayer)         (None, 1)            0                                            
__________________________________________________________________________________________________
Product-Embedding (Embedding)   (None, 1, 5)         525575      Product-Input[0][0]              
__________________________________________________________________________________________________
User-Embedding (Embedding)      (None, 1, 5)         36880       User-Input[0][0]                 
____________________________________________________________________________________________

In [16]:
#import os
#os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
from keras.utils import plot_model
#tf.keras.utils.plot_model(model, to_file='model.png')
tf.keras.utils.plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)  

In [94]:
#Returns the loss value & metrics values for the model in test mode.
results = model.evaluate([test.U_ID, test.P_ID], test.RATING, batch_size=1)



In [95]:
#loss='mean_squared_error', metrics=['mae', 'mse']
results

[6.470648041065961, 1.9238553047180176, 6.470556735992432]

In [74]:
#Retrieves a layer based on either its name (unique) or index.
#get_weights() Returns the current weights of the layer.
product_embedding_learnt = model.get_layer(name='Product-Embedding').get_weights()[0]
pd.DataFrame(product_embedding_learnt).describe()

Unnamed: 0,0,1,2,3,4
count,105115.0,105115.0,105115.0,105115.0,105115.0
mean,-0.284584,-0.256382,0.29077,-0.105683,-0.282524
std,0.203124,0.203867,0.198821,0.274087,0.196882
min,-1.239104,-1.076617,-0.858611,-1.009389,-1.317708
25%,-0.360422,-0.350523,0.217114,-0.311093,-0.359921
50%,-0.292948,-0.284352,0.293701,-0.183948,-0.291685
75%,-0.213339,-0.177053,0.362213,0.111939,-0.213335
max,0.629761,0.801228,1.598547,1.035295,0.647533


In [97]:
user_embedding_learnt = model.get_layer(name='User-Embedding').get_weights()[0]

In [98]:
def recommend(user_id, number_of_products=5):
  products = user_embedding_learnt[user_id]@product_embedding_learnt.T
  mids = np.argpartition(products, -number_of_products)[-number_of_products:]
  return mids

In [99]:
recommend(user_id=1)

array([17173, 39925, 86146, 87604, 74652], dtype=int64)

In [80]:
recommend(user_id=600)

array([91543, 48431, 50477, 95234, 71816], dtype=int64)

# Creating dataset for making recommendations for the first user
product_data = np.array(list(set(dataset.P_ID)))
user = np.array([1 for i in range(len(product_data))])
predictions = model.predict([user, product_data])
predictions = np.array([a[0] for a in predictions])
recommended_product_ids = (-predictions).argsort()[:5]
print(recommended_product_ids)
print(predictions[recommended_product_ids])

# Creating dataset for making recommendations for the 600th user
product_data = np.array(list(set(dataset.P_ID)))
user = np.array([600 for i in range(len(product_data))])
predictions = model.predict([user, product_data])
predictions = np.array([a[0] for a in predictions])
recommended_product_ids = (-predictions).argsort()[:5]
print(recommended_product_ids)

user_ids = df['U_ID'].values

unique_ids = np.unique(user_ids)

id_dict = {id: counter for counter, id in enumerate(unique_ids)}

enumerate(unique_ids)

df['U_ID'] = df['U_ID'].apply(lambda x: id_dict[x])

num_items = np.unique(df['P_ID'].values).size
num_users = unique_ids.size

epochs = 500
adam_learning_rate = .001
adam_beta_1 = 0.9
adam_beta_2 = 0.999
adam_epsilon = 0.1

params = \
    {"train_epochs": epochs,
     "batches_per_step": 1,
     "use_seed": False,
     "batch_size": 10000,
     "eval_batch_size": 1,
     "learning_rate": adam_learning_rate,
     "mf_dim": 10,
     "model_layers": [int(layer) for layer in ["64", "32", "16", "8"]],
     "mf_regularization": 0.0,
     "mlp_reg_layers": [float(reg) for reg in ["0.", "0.", "0.", "0."]],
     "num_neg": 4,
     "num_gpus": 0,
     "use_tpu": False,
     "tpu": None,
     "tpu_zone": None,
     "tpu_gcp_project": None,
     "beta1": adam_beta_1,
     "beta2": adam_beta_2,
     "epsilon": adam_epsilon,
     "match_mlperf": False,
     "use_xla_for_gpu": False,
     "clone_model_in_keras_dist_strat":False,
     "epochs_between_evals": 1,
     "turn_off_distribution_strategy": True,
     "num_users": num_users,
     "num_items": num_items,
     "loss": 'mse',
     "train_size": 0.95
}
