In [1]:
import numpy as np
import pandas as pd

import torch.nn as nn                 # the torch module to implement the Neural Networks
import torch.nn.parallel              # for parallel computations
import torch.optim as optim           # for optimizers
import torch.utils.data               # tools
from torch.autograd import Variable   # for Stochastic Gradient Descent

In [2]:
reviews = pd.read_csv('data/attractions/reviews/attractions_reviews_clean.csv')
reviews.head()

Unnamed: 0,attraction_id,rating,review,review_date,user,user_id
0,0,5,Another 'Dave' Guides us Around Vancouver. Lan...,"March 14, 2019",drew22perthaustralia,0
1,0,5,Fantastic way to explore VC. An easy way to ex...,"March 1, 2019",marc_h,1
2,0,5,This was a great half day tour!. Was there for...,"February 28, 2019",maggiehand,2
3,0,5,All the main attractions. Scott was our lovely...,"December 19, 2018",catherine255066,3
4,0,5,Excellent Vancouver Sightseeing Tour. We would...,"November 29, 2018",gearjamkw,4


In [3]:
details = pd.read_csv('data/attractions/details/attractions_details_clean.csv')
details.head()

Unnamed: 0,attraction_id,name,country,province,city,location,price,rating
0,0,vancouver_city_sightseeing_tour,canada,british_columbia,vancouver,"{'lat': 49.1978322, 'lng': -123.0649959}",80.0,4.5
1,1,vancouver_to_victoria_and_butchart_gardens_tou...,canada,british_columbia,vancouver,"{'lat': 49.1978322, 'lng': -123.0649959}",210.0,5.0
2,2,quebec_city_and_montmorency_falls_day_trip_fro...,canada,quebec,montreal,"{'lat': 45.5001458, 'lng': -73.5720264}",115.0,4.5
3,3,niagara_falls_day_trip_from_toronto,canada,ontario,toronto,"{'lat': 43.6561507, 'lng': -79.3842642}",169.0,5.0
4,4,"best_of_niagara_falls_tour_from_niagara_falls,...",canada,ontario,niagara_falls,"{'lat': 43.0857136, 'lng': -79.0824311}",158.0,5.0


In [4]:
reviews.drop(['review', 'review_date', 'user'], axis=1, inplace=True)
reviews.head()


Unnamed: 0,attraction_id,rating,user_id
0,0,5,0
1,0,5,1
2,0,5,2
3,0,5,3
4,0,5,4


In [5]:
# Pivot the ratings data to get a user-movie rating matrix
reviews_matrix = reviews.pivot_table(index='user_id', columns='attraction_id', values='rating', aggfunc='mean')
reviews_matrix.fillna(0, inplace=True)

In [6]:
reviews_matrix

attraction_id,0,1,2,3,4,5,6,7,8,10,...,3645,3646,3647,3650,3651,3652,3653,3654,3655,3656
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13088,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13089,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13090,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
from sklearn.model_selection import train_test_split
# Split the data into training and testing sets
X_train, X_test = train_test_split(reviews_matrix.values, test_size=0.2, random_state=42)

In [8]:
# Normalize the training data to have zero mean and unit variance
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)
X_train_norm

# Convert pivot table DataFrame to binary matrix format using threshold
# binary_matrix = (pivot_table.fillna(0) > 3).astype(np.int32).values

array([[-0.06353113, -0.07871372, -0.07034215, ...,  0.        ,
        -0.03777828, -0.01381975],
       [-0.06353113, -0.07871372, -0.07034215, ...,  0.        ,
        -0.03777828, -0.01381975],
       [-0.06353113, -0.07871372, -0.07034215, ...,  0.        ,
        -0.03777828, -0.01381975],
       ...,
       [-0.06353113, -0.07871372, -0.07034215, ...,  0.        ,
        -0.03777828, -0.01381975],
       [-0.06353113, -0.07871372, -0.07034215, ...,  0.        ,
        -0.03777828, -0.01381975],
       [-0.06353113, -0.07871372, -0.07034215, ...,  0.        ,
        -0.03777828, -0.01381975]])

In [27]:
# Define the number of visible and hidden units for the RBM model
visible_units = X_train_norm.shape[1]
hidden_units = 100
learning_rate = 0.01
batch_size = 100
epochs = 10

print(visible_units, hidden_units)

1619 100


In [28]:
train_set = torch.FloatTensor(X_train_norm)
test_set = torch.FloatTensor(X_test)

In [29]:
import tensorflow as tf
from rbm_model import RBM

# Train RBM model
rbm_model = RBM(visible_units, hidden_units)
optimizer = tf.keras.optimizers.Adam(learning_rate)

for epoch in range(epochs):
    loss_update = 0
    for i in range(0, X_train_norm.shape[0], batch_size):
        batch = X_train_norm[i:i+batch_size]
        with tf.GradientTape() as tape:
            reconstructed_batch = rbm_model(batch)
            loss = tf.reduce_mean(tf.square(batch - reconstructed_batch))
            loss_update = loss 
        gradients = tape.gradient(loss, rbm_model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, rbm_model.trainable_variables))
    print("Epoch: ", epoch, "Loss: ", loss_update.numpy())


Epoch:  0 Loss:  0.6830478
Epoch:  1 Loss:  0.68159264
Epoch:  2 Loss:  0.6812017
Epoch:  3 Loss:  0.6808563
Epoch:  4 Loss:  0.68024915
Epoch:  5 Loss:  0.67988926
Epoch:  6 Loss:  0.6797809
Epoch:  7 Loss:  0.6796494
Epoch:  8 Loss:  0.67950606
Epoch:  9 Loss:  0.67936534


In [35]:
# Save the trained model for future use
#rbm_model.save('weight/rbm_weight_model.h5')

# Example usage
# Assume you have trained an RBM model named "model"
# and you have a test dataset "test_data"
# You can save the model using the following code:
checkpoint_path = "weight/rbm_weight_model"
checkpoint = tf.train.Checkpoint(model=rbm_model)
checkpoint.write(checkpoint_path)

# You can then load the model using the following code:
loaded_checkpoint = tf.train.Checkpoint(model=RBM(visible_units, hidden_units))
loaded_checkpoint.restore(checkpoint_path)
new_rbm_model = loaded_checkpoint.model


In [31]:
def evaluate(model, test_matrix):
    aps = []
    for user_id in range(test_matrix.shape[0]):
        user_ratings = test_matrix[user_id]
        rated_items = np.where(user_ratings > 0)[0]
        if len(rated_items) > 0:
            hidden_representation = model.sample_hidden(tf.constant([user_ratings], dtype=tf.float32))
            predicted_ratings = model.sample_visible(hidden_representation)
            recommended_items = (-predicted_ratings.numpy()).argsort()[0]
            ap = average_precision(recommended_items, rated_items)
            aps.append(ap)
    map_score = np.mean(aps)
    return map_score

def average_precision(recommended_items, relevant_items):
    ap = 0
    hits = 0
    for i, item in enumerate(recommended_items):
        if item in relevant_items:
            hits += 1
            ap += hits / (i + 1)
    if hits > 0:
        ap /= hits
    return ap

# Use the trained RBM model to predict the ratings for the test set
X_test_norm = scaler.transform(X_test)

print(evaluate(new_rbm_model, X_test_norm))

0.8870238557274591


In [33]:
# Make hotel recommendations for a given user
user_id = 0
user_ratings = X_train_norm[user_id]
hidden_representation = new_rbm_model.sample_hidden(tf.constant([user_ratings], dtype=tf.float32))
predicted_ratings = new_rbm_model.sample_visible(hidden_representation)
recommendations = (-predicted_ratings.numpy()).argsort()[0][:10]

print(recommendations)

[ 202    6  416 1329  115 1085 1084 1083 1082 1081]
