In [22]:
import tensorflow_recommenders as tfrs
import tensorflow as tf
import pandas as pd
import numpy as np
import pickle as pkl
from typing import Dict, Text

In [23]:
df_ratings = pd.read_csv("../../../Dataset/Tourism Rating/raw/tourism_rating.csv")   
df_tourism = pd.read_csv("../../../Dataset/Tourism/tourism.csv")


In [24]:
df_ratings.columns

Index(['User_Id', 'Place_Id', 'Place_Ratings'], dtype='object')

In [25]:
tourism = tf.data.Dataset.from_tensor_slices(dict(df_tourism))
ratings = tf.data.Dataset.from_tensor_slices(dict(df_ratings))

In [26]:
ratings = ratings.map(lambda x:{
    "user_id":x["User_Id"],
    "place_id":x["Place_Id"]
})
tourism = tourism.map(lambda x:x["Place_Id"])

In [27]:
print(ratings)
print(tourism)

<MapDataset element_spec={'user_id': TensorSpec(shape=(), dtype=tf.int64, name=None), 'place_id': TensorSpec(shape=(), dtype=tf.int64, name=None)}>
<MapDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>


In [28]:
user_vocab = tf.keras.layers.IntegerLookup(mask_token=None)
user_vocab.adapt(ratings.map(lambda x: x["user_id"]))

tourism_vocab = tf.keras.layers.IntegerLookup(mask_token=None)
tourism_vocab.adapt(tourism)

In [29]:
print(user_vocab.get_vocabulary())
print(tourism_vocab.get_vocabulary())

[-1, 267, 276, 143, 40, 142, 17, 54, 49, 242, 184, 19, 289, 247, 227, 168, 157, 105, 99, 294, 179, 91, 86, 74, 230, 198, 170, 159, 144, 126, 97, 53, 31, 28, 283, 274, 258, 206, 195, 194, 167, 146, 107, 65, 38, 299, 200, 187, 160, 131, 124, 116, 45, 44, 33, 27, 288, 228, 212, 208, 178, 173, 136, 93, 92, 67, 58, 287, 263, 262, 261, 260, 257, 250, 234, 224, 204, 182, 181, 145, 134, 94, 78, 297, 296, 286, 282, 277, 239, 231, 223, 201, 192, 175, 149, 117, 84, 71, 57, 55, 15, 273, 240, 226, 222, 216, 202, 165, 164, 139, 138, 137, 113, 108, 102, 76, 68, 64, 61, 30, 26, 292, 285, 272, 265, 255, 248, 189, 185, 174, 163, 135, 133, 127, 123, 118, 106, 96, 79, 63, 32, 29, 20, 18, 12, 281, 264, 244, 235, 214, 158, 147, 128, 120, 114, 104, 103, 80, 36, 25, 14, 9, 5, 295, 268, 256, 243, 233, 210, 193, 171, 162, 150, 129, 125, 98, 87, 83, 59, 51, 23, 290, 280, 238, 237, 229, 217, 215, 190, 172, 169, 161, 140, 122, 90, 66, 56, 43, 37, 35, 298, 278, 269, 225, 220, 213, 203, 199, 196, 186, 176, 155, 152,

In [30]:
class CollaborativeModel(tfrs.Model):
    def __init__(self):
        super().__init__()
        self.user_model = tf.keras.Sequential([
            user_vocab,
            tf.keras.layers.Embedding(user_vocab.vocabulary_size(), 16)
        ])
        self.tourism_model = tf.keras.Sequential([
            tourism_vocab,
            tf.keras.layers.Embedding(tourism_vocab.vocabulary_size(), 16)
        ])
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=tourism.batch(128).map(self.tourism_model)
            )
        )
    def compute_loss(self, features, training=False):
        user_embeddings = self.user_model(features["user_id"])
        tourism_embeddings = self.tourism_model(features["place_id"])
        return self.task(user_embeddings, tourism_embeddings)

In [31]:
model = CollaborativeModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad())
model.fit(ratings.batch(32), epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x226bf859460>

In [32]:
index = tfrs.layers.factorized_top_k.BruteForce (model.user_model)
no_user = 1
index.index_from_dataset(tourism.batch(100).map(lambda place: (place,model.tourism_model(place))))
_, titles = index(np.array([no_user]))
print(f"Recommendations for user {no_user}: {titles[0,:10]}")

Recommendations for user 1: [238 365 323 149 126 118 106 205  10 157]


In [33]:
pkl.dump(user_vocab.get_vocabulary(), open("./pickles/user_vocab.pkl", "wb"))
pkl.dump(tourism_vocab.get_vocabulary(), open("./pickles/tourism_vocab.pkl", "wb"))
tf.data.experimental.save(tourism, "./tensor/tourism")

In [34]:
model.save_weights("./weights/model_weights")