In [1]:
%pip install -q tensorflow-recommenders

In [2]:
import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds

In [3]:
import tensorflow_recommenders as tfrs

In [4]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [5]:
df_ratings = pd.read_csv(
    '/content/gdrive/MyDrive/Capstone-Company-Based/dataset/final_dataset/ratings-final_1.csv',
)
df_ratings.head()

Unnamed: 0,user_id,rating,hotel_cluster
0,12,5,1
1,12,3,1
2,12,3,1
3,93,5,80
4,93,5,21


In [6]:
df_hotels = pd.read_csv(
    '/content/gdrive/MyDrive/Capstone-Company-Based/dataset/final_dataset/hotels.csv',
    usecols=[0,1]
)
df_hotels.head()

Unnamed: 0,hotel_cluster,name
0,1,Orchard Hotel Singapore
1,2,Sheraton Tower Singapore
2,3,M Social Singapore
3,4,Swissotel The Stamford
4,5,Crowne Plaza CHANGI AIRPORT


In [7]:
df_merged = df_ratings.merge(df_hotels, on='hotel_cluster')
df_merged.head()

Unnamed: 0,user_id,rating,hotel_cluster,name
0,12,5,1,Orchard Hotel Singapore
1,12,3,1,Orchard Hotel Singapore
2,12,3,1,Orchard Hotel Singapore
3,93,5,80,Regent SINGAPORE
4,93,5,21,Porcelain Hotel by JL Asia


In [8]:
df_merged.user_id = df_merged.user_id.astype(str)
df_merged.head()

Unnamed: 0,user_id,rating,hotel_cluster,name
0,12,5,1,Orchard Hotel Singapore
1,12,3,1,Orchard Hotel Singapore
2,12,3,1,Orchard Hotel Singapore
3,93,5,80,Regent SINGAPORE
4,93,5,21,Porcelain Hotel by JL Asia


In [9]:
ratings = tf.data.Dataset.from_tensor_slices({
    'user_id': df_merged.user_id.to_list(),
    'name': df_merged.name.to_list(),
    'rating': df_merged.rating.to_list()
})

In [10]:
list(ratings.take(5).as_numpy_iterator())

[{'name': b'Orchard Hotel Singapore', 'rating': 5, 'user_id': b'12'},
 {'name': b'Orchard Hotel Singapore', 'rating': 3, 'user_id': b'12'},
 {'name': b'Orchard Hotel Singapore', 'rating': 3, 'user_id': b'12'},
 {'name': b'Regent SINGAPORE', 'rating': 5, 'user_id': b'93'},
 {'name': b'Porcelain Hotel by JL Asia', 'rating': 5, 'user_id': b'93'}]

In [11]:
ratings = ratings.map(lambda x: {
    'user_id': x['user_id'],
    'rating' : x['rating'],
    'name': x['name'],
})
list(ratings.take(5).as_numpy_iterator())

[{'name': b'Orchard Hotel Singapore', 'rating': 5, 'user_id': b'12'},
 {'name': b'Orchard Hotel Singapore', 'rating': 3, 'user_id': b'12'},
 {'name': b'Orchard Hotel Singapore', 'rating': 3, 'user_id': b'12'},
 {'name': b'Regent SINGAPORE', 'rating': 5, 'user_id': b'93'},
 {'name': b'Porcelain Hotel by JL Asia', 'rating': 5, 'user_id': b'93'}]

In [12]:
hotels = tf.data.Dataset.from_tensor_slices({
    'name': df_merged.name.to_list()
})

In [13]:
list(hotels.take(5).as_numpy_iterator())

[{'name': b'Orchard Hotel Singapore'},
 {'name': b'Orchard Hotel Singapore'},
 {'name': b'Orchard Hotel Singapore'},
 {'name': b'Regent SINGAPORE'},
 {'name': b'Porcelain Hotel by JL Asia'}]

In [14]:
hotels = hotels.map(lambda x: x['name'])
list(hotels.take(5).as_numpy_iterator())

[b'Orchard Hotel Singapore',
 b'Orchard Hotel Singapore',
 b'Orchard Hotel Singapore',
 b'Regent SINGAPORE',
 b'Porcelain Hotel by JL Asia']

In [15]:
hotels_name = hotels.batch(1_000)
user_ids = ratings.batch(1_000_000).map(lambda x: x["user_id"])

unique_user_ids = np.unique(np.concatenate(list(user_ids)))
unique_hotel_names = np.unique(np.concatenate(list(hotels_name)))

In [16]:
tf.random.set_seed(42)
shuffled = ratings.shuffle(100, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80)
test = shuffled.skip(80).take(20)

In [17]:
class RankingModel(tf.keras.Model):

  def __init__(self):
    super().__init__()
    embedding_dimension = 32

    # Compute embeddings for users.
    self.user_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_user_ids, mask_token=None),
      tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
    ])

    # Compute embeddings for movies.
    self.hotels_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_hotel_names, mask_token=None),
      tf.keras.layers.Embedding(len(unique_hotel_names) + 1, embedding_dimension)
    ])

    # Compute predictions.
    self.ratings = tf.keras.Sequential([
      # Learn multiple dense layers.
      tf.keras.layers.Dense(256, activation="relu"),
      tf.keras.layers.Dense(64, activation="relu"),
      # Make rating predictions in the final layer.
      tf.keras.layers.Dense(1)
  ])

  def call(self, inputs):

    user_id, name = inputs

    user_embedding = self.user_embeddings(user_id)
    hotels_embedding = self.hotels_embeddings(name)

    return self.ratings(tf.concat([user_embedding, hotels_embedding], axis=1))

In [18]:
RankingModel()((["42"], ["Orchard Hotel Singapore"]))



<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[-0.00389026]], dtype=float32)>

In [19]:
task = tfrs.tasks.Ranking(
  loss = tf.keras.losses.MeanSquaredError(),
  metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

In [20]:
class HotelsModel(tfrs.models.Model):

  def __init__(self):
    super().__init__()
    self.ranking_model: tf.keras.Model = RankingModel()
    self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
      loss = tf.keras.losses.MeanSquaredError(),
      metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )

  def call(self, features: Dict[str, tf.Tensor]) -> tf.Tensor:
    return self.ranking_model(
        (features["user_id"], features["name"]))

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    labels = features.pop("rating")

    rating_predictions = self(features)

    # The task computes the loss and the metrics.
    return self.task(labels=labels, predictions=rating_predictions)

In [21]:
model = HotelsModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

In [22]:
cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()

In [23]:
model.fit(cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f0350ca9b90>

In [24]:
model.evaluate(cached_test, return_dict=True)



{'loss': 14.693414688110352,
 'regularization_loss': 0,
 'root_mean_squared_error': 3.8331990242004395,
 'total_loss': 14.693414688110352}

In [26]:
test_ratings = {}
test_hotels_name = ["Regent SINGAPORE", "Porcelain Hotel by JL Asia", "Orchard Hotel Singapore"]
for name in test_hotels_name:
  test_ratings[name] = model({
      "user_id": np.array(["42"]),
      "name": np.array([name])
  })

print("Ratings:")
for title, score in sorted(test_ratings.items(), key=lambda x: x[1], reverse=True):
  print(f"{title}: {score}")

Ratings:
Porcelain Hotel by JL Asia: [[6.796811]]
Regent SINGAPORE: [[6.79332]]
Orchard Hotel Singapore: [[6.5999207]]


In [27]:
tf.saved_model.save(model, "export")



INFO:tensorflow:Assets written to: export/assets


INFO:tensorflow:Assets written to: export/assets


In [28]:
loaded = tf.saved_model.load("export")

loaded({"user_id": np.array(["42"]), "name": ["Regent SINGAPORE"]}).numpy()

array([[6.79332]], dtype=float32)

In [29]:
converter = tf.lite.TFLiteConverter.from_saved_model("export")
tflite_model = converter.convert()
open("converted_model.tflite", "wb").write(tflite_model)



144056

In [33]:
interpreter = tf.lite.Interpreter(model_path="converted_model.tflite")
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Test the model.
if input_details[0]["name"] == "serving_default_hotels:0":
  interpreter.set_tensor(input_details[0]["index"], np.array(["Regent SINGAPORE"]))
  interpreter.set_tensor(input_details[1]["index"], np.array(["42"]))
else:
  interpreter.set_tensor(input_details[0]["index"], np.array(["42"]))
  interpreter.set_tensor(input_details[1]["index"], np.array(["Regent SINGAPORE"]))

interpreter.invoke()

rating = interpreter.get_tensor(output_details[0]['index'])
print(rating)

[[6.5555663]]
