In [1]:
import numpy as np
import pandas as pd
import os
import warnings
from typing import Dict, Text
import tensorflow as tf
import tensorflow_recommenders as tfrs

In [2]:
print(tf. __version__) 

2.10.0


In [3]:
dataset = pd.read_csv('\data\user-item-data.csv')
dataset.drop(['duration_rate'], axis=1, inplace=True)
dataset.rename(columns = {'item_id':'item'}, inplace = True)
dataset = dataset.astype({'user_id':'string', 'item': 'string'})
dataset['click_rate'] = dataset['click_rate'].astype(np.float32)
dataset.head(9)

Unnamed: 0,user_id,item,click_rate
0,1,Item01,0.189682
1,1,Item02,0.043302
2,1,Item03,0.242269
3,1,Item04,0.105846
4,1,Item05,0.035231
5,1,Item06,0.010828
6,1,Item07,0.006826
7,1,Item08,0.234307
8,1,Item09,0.212547


In [4]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 225000 entries, 0 to 224999
Data columns (total 3 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   user_id     225000 non-null  string 
 1   item        225000 non-null  string 
 2   click_rate  225000 non-null  float32
dtypes: float32(1), string(2)
memory usage: 4.3 MB


In [None]:
# df = dataset[:9]
# df.head(9)
# ds = tf.data.Dataset.from_tensor_slices(dict(df))

# for row in ds.take(1):
#     print(row)

#resultset
#{'user_id': <tf.Tensor: shape=(), dtype=string, numpy=b'1'>, 'item': <tf.Tensor: shape=(), dtype=string, numpy=b'AccountView'>, 'click_rate': <tf.Tensor: shape=(), dtype=float32, numpy=0.18968216>}

In [5]:
ds = tf.data.Dataset.from_tensor_slices(dict(dataset))

In [6]:
tf.random.set_seed(42)
shuffled = ds.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)

In [7]:
items = ds.batch(1_000_000).map(lambda x: x["item"])
user_ids = ds.batch(1_000_000).map(lambda x: x["user_id"])

unique_items = np.unique(np.concatenate(list(items)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

In [8]:
print(unique_items[:9])

[b'Item01' b'Item02' b'Item03' b'Item04' b'Item05' b'Item06' b'Item07'
 b'Item08' b'Item09']


In [9]:
print(unique_user_ids[:10])

[b'1' b'10' b'100' b'1000' b'10000' b'10001' b'10002' b'10003' b'10004'
 b'10005']


In [10]:
class RankingModel(tf.keras.Model):
    
    def __init__(self):
        super().__init__()
        embedding_dimension = 32

        # Compute embeddings for users.
        self.user_embeddings = tf.keras.Sequential([
          tf.keras.layers.StringLookup(
            vocabulary=unique_user_ids, mask_token=None),
          tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
        ])

        # Compute embeddings for movies.
        self.item_embeddings = tf.keras.Sequential([
          tf.keras.layers.StringLookup(
            vocabulary=unique_items, mask_token=None),
          tf.keras.layers.Embedding(len(unique_items) + 1, embedding_dimension)
        ])

        # Compute predictions.
        self.ratings = tf.keras.Sequential([
          # Learn multiple dense layers.
          tf.keras.layers.Dense(256, activation="relu"),
          tf.keras.layers.Dense(64, activation="relu"),
          # Make rating predictions in the final layer.
          tf.keras.layers.Dense(1)
      ])

    def call(self, inputs):
        
        user_id, item = inputs

        user_embedding = self.user_embeddings(user_id)
        item_embedding = self.item_embeddings(item)

        return self.ratings(tf.concat([user_embedding, item_embedding], axis=1))

In [11]:
RankingModel()((["10006"], ["Item07"]))



<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.03095034]], dtype=float32)>

In [12]:
task = tfrs.tasks.Ranking(
  loss = tf.keras.losses.MeanSquaredError(),
  metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

In [13]:
class MLRecommenderModel(tfrs.models.Model):
    
    def __init__(self):
        super().__init__()
        self.ranking_model: tf.keras.Model = RankingModel()
        self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
          loss = tf.keras.losses.MeanSquaredError(),
          metrics=[tf.keras.metrics.RootMeanSquaredError()]
        )

    def call(self, features: Dict[str, tf.Tensor]) -> tf.Tensor:
        return self.ranking_model((features["user_id"], features["item"]))

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        labels = features.pop("click_rate")

        rating_predictions = self(features)

        # The task computes the loss and the metrics.
        return self.task(labels=labels, predictions=rating_predictions)

In [14]:
model = MLRecommenderModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

In [15]:
cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()

In [16]:
model.fit(cached_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1c8b1f27048>

In [17]:
model.evaluate(cached_test, return_dict=True)



{'root_mean_squared_error': 0.0725601464509964,
 'loss': 0.005259113851934671,
 'regularization_loss': 0,
 'total_loss': 0.005259113851934671}

In [18]:
test_ratings = {}
test_items = ["Item01", "Item02", "Item03"]
for item in test_items:
    test_ratings[item] = model({
          "user_id": np.array(["10006"]),
          "item": np.array([item])
      })

print("Ratings:")
for title, score in sorted(test_ratings.items(), key=lambda x: x[1], reverse=True):
  print(f"{title}: {score}")

Ratings:
Item01: [[0.12994401]]
Item02: [[0.1227464]]
Item03: [[0.12235735]]


In [19]:
tf.saved_model.save(model, "\models\export")



INFO:tensorflow:Assets written to: C:\DS-development\GitHub\ML-Recommendation-Tensorflow\models\export\assets


INFO:tensorflow:Assets written to: C:\DS-development\GitHub\ML-Recommendation-Tensorflow\models\export\assets


In [20]:
loaded = tf.saved_model.load("\models\export")

loaded({"user_id": np.array(["600"]), "item": ["Item06"]}).numpy()

array([[0.13206184]], dtype=float32)

In [21]:
converter = tf.lite.TFLiteConverter.from_saved_model("\models\export")
tflite_model = converter.convert()
open("\models\tflite\converted_model.tflite", "wb").write(tflite_model)

3753000

In [22]:
interpreter = tf.lite.Interpreter(model_path="\models\tflite\converted_model.tflite")
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Test the model.
if input_details[0]["name"] == "serving_default_item:0":
    interpreter.set_tensor(input_details[0]["index"], np.array(["Item06"]))
    interpreter.set_tensor(input_details[1]["index"], np.array(["600"]))
else:
    interpreter.set_tensor(input_details[0]["index"], np.array(["600"]))
    interpreter.set_tensor(input_details[1]["index"], np.array(["Item06"]))

interpreter.invoke()

rating = interpreter.get_tensor(output_details[0]['index'])
print(rating)

[[0.13206184]]


In [23]:
print(input_details)

[{'name': 'serving_default_item:0', 'index': 0, 'shape': array([1]), 'shape_signature': array([-1]), 'dtype': <class 'numpy.bytes_'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}, {'name': 'serving_default_user_id:0', 'index': 1, 'shape': array([1]), 'shape_signature': array([-1]), 'dtype': <class 'numpy.bytes_'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
