# Training TFRS

In [1]:
from typing import Dict, Any, Text

import numpy as np 
import pandas as pd

import tensorflow as tf
import tensorflow_recommenders as tfrs

## **Reading in the Data** 

In [2]:
train_df = pd.read_csv('train.csv', dtype={'user_no': str, 'item_no': str})
test_df = pd.read_csv('test.csv', dtype={'user_no': str, 'item_no': str})

In [23]:
train_df_filtered = train_df.loc[train_df['user_no'].isin(train_df['user_no'].value_counts()[:1000].index)]

In [24]:
train_dataset = tf.data.Dataset.from_tensor_slices(dict(train_df_filtered))
test_dataset = tf.data.Dataset.from_tensor_slices(dict(test_df))

In [5]:
for elem in train_dataset.take(3):
    print(elem)

{'user_no': <tf.Tensor: shape=(), dtype=string, numpy=b'9060639138425951676'>, 'item_no': <tf.Tensor: shape=(), dtype=string, numpy=b'-478270421339298398'>, 'gender_description': <tf.Tensor: shape=(), dtype=string, numpy=b'boys'>, 'brand': <tf.Tensor: shape=(), dtype=string, numpy=b'aden + anais'>, 'product_group': <tf.Tensor: shape=(), dtype=string, numpy=b'bedding'>}
{'user_no': <tf.Tensor: shape=(), dtype=string, numpy=b'9060639138425951676'>, 'item_no': <tf.Tensor: shape=(), dtype=string, numpy=b'-4352133231638554813'>, 'gender_description': <tf.Tensor: shape=(), dtype=string, numpy=b'boys'>, 'brand': <tf.Tensor: shape=(), dtype=string, numpy=b'ralph lauren'>, 'product_group': <tf.Tensor: shape=(), dtype=string, numpy=b'jumpers and knitwear'>}
{'user_no': <tf.Tensor: shape=(), dtype=string, numpy=b'9060639138425951676'>, 'item_no': <tf.Tensor: shape=(), dtype=string, numpy=b'3628487599004239534'>, 'gender_description': <tf.Tensor: shape=(), dtype=string, numpy=b'unisex'>, 'brand': 

In [34]:
unique_users = train_df_filtered['user_no'].unique()
unique_items = train_df_filtered['item_no'].unique()

print(len(unique_users))
print(len(unique_items))

1000
10432


In [35]:
EMBEDDING_DIM = 32
NUM_OOV_INDICES = 1

In [36]:
user_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_users, 
      num_oov_indices=NUM_OOV_INDICES),
  # We add an additional embedding to account for unknown tokens.
  tf.keras.layers.Embedding(len(unique_users) + NUM_OOV_INDICES, EMBEDDING_DIM)
])

In [37]:
item_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_items, 
      num_oov_indices=NUM_OOV_INDICES),
  tf.keras.layers.Embedding(len(unique_items) + NUM_OOV_INDICES, EMBEDDING_DIM)
])

In [38]:
item_dataset = train_dataset.map(lambda x: x['item_no'])

metrics = tfrs.metrics.FactorizedTopK(
  candidates=item_dataset.batch(128).map(item_model)
)

In [39]:
task = tfrs.tasks.Retrieval(
  metrics=metrics
)

In [40]:
class SimpleTFRSModel(tfrs.Model):

    def __init__(self, user_model, item_model, task):
        super().__init__()
        self.user_model: tf.keras.Model = user_model
        self.item_model: tf.keras.Model = item_model
        self.task: tf.keras.layers.Layer = task

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        # We pick out the user features and pass them into the user model.
        user_embeddings = self.user_model(features["user_no"])
        # And pick out the movie features and pass them into the movie model,
        # getting embeddings back.
        positive_item_embeddings = self.item_model(features["item_no"])

        # The task computes the loss and the metrics.
        return self.task(user_embeddings, positive_item_embeddings)

In [41]:
model = SimpleTFRSModel(user_model, item_model, task)
model.compile(optimizer=tf.keras.optimizers.Adam())

In [42]:
train_dataset_interactions = train_dataset.map(lambda x: {
    'user_no': x['user_no'],
    'item_no': x['item_no']
})
test_dataset_interactions = test_dataset.map(lambda x: {
    'user_no': x['user_no'],
    'item_no': x['item_no']
})

In [43]:
cached_train = train_dataset_interactions.shuffle(100_000).batch(1024).cache()
cached_test = test_dataset_interactions.batch(512).cache()

In [45]:
history = model.fit(cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [47]:
# Create a model that takes in raw query features, and
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
# recommends movies out of the entire items dataset.
index.index_from_dataset(
  tf.data.Dataset.zip((item_dataset.batch(100), item_dataset.batch(100).map(model.item_model)))
)

<tensorflow_recommenders.layers.factorized_top_k.BruteForce at 0x13e8ef310>

In [49]:
train_df_filtered

Unnamed: 0,user_no,item_no,gender_description,brand,product_group
366,8611951538862997486,-5730313128884875484,boys,air jordan,clothing sets
367,8611951538862997486,8568137491440306643,boys,boss,bottoms
368,8611951538862997486,2548468278650164360,unisex,boss,tops
369,8611951538862997486,6475930040942565267,boys,ralph lauren,tops
370,8611951538862997486,-247398092582614634,boys,air jordan,clothing sets
...,...,...,...,...,...
578340,-1471686302949907949,-3282703495377499121,unisex,britax,strollers
578341,-1471686302949907949,4109825327457723985,unisex,didriksons,gloves and mittens
578342,-1471686302949907949,5766067121942017613,unisex,didriksons,gloves and mittens
578343,-1471686302949907949,-4377679585094127757,unisex,didriksons,gloves and mittens


In [51]:
# Get recommendations.
_, titles = index(tf.constant(["8611951538862997486"]))
print(f"Recommendations for user 8611951538862997486: {titles[0, :3]}")

Recommendations for user 8611951538862997486: [b'3189056047292869834' b'3189056047292869834' b'9082322682150583002']


In [69]:
_, titles = index.query_with_exclusions(tf.constant(["8611951538862997486"]), 
                                       tf.constant([items_to_exclude]))

In [52]:
item_info_df = pd.read_csv('item_info.csv', dtype={'item_no': str})

In [58]:
recommendations = [item.numpy().decode() for item in titles[0]]

In [70]:
titles[0]

<tf.Tensor: shape=(10,), dtype=string, numpy=
array([b'-4719306461609634737', b'-4719306461609634737',
       b'-4719306461609634737', b'-4719306461609634737',
       b'-4719306461609634737', b'-4719306461609634737',
       b'-4719306461609634737', b'-4719306461609634737',
       b'-4719306461609634737', b'-4719306461609634737'], dtype=object)>

In [60]:
item_info_df.loc[item_info_df['item_no'].isin(recommendations)]

Unnamed: 0,item_no,colour,gender_description,brand,product_group,min_age,max_age
24312,9082322682150583002,red,boys,adidas,clothing sets,0.125,4.0
28453,3189056047292869834,red,boys,air jordan,all in ones,0.125,0.625
32158,8568137491440306643,navy,boys,boss,bottoms,0.375,3.0
51617,6475930040942565267,black,boys,ralph lauren,tops,1.0,14.0


In [72]:
item_info_df.loc[item_info_df['item_no'] == '-4719306461609634737']

Unnamed: 0,item_no,colour,gender_description,brand,product_group,min_age,max_age
27828,-4719306461609634737,pink,unisex,kuling,gloves and mittens,0.125,7.0


In [66]:
items_to_exclude = train_df_filtered.loc[train_df_filtered['user_no'] == '8611951538862997486']['item_no'].unique()

In [68]:
tf.constant([items_to_exclude])

<tf.Tensor: shape=(1, 14), dtype=string, numpy=
array([[b'-5730313128884875484', b'8568137491440306643',
        b'2548468278650164360', b'6475930040942565267',
        b'-247398092582614634', b'3189056047292869834',
        b'9082322682150583002', b'-2704888249198936738',
        b'-3644029383138024689', b'-7476420848659739392',
        b'-4866887541185258299', b'-500451012866804228',
        b'137006744928801487', b'-439802747482401384']], dtype=object)>

In [61]:
train_df_filtered.loc[train_df_filtered['user_no'] == '8611951538862997486']

Unnamed: 0,user_no,item_no,gender_description,brand,product_group
366,8611951538862997486,-5730313128884875484,boys,air jordan,clothing sets
367,8611951538862997486,8568137491440306643,boys,boss,bottoms
368,8611951538862997486,2548468278650164360,unisex,boss,tops
369,8611951538862997486,6475930040942565267,boys,ralph lauren,tops
370,8611951538862997486,-247398092582614634,boys,air jordan,clothing sets
371,8611951538862997486,3189056047292869834,boys,air jordan,all in ones
372,8611951538862997486,9082322682150583002,boys,adidas,clothing sets
373,8611951538862997486,-2704888249198936738,boys,adidas,clothing sets
374,8611951538862997486,-5730313128884875484,boys,air jordan,clothing sets
375,8611951538862997486,8568137491440306643,boys,boss,bottoms


In [46]:
3 + 3

6