In [None]:
!pip install -q tensorflow-recommenders
!pip install -q scann

In [None]:
from typing import Dict, Text

import numpy as np
import tensorflow as tf

import tensorflow_recommenders as tfrs

import os
import pprint
import tempfile

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import pathlib
from google.colab import files



In [None]:
# Location of the raw user/vaccine CSV, kept in one named constant.
USERS_CSV_URL = 'https://raw.githubusercontent.com/mutakin-san/valo.ai/main/machine-learning/dataset/user_valo_mod_str.csv'

# Read the CSV into a DataFrame and preview its first rows as the cell output.
users = pd.read_csv(USERS_CSV_URL)
users.head()

In [None]:
# Sum the vac_1 / vac_2 columns for every (user_id, vac_booster) pair.
# The column selection has to be a *list* (double brackets): selecting with a
# bare tuple, i.e. [...]['vac_1', 'vac_2'], is rejected by pandas 2.x.
grouped_frame = (
    users.groupby(['user_id', 'vac_booster'])[['vac_1', 'vac_2']]
    .sum()
    .reset_index()
)

# from_tensor_slices takes a mapping of column name -> array.
grouped_data_dict = {name: np.array(value) for name, value in grouped_frame.items()}
grouped_data = tf.data.Dataset.from_tensor_slices(grouped_data_dict)

# One row per distinct booster value; these are the retrieval candidates.
booster_frame = users[['vac_booster']].drop_duplicates()
booster_dict = {name: np.array(value) for name, value in booster_frame.items()}
booster = tf.data.Dataset.from_tensor_slices(booster_dict)


def _as_string_tensor(value):
    """Return `value` as a tf.string tensor.

    Dataset.map traces its function in graph mode, so calling Python's
    str() on an element only yields the repr of a symbolic tensor (the
    same constant text for every row). tf.strings.as_string converts the
    actual values. String tensors are passed through untouched; the dtype
    is static, so this check happens once at trace time.
    """
    if value.dtype == tf.string:
        return value
    return tf.strings.as_string(value)


# Keep only the features used downstream, with user_id as a string feature
# (the user tower looks ids up with a StringLookup layer).
grouped_data = grouped_data.map(lambda x: {
    'user_id': _as_string_tensor(x['user_id']),
    'vac_booster': x['vac_booster'],
    'vac_1': x['vac_1'],
    'vac_2': x['vac_2'],
})

# Flatten the candidate dataset from {'vac_booster': value} to the bare value.
booster = booster.map(lambda x: x['vac_booster'])

In [None]:
# Bare expression: shows the `booster` dataset object as this cell's output.
booster

In [None]:
# Collect every batch of booster values, then reduce to the distinct values.
booster_batches = list(booster.batch(1000))
unique_boosters = np.unique(np.concatenate(booster_batches))

# Same procedure for the user ids taken from the interaction dataset.
user_id_batches = list(grouped_data.batch(1_000).map(lambda row: row["user_id"]))
unique_user_ids = np.unique(np.concatenate(user_id_batches))

print(unique_boosters)

In [None]:
tf.random.set_seed(42)

# Share of the examples used for training; the remainder is the test set.
TRAIN_PERCENT = 60

# Size the split from the data instead of assuming exactly 100,000 rows:
# with fixed 60_000 / 40_000 counts a smaller dataset leaves `test` short
# or empty, and a larger one silently drops rows.
n_examples = int(grouped_data.cardinality().numpy())
if n_examples < 0:
    # Cardinality is not statically known; count by iterating once.
    n_examples = sum(1 for _ in grouped_data)

# Integer arithmetic keeps the split exact (100,000 rows -> 60,000 / 40,000).
n_train = n_examples * TRAIN_PERCENT // 100
n_test = n_examples - n_train

# A buffer covering the whole dataset gives a full shuffle. The fixed seed and
# reshuffle_each_iteration=False keep train and test disjoint and reproducible
# across iterations.
shuffled = grouped_data.shuffle(
    max(n_examples, 1), seed=42, reshuffle_each_iteration=False)

train = shuffled.take(n_train)
test = shuffled.skip(n_train).take(n_test)

In [None]:
class VaccineModel(tfrs.Model):
  """Two-tower retrieval model matching users to booster vaccines.

  Args:
    user_model: Keras model mapping a batch of user-id strings to embeddings.
    booster_model: Keras model mapping a batch of booster names to embeddings.

  The top-K metric candidates are taken from the notebook-level `booster`
  dataset, which must exist when the model is constructed.
  """

  def __init__(self, user_model, booster_model):
    super().__init__()
    # Use the towers supplied by the caller. Previously both arguments were
    # discarded and rebuilt here from notebook globals, so whatever the
    # caller passed in was silently ignored.
    self.booster_model: tf.keras.Model = booster_model
    self.user_model: tf.keras.Model = user_model

    # Candidate embeddings against which each query is ranked for the
    # factorized top-K metrics.
    metrics = tfrs.metrics.FactorizedTopK(
      candidates=booster.batch(512).map(self.booster_model)
    )

    task = tfrs.tasks.Retrieval(metrics=metrics)
    self.task: tf.keras.layers.Layer = task

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    """Compute the retrieval loss for one batch.

    Args:
      features: Batch dict; reads the 'user_id' and 'vac_booster' entries.
      training: Accepted for the tfrs.Model interface; not used here.

    Returns:
      The value returned by the retrieval task for the user embeddings
      paired with the embeddings of the boosters in the same batch.
    """
    user_embeddings = self.user_model(features['user_id'])
    positive_vac_embeddings = self.booster_model(features['vac_booster'])
    return self.task(user_embeddings, positive_vac_embeddings)

In [None]:
embedding_dimension = 32


def _build_tower(vocabulary):
  """Return a Sequential model: string lookup followed by an embedding table."""
  lookup = tf.keras.layers.experimental.preprocessing.StringLookup(
      vocabulary=vocabulary, mask_token=None)
  # The table has one more row than there are vocabulary entries.
  embedding = tf.keras.layers.Embedding(len(vocabulary) + 1, embedding_dimension)
  return tf.keras.Sequential([lookup, embedding])


# Candidate tower first, then query tower (same construction order as before).
booster_model = _build_tower(unique_boosters)
user_model = _build_tower(unique_user_ids)

model = VaccineModel(user_model, booster_model)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.01))

# Shuffle, batch and cache the training split; batch and cache the test split.
cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()

history = model.fit(cached_train, epochs=2)

# Last expression of the cell: the evaluation metrics dict is displayed.
model.evaluate(cached_test, return_dict=True)

In [None]:
# Per-epoch top-5 accuracy recorded by model.fit.
top5_accuracy = history.history["factorized_top_k/top_5_categorical_accuracy"]

# Derive the x axis from the number of recorded epochs so the plot stays
# correct if the epoch count passed to model.fit changes.
epochs = list(range(len(top5_accuracy)))

plt.plot(epochs, top5_accuracy, label="accuracy")
plt.title("Accuracy vs epoch")
plt.xlabel("epoch")
# The plotted series is the top-5 metric, so label the axis accordingly.
plt.ylabel("Top-5 accuracy")
plt.legend();

In [None]:
# Brute-force top-2 retrieval layer that embeds queries with the user tower.
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model, k=2)


def _name_with_embedding(booster_names):
  """Pair a batch of booster names with their embeddings."""
  return (booster_names, model.booster_model(booster_names))


# Register every booster (identifier, embedding) pair as a candidate.
index.index_from_dataset(booster.batch(128).map(_name_with_embedding))

In [None]:
# Query the index with a single user id; only the returned identifiers are used.
query = tf.constant(["42"])
_scores, vac = index(query)
print(f"Recommendations for user 42: {vac[0]}")

In [None]:
# Round-trip the retrieval index through the SavedModel format inside a
# scratch directory that is removed when the `with` block exits.
with tempfile.TemporaryDirectory() as scratch_dir:
  path = os.path.join(scratch_dir, "model")

  # Write the index out ...
  tf.saved_model.save(index, path)

  # ... and read it back (TensorFlow Serving could load the same files).
  loaded = tf.saved_model.load(path)

  # Query the reloaded model with a user id to get its top predicted vaccines.
  scores, vaccines = loaded(["42"])

  print(f"Recommendations: {vaccines[0][:3]}")

In [None]:
# Save the retrieval index as a SavedModel in a fresh directory that persists
# for this cell. The previous hard-coded '/tmp/tmpq97dd3hj/model' looked like
# a path copied from an earlier run's temporary directory; that directory
# gets a different random name each run and is deleted when its `with` block
# exits, so the conversion failed on a fresh kernel.
export_dir = os.path.join(tempfile.mkdtemp(), "model")
tf.saved_model.save(index, export_dir)

# Convert the SavedModel to a TFLite flatbuffer.
converter = tf.lite.TFLiteConverter.from_saved_model(export_dir)
tflite_model = converter.convert()

# Write the TFLite model to disk.
tflite_model_file = pathlib.Path('/tmp/vac.tflite')
tflite_model_file.write_bytes(tflite_model)

In [None]:
# Download the TFLite file (uses google.colab's `files` helper).
# Uncomment the line below if you need the file locally.
# files.download(tflite_model_file)