# RUN

## Import Module

### For Optimization

In [7]:
!pip install bayesian-optimization

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [8]:
import numpy as np
from bayes_opt import BayesianOptimization

### For Model

In [9]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets

[K     |████████████████████████████████| 85 kB 2.2 MB/s 
[K     |████████████████████████████████| 462 kB 32.6 MB/s 
[K     |████████████████████████████████| 4.2 MB 4.5 MB/s 
[?25h

In [10]:
import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

import pandas as pd

import tensorflow_recommenders as tfrs

# getting data
from google.colab import auth
import gspread
from google.auth import default
from gspread_dataframe import get_as_dataframe, set_with_dataframe

# Authenticate the Colab user interactively, fetch the resulting default
# Google credentials, and build a gspread client authorized with them.
auth.authenticate_user()
# default() returns a pair; only the credentials object is kept here.
creds, _ = default()
gc = gspread.authorize(creds)

In [11]:
# Mount Google Drive at /content/drive so files stored there are reachable
# from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
cd /content/drive/MyDrive/Bangkit/Capstone/ML/ML-GH/Notogo-ML/"model_v2.2"

/content/drive/MyDrive/Bangkit/Capstone/ML/ML-GH/Notogo-ML/model_v2.2


In [13]:
# Show the current working directory to confirm the directory change above.
print(os.getcwd())

/content/drive/MyDrive/Bangkit/Capstone/ML/ML-GH/Notogo-ML/model_v2.2


## Retrieval Model

### Preparing Dataset

In [14]:
# The two project modules are imported before the TFDS calls that refer to
# their datasets by name.
import userFeatures
builder = tfds.builder('Userfeatures')
userFeatureDs = tfds.load('Userfeatures', split='train')

import wishEmbedding
builder = tfds.builder('Wishembedding')
wishEmbeddingDs = tfds.load('Wishembedding', split='train')

# Keep only the columns the model consumes: the two identifiers plus the
# "add" and "like" signals.
ratings = userFeatureDs.map(
    lambda x: {
        key: x[key]
        for key in ("location_name", "user_id", "add", "like")
    }
)
# Candidate set: just the location names from the wish-embedding dataset.
locations = wishEmbeddingDs.map(lambda x: x["location_name"])

# Peek at the first two examples as a sanity check.
for x in ratings.take(2).as_numpy_iterator():
  pprint.pprint(x)

[1mDownloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/userfeatures/1.0.0...[0m




Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/userfeatures/1.0.0.incomplete3699XW/userfeatures-train.tfrecord*...:   0%|…

[1mDataset userfeatures downloaded and prepared to /root/tensorflow_datasets/userfeatures/1.0.0. Subsequent calls will reuse this data.[0m
[1mDownloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/wishembedding/1.0.0...[0m




Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/wishembedding/1.0.0.incompleteXUW5HS/wishembedding-train.tfrecord*...:   0…

[1mDataset wishembedding downloaded and prepared to /root/tensorflow_datasets/wishembedding/1.0.0. Subsequent calls will reuse this data.[0m
{'add': 0, 'like': 1, 'location_name': b'JAKARTA', 'user_id': b'93'}
{'add': 0, 'like': 1, 'location_name': b'DUBAI', 'user_id': b'112'}


In [15]:
# Fix TensorFlow's global seed so the steps below are repeatable.
tf.random.set_seed(42)

# len() on a tf.data.Dataset raises if the cardinality is unknown or infinite,
# which is the desired failure mode before deriving split sizes from it.
NUM_DATA = len(ratings)

# reshuffle_each_iteration=False keeps one fixed order, so the take()/skip()
# calls below carve disjoint train and test subsets out of it.
shuffled = ratings.shuffle(NUM_DATA, seed=42, reshuffle_each_iteration=False)

# take()/skip() are given element counts, so the 80% split point must be an
# integer; 0.8 * NUM_DATA alone is a float.
trainset_size = int(0.8 * NUM_DATA)

train = shuffled.take(trainset_size)
test = shuffled.skip(trainset_size).take(NUM_DATA - trainset_size)

# Collect the vocabularies (unique location names / user ids) that the
# StringLookup layers of the model are built from.
location_name = locations.batch(1000)
user_ids = ratings.batch(1000).map(lambda x: x["user_id"])

unique_location_name = np.unique(np.concatenate(list(location_name)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

# Batched and cached pipelines used for training and evaluation.
cached_train = train.shuffle(NUM_DATA).batch(512).cache()
cached_test = test.batch(256).cache()

### Model
Change this cell if a different model is needed.

In [16]:
class NoToGoModel(tfrs.models.Model):
  """Multi-task recommender with one retrieval task and two prediction tasks.

  A user tower and a location tower each turn a string id into a 16-dimensional
  vector (StringLookup -> Embedding -> Dense). On top of the concatenated
  vectors two small networks predict the "add" and "like" labels, and a
  retrieval task is computed directly on the two tower outputs. The three
  losses are combined as a weighted sum.

  Relies on the notebook-level names `unique_location_name`, `unique_user_ids`
  and `locations`.
  """

  def __init__(self, rating_weight: float, like_weight: float, retrieval_weight: float) -> None:
    """Builds the towers, prediction heads and tasks.

    Args:
      rating_weight: Weight of the loss of the head trained on the "add" label.
      like_weight: Weight of the loss of the head trained on the "like" label.
      retrieval_weight: Weight of the retrieval loss.
    """
    # We take the loss weights in the constructor: this allows us to instantiate
    # several model objects with different loss weights.

    super().__init__()

    embedding_dimension = 32

    # Location and user towers. The "+ 1" in the embedding table size leaves
    # room for the extra index StringLookup adds on top of the vocabulary.
    self.location_model: tf.keras.layers.Layer = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_location_name, mask_token=None),
      tf.keras.layers.Embedding(len(unique_location_name) + 1, embedding_dimension),
      tf.keras.layers.Dense(16, activation="relu")
    ])

    self.user_model: tf.keras.layers.Layer = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_user_ids, mask_token=None),
      tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension),
      tf.keras.layers.Dense(16, activation="relu")
    ])

    # Head predicting the "add" label from the concatenated user and location
    # vectors. It ends in a single sigmoid unit, i.e. one scalar per example.
    self.rating_model = tf.keras.Sequential([
        tf.keras.layers.Dense(8, activation="relu"),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ])

    # Head predicting the "like" label; same input, slightly wider layers.
    self.like_model = tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ])

    # The tasks.
    self.rating_task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )

    self.like_task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )

    # Top-K retrieval metrics are computed against every location, embedded
    # with the location tower.
    self.retrieval_task: tf.keras.layers.Layer = tfrs.tasks.Retrieval(
        metrics=tfrs.metrics.FactorizedTopK(
            candidates=locations.batch(128).map(self.location_model)
        )
    )

    # The loss weights.
    self.rating_weight = rating_weight
    self.retrieval_weight = retrieval_weight
    self.like_weight = like_weight

  def call(self, features: Dict[Text, tf.Tensor]):
    """Runs both towers and both prediction heads.

    Args:
      features: Mapping that contains at least "user_id" and "location_name".

    Returns:
      A 4-tuple `(user_embeddings, location_embeddings, rating_predictions,
      like_predictions)`.
    """
    # We pick out the user features and pass them into the user model.
    user_embeddings = self.user_model(features["user_id"])
    # And pick out the location features and pass them into the location model.
    location_embeddings = self.location_model(features["location_name"])

    # Both heads consume the same concatenation of user and location vectors,
    # so build it once.
    joint_embeddings = tf.concat([user_embeddings, location_embeddings], axis=1)

    return (
        user_embeddings,
        location_embeddings,
        self.rating_model(joint_embeddings),
        self.like_model(joint_embeddings),
    )

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    """Returns the weighted sum of the rating, like and retrieval losses.

    Args:
      features: Mapping with "user_id", "location_name", "add" and "like".
        It is read only; the caller's dict is left untouched.
      training: Accepted for interface compatibility; not used here.

    Raises:
      KeyError: If one of the required keys is missing from `features`.
    """
    # Plain indexing on purpose: a missing label must fail loudly. Using
    # dict.pop with a second argument would instead hand back that argument
    # (a plain string) as the labels, and would also mutate the input.
    add_labels = features["add"]
    like_labels = features["like"]

    model_inputs = {
        "user_id": features["user_id"],
        "location_name": features["location_name"],
    }

    user_embeddings, location_embeddings, rating_predictions, like_predictions = self(model_inputs)

    # We compute the loss for each task.
    rating_loss = self.rating_task(
        labels=add_labels,
        predictions=rating_predictions,
    )

    like_loss = self.like_task(
        labels=like_labels,
        predictions=like_predictions,
    )
    retrieval_loss = self.retrieval_task(user_embeddings, location_embeddings)

    # And combine them using the loss weights.
    return (self.rating_weight * rating_loss
            + self.retrieval_weight * retrieval_loss
            + self.like_weight * like_loss)

## Bayes Optimization

In [17]:
def black_box(*params, **hyperparams):
  """Objective for Bayesian optimization of the retrieval-specialized model.

  Trains a fresh `NoToGoModel` with only the retrieval loss enabled, evaluates
  it on `cached_test` and returns a top-K retrieval accuracy to be maximized.

  Args:
    *params: Accepted for backward compatibility with positional calls; unused.
    **hyperparams: The sampled parameters. The optimizer invokes the objective
      with keyword arguments named after the keys of its bounds dict, so the
      function has to accept them. They are not wired into the model yet.

  Returns:
    The top-10 categorical accuracy on the test split, as a Python float.

  Raises:
    KeyError: If the evaluation result does not contain the target metric.
  """
  # NOTE(review): the choice of top-10 accuracy as the objective is an
  # assumption; switch the key below if another metric should be optimized.
  target_metric = "factorized_top_k/top_10_categorical_accuracy"

  # Initialize model
  model = NoToGoModel(rating_weight=0.0, like_weight=0.0, retrieval_weight=1.0)
  model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))

  # training model
  model.fit(cached_train, epochs=50)

  # Score on held-out data; return_dict=True gives metrics keyed by name.
  metrics = model.evaluate(cached_test, return_dict=True)
  if target_metric not in metrics:
    raise KeyError(
        f"{target_metric!r} not found in evaluation metrics: {sorted(metrics)}")

  return float(metrics[target_metric])

In [6]:
# Search space for the optimizer: parameter name -> (lower, upper) bound.
# NOTE(review): both ranges have zero width (lower == upper), so every sample
# is the same point -- these look like placeholders; confirm the real bounds.
param_bounds = dict(
    xx=(1, 1),
    xy=(2, 2),
)

# random_state is fixed so repeated runs draw the same sequence of points.
optimizer = BayesianOptimization(f=black_box, pbounds=param_bounds, random_state=1)

In [None]:
# init_points: number of purely random probes made first; random exploration
#   helps diversify the region of the search space that gets sampled.
# n_iter: number of Bayesian-optimization steps afterwards; more steps make it
#   more likely that a good maximum is found.
optimizer.maximize(init_points=2, n_iter=100)

In [None]:
# Report the optimizer's `max` attribute once the run above has finished.
print(optimizer.max)