# Modelling

## Set Up

In [1]:
import os
import pprint
import tempfile
import random
import math

from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_recommenders as tfrs
from typing import Dict, Text
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dot, Flatten

from sklearn.model_selection import train_test_split
# import tensorflow_datasets as tfds

random.seed(13)

## Data Wrangling

In [173]:
users_rating_model = pd.read_csv('user_ratings_clean_fix_4.csv', index_col=0)

In [174]:
users_rating_model.head()

Unnamed: 0,id_user,id_event,nama_event,lokasi,harga_tiket,durasi,timestamp,genre,nama_musisi,rating,lat,lng,id_event_organizer
0,528,2677,The Lonely Carnival,3043,819816,2,1675728000,Pop,Padi,5.0,37.416,-122.152,3
1,1164,1576,Acoustic Arena,1161,944923,2,1676332800,Pop,Payung Teduh,5.0,37.416,-122.152,11
2,1168,1737,Dance Drill,2404,1407616,3,1677888000,Pop,Kangen Band,5.0,40.722,-74.0,9
3,1992,180,Jakarta International Bazaar,3835,1336467,4,1679875200,Jazz,HIVI!,1.0,37.484,-122.149,46
4,695,105,Indonesia International Furniture Expo,1923,1163292,3,1675296000,Pop,Isyana Sarasvati,1.0,30.691,-88.044,15


In [175]:
ratings_df = users_rating_model.copy().drop(['lat', 'lng','lokasi'], axis=1)

In [176]:
ratings_df.head()

Unnamed: 0,id_user,id_event,nama_event,harga_tiket,durasi,timestamp,genre,nama_musisi,rating,id_event_organizer
0,528,2677,The Lonely Carnival,819816,2,1675728000,Pop,Padi,5.0,3
1,1164,1576,Acoustic Arena,944923,2,1676332800,Pop,Payung Teduh,5.0,11
2,1168,1737,Dance Drill,1407616,3,1677888000,Pop,Kangen Band,5.0,9
3,1992,180,Jakarta International Bazaar,1336467,4,1679875200,Jazz,HIVI!,1.0,46
4,695,105,Indonesia International Furniture Expo,1163292,3,1675296000,Pop,Isyana Sarasvati,1.0,15


In [177]:
events_model = pd.read_csv('events_clean_fix.csv', index_col=0)

In [178]:
events_model

Unnamed: 0,id_event,nama_event,id_event_organizer
0,75,Indonesia International Education & Training Expo,1
1,34,Prambanan Jazz Festival,2
2,71,Bali International Choir Festival,3
3,92,Indonesia International Motor Show,4
4,168,Yogyakarta International Gamelan Festival,5
...,...,...,...
3021,3022,Planet Fantasy Hub Festival,120
3022,3023,Gem of Magic Fantasy Festival,105
3023,3024,The Serial Fantasy King Festival,59
3024,3025,Ropes of Fantasy Kingdom,56


In [179]:
ratings_df[['id_user', 'id_event']] = ratings_df[['id_user', 'id_event']].astype(str)

In [180]:
ratings_df['id_event_organizer'] = ratings_df['id_event_organizer'].astype(str)

In [181]:
ratings_df.rating

0        5.0
1        5.0
2        5.0
3        1.0
4        1.0
        ... 
19995    5.0
19996    5.0
19997    5.0
19998    5.0
19999    5.0
Name: rating, Length: 20000, dtype: float64

In [182]:
ratings_df.rating = ratings_df.rating.apply(math.ceil)

In [183]:
ratings_df.rating

0        5
1        5
2        5
3        1
4        1
        ..
19995    5
19996    5
19997    5
19998    5
19999    5
Name: rating, Length: 20000, dtype: int64

In [184]:
ratings_df[ratings_df["rating"] > 5]

Unnamed: 0,id_user,id_event,nama_event,harga_tiket,durasi,timestamp,genre,nama_musisi,rating,id_event_organizer


In [185]:
ratings_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20000 entries, 0 to 19999
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   id_user             20000 non-null  object
 1   id_event            20000 non-null  object
 2   nama_event          20000 non-null  object
 3   harga_tiket         20000 non-null  int64 
 4   durasi              20000 non-null  int64 
 5   timestamp           20000 non-null  int64 
 6   genre               20000 non-null  object
 7   nama_musisi         20000 non-null  object
 8   rating              20000 non-null  int64 
 9   id_event_organizer  20000 non-null  object
dtypes: int64(4), object(6)
memory usage: 1.7+ MB


In [186]:
events_df = events_model.loc[:, ['id_event', 'nama_event']].astype(str)

In [187]:
events_df.head()

Unnamed: 0,id_event,nama_event
0,75,Indonesia International Education & Training Expo
1,34,Prambanan Jazz Festival
2,71,Bali International Choir Festival
3,92,Indonesia International Motor Show
4,168,Yogyakarta International Gamelan Festival


## Df to TFDS

In [188]:
ratings = tf.data.Dataset.from_tensor_slices(dict(ratings_df[['id_user', 'nama_event', 'harga_tiket', 'durasi', 'genre', 'rating', 'id_event_organizer']]))
events =  tf.data.Dataset.from_tensor_slices(dict(events_df[['nama_event']]))

ratings = ratings.map(lambda x:{
    'nama_event' : x['nama_event'],
    'id_user' : x['id_user'],
    'genre' : x['genre'],
    'harga_tiket' : float(x['harga_tiket']),
    'durasi' :float(x['durasi']),
    'id_event_organizer' : x['id_event_organizer'],
    'rating':float(x['rating'])
})

events = events.map(lambda x: x['nama_event'])

In [189]:
'Total_data : {}'.format(len(ratings))

'Total_data : 20000'

In [190]:
tf.random.set_seed(42)
shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = ratings.take(15_000)
test = ratings.skip(15_000).take(5_000)

In [191]:
events_name = events.batch(1_000)
user_ids = ratings.batch(1_000).map(lambda x: x["id_user"])

unique_events_name = np.unique(np.concatenate(list(events_name)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

In [192]:
print('Unique Events: {}'.format(len(unique_events_name)))
print('Unique users: {}'.format(len(unique_user_ids)))

Unique Events: 1697
Unique users: 2099


# Tfrs with 1 embedding title

In [193]:
class EventModel(tfrs.models.Model):

  def __init__(self, rating_weight: float, retrieval_weight: float) -> None:
    # We take the loss weights in the constructor: this allows us to instantiate
    # several model objects with different loss weights.

    super().__init__()

    embedding_dimension = 70

    # User and movie models.
    self.event_model: tf.keras.layers.Layer = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_events_name, mask_token=None),
      tf.keras.layers.Embedding(len(unique_events_name) + 1, embedding_dimension)
    ])
    self.user_model: tf.keras.layers.Layer = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_user_ids, mask_token=None),
      tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
    ])

    # A small model to take in user and movie embeddings and predict ratings.
    # We can make this as complicated as we want as long as we output a scalar
    # as our prediction.
    self.rating_model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(1),
    ])

    # The tasks.
    self.rating_task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )
    self.retrieval_task: tf.keras.layers.Layer = tfrs.tasks.Retrieval(
        metrics=tfrs.metrics.FactorizedTopK(
            candidates=events.batch(128).map(self.event_model)
        )
    )

    # The loss weights.
    self.rating_weight = rating_weight
    self.retrieval_weight = retrieval_weight

  def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
    # We pick out the user features and pass them into the user model.
    user_embeddings = self.user_model(features["id_user"])
    # And pick out the movie features and pass them into the movie model.
    event_embeddings = self.event_model(features["nama_event"])
    
    return (
        user_embeddings,
        event_embeddings,
        # We apply the multi-layered rating model to a concatentation of
        # user and movie embeddings.
        self.rating_model(
            tf.concat([user_embeddings, event_embeddings], axis=1)
        ),
    )

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:

    ratings = features.pop("rating")

    user_embeddings, event_embeddings, rating_predictions = self(features)

    # We compute the loss for each task.
    rating_loss = self.rating_task(
        labels=ratings,
        predictions=rating_predictions,
    )
    retrieval_loss = self.retrieval_task(user_embeddings, event_embeddings)

    # And combine them using the loss weights.
    return (self.rating_weight * rating_loss
            + self.retrieval_weight * retrieval_loss)

In [194]:
model = EventModel(rating_weight=1.0, retrieval_weight=1.0)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))

cached_train = train.shuffle(100_000).batch(1_000).cache()
cached_test = test.batch(1_000).cache()

model.fit(cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x15bbe9cfe50>

In [195]:
metrics = model.evaluate(cached_test, return_dict=True)

print(f"\nRetrieval top-100 accuracy: {metrics['factorized_top_k/top_100_categorical_accuracy']:.3f}")
print(f"Ranking RMSE: {metrics['root_mean_squared_error']:.3f}")


Retrieval top-100 accuracy: 0.025
Ranking RMSE: 1.479


In [196]:
# model.save_weights('model_1.h5')

In [197]:
def predict_event(user, top_n=3):
    # Create a model that takes in raw query features, and
    index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
    # recommends movies out of the entire movies dataset.
    index.index_from_dataset(
      tf.data.Dataset.zip((events.batch(100), events.batch(100).map(model.event_model)))
    )

    # Get recommendations.
    _, titles = index(tf.constant([str(user)]))
    
    print('Top {} recommendations for user {}:\n'.format(top_n, user))
    for i, title in enumerate(titles[0, :top_n].numpy()):
        print('{}. {}'.format(i+1, title.decode("utf-8")))

def predict_rating(user, event):
    trained_movie_embeddings, trained_user_embeddings, predicted_rating = model({
          "id_user": np.array([str(user)]),
          "nama_event": np.array([event])
      })
    print("Predicted rating for {}: {}".format(event, predicted_rating.numpy()[0][0]))

In [198]:
users_rating_model['id_user'].unique()

array([ 528, 1164, 1168, ...,  231,  622, 1633], dtype=int64)

In [199]:
 users_rating_model.loc[users_rating_model['id_user']==777, ['id_user','nama_event','rating']]

Unnamed: 0,id_user,nama_event,rating
1903,777,The Devil’s Horns Blowout,5.0
5843,777,Borobudur Writers & Cultural Festival,1.0
6243,777,The Wanderers Festival,2.0
7613,777,Vintageer Sounds,2.0
8151,777,Opera Operation,5.0
8563,777,Lavender Isles,5.0
10472,777,Funk Groover,5.0
11062,777,Euphony Affairs,4.0
14905,777,Bouncy Bacchanal,5.0
18637,777,Music Talent Festival,5.0


In [200]:
users_rating_model.groupby(['id_user', 'nama_event'])

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000015BBE9D72B0>

In [201]:
predict_event(5, 20)

Top 20 recommendations for user 5:

1. Jazz Halo
2. Jazz Halo
3. Jazz Halo
4. Jazz Halo
5. Jazz Halo
6. Dance Zone
7. Dance Zone
8. Dance Zone
9. Dance Zone
10. Cosmopolitan Composition


In [202]:
predict_rating(777,'Jakarta International Handicraft Trade Fair')

Predicted rating for Jakarta International Handicraft Trade Fair: 0.6032429933547974


In [203]:
predict_rating(777,'Yogyakarta Batik Festival')

Predicted rating for Yogyakarta Batik Festival: 1.5709630250930786


# DCN Model

In [204]:
import pprint

%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

import numpy as np
import tensorflow as tf
# import tensorflow_datasets as tfds
from sklearn import preprocessing

import tensorflow_recommenders as tfrs

In [205]:
class Model(tfrs.Model):

  def __init__(self, model):
    super().__init__()
    self._model = model
    self._logit_layer = tf.keras.layers.Dense(1)

    self.task = tfrs.tasks.Ranking(
      loss=tf.keras.losses.MeanSquaredError(),
      metrics=[
        tf.keras.metrics.RootMeanSquaredError("RMSE")
      ]
    )

  def call(self, x):
    x = self._model(x)
    return self._logit_layer(x)

  def compute_loss(self, features, training=False):
    x, labels = features
    scores = self(x)

    return self.task(
        labels=labels,
        predictions=scores,
    )

In [206]:
crossnet = Model(tfrs.layers.dcn.Cross())
deepnet = Model(
    tf.keras.Sequential([
      tf.keras.layers.Dense(512, activation="relu"),
      tf.keras.layers.Dense(256, activation="relu"),
      tf.keras.layers.Dense(128, activation="relu")
    ])
)

In [207]:
genre = ratings_df['genre'].unique()

In [208]:
len(genre)

7

In [209]:
genre

array(['Pop', 'Jazz', 'Dangdut', 'Electronic', 'R&B', 'Rock', 'Indie'],
      dtype=object)

In [210]:
label_encoder = preprocessing.LabelEncoder()

In [211]:
ratings_df['genre']=label_encoder.fit_transform(ratings_df['genre'])

In [212]:
ratings_df['genre'].unique()

array([4, 3, 0, 1, 5, 6, 2])

In [213]:
label_encoder.transform(['Pop'])

array([4])

In [214]:
ratings_df = ratings_df.rename(columns={'rating':'user_rating'})

In [215]:
ratings = tf.data.Dataset.from_tensor_slices(dict(ratings_df[['id_user', 'id_event', 'nama_event', 'harga_tiket', 'durasi', 'genre',  'id_event_organizer','user_rating']]))
events =  tf.data.Dataset.from_tensor_slices(dict(events_df[['nama_event']]))

# ratings = tf.data.Dataset.from_tensor_slices(dict(ratings_df[['id_user', 'id_event', 'nama_event', 'durasi', 'genre',  'id_event_organizer','user_rating']]))
# events =  tf.data.Dataset.from_tensor_slices(dict(events_df[['nama_event']]))

# ratings = tf.data.Dataset.from_tensor_slices(dict(ratings_df[['id_user', 'harga_tiket', 'durasi', 'genre',  'id_event_organizer','user_rating']]))
# events =  tf.data.Dataset.from_tensor_slices(dict(events_df[['nama_event']]))

ratings = ratings.map(lambda x:{
    'nama_event' : x['nama_event'],
    'id_user' : x['id_user'],
    'genre' : int(x['genre']),
    'harga_tiket' : float(x['harga_tiket']),
    'durasi' :float(x['durasi']),
    'id_event' : x['id_event'],
    'id_event_organizer' : x['id_event_organizer'],
    'user_rating':float(x['user_rating'])
})

events = events.map(lambda x: x['nama_event'])

In [216]:
'Total_data : {}'.format(len(ratings))

'Total_data : 20000'

In [217]:
tf.random.set_seed(42)
shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = ratings.take(19_000)
test = ratings.skip(19_000).take(1_000)

In [218]:
feature_names = ['nama_event', 'id_user', 'id_event', 'genre', 'harga_tiket',
                'durasi', 'id_event_organizer']

# feature_names = ['nama_event', 'id_user', 'genre', 
#                 'durasi', 'id_event_organizer']

# feature_names = ['id_user', 'genre', 'harga_tiket',
#                 'durasi', 'id_event_organizer']


vocabularies = {}

for feature_name in feature_names:
  vocab = ratings.batch(1_000_000).map(lambda x: x[feature_name])
  vocabularies[feature_name] = np.unique(np.concatenate(list(vocab)))

In [219]:
tf.random.set_seed(42)

class DCN(tfrs.Model):

  def __init__(self, use_cross_layer, deep_layer_sizes, projection_dim=None):
    super().__init__()

    self.embedding_dimension = 32

    str_features = ['nama_event', 'id_user', 'id_event', 'id_event_organizer']
    int_features = ['genre','harga_tiket','durasi']
    
#     str_features = ['nama_event', 'id_user', 'id_event_organizer']
#     int_features = ['genre','durasi']
    
#     str_features = ['id_user', 'id_event_organizer']
#     int_features = ['genre','harga_tiket','durasi']

    self._all_features = str_features + int_features
    self._embeddings = {}

    # Compute embeddings for string features.
    for feature_name in str_features:
      vocabulary = vocabularies[feature_name]
      self._embeddings[feature_name] = tf.keras.Sequential(
          [tf.keras.layers.StringLookup(
              vocabulary=vocabulary, mask_token=None),
           tf.keras.layers.Embedding(len(vocabulary) + 1,
                                     self.embedding_dimension)
    ])

    # Compute embeddings for int features.
    for feature_name in int_features:
      vocabulary = vocabularies[feature_name]
      self._embeddings[feature_name] = tf.keras.Sequential(
          [tf.keras.layers.IntegerLookup(
              vocabulary=vocabulary, mask_token=None),
           tf.keras.layers.Embedding(len(vocabulary) + 1,
                                     self.embedding_dimension)
    ])

    if use_cross_layer:
      self._cross_layer = tfrs.layers.dcn.Cross(
          projection_dim=projection_dim,
          kernel_initializer="glorot_uniform")
    else:
      self._cross_layer = None

    self._deep_layers = [tf.keras.layers.Dense(layer_size, activation="relu")
      for layer_size in deep_layer_sizes]

    self._logit_layer = tf.keras.layers.Dense(1)

    self.task = tfrs.tasks.Ranking(
      loss=tf.keras.losses.MeanSquaredError(),
      metrics=[tf.keras.metrics.RootMeanSquaredError("RMSE")]
    )

  def call(self, features):
    # Concatenate embeddings
    embeddings = []
    for feature_name in self._all_features:
      embedding_fn = self._embeddings[feature_name]
      embeddings.append(embedding_fn(features[feature_name]))

    x = tf.concat(embeddings, axis=1)

    # Build Cross Network
    if self._cross_layer is not None:
      x = self._cross_layer(x)

    # Build Deep Network
    for deep_layer in self._deep_layers:
      x = deep_layer(x)

    return self._logit_layer(x)

  def compute_loss(self, features, training=False):
    labels = features.pop("user_rating")
    scores = self(features)
    return self.task(
        labels=labels,
        predictions=scores,
    )

In [220]:
tf.random.set_seed(42)

cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()

In [221]:
def run_models(use_cross_layer, deep_layer_sizes, projection_dim=None, num_runs=5):
  models = []
  rmses = []

  for i in range(num_runs):
    model = DCN(use_cross_layer=use_cross_layer,
                deep_layer_sizes=deep_layer_sizes,
                projection_dim=projection_dim)
    model.compile(optimizer=tf.keras.optimizers.Adam(0.01))
    models.append(model)

    model.fit(cached_train, epochs=8, verbose=False)
    metrics = model.evaluate(cached_test, return_dict=True)
    rmses.append(metrics["RMSE"])

  mean, stdv = np.average(rmses), np.std(rmses)

  return {"model": models, "mean": mean, "stdv": stdv}

In [222]:
epochs = 8
learning_rate = 0.01

In [223]:
dcn_result = run_models(use_cross_layer=True,
                        deep_layer_sizes=[192, 192])



- epochs 8, lr 0.01 =  1.1000
- epochs 10 =  1.2659

In [224]:
dcn_lr_result = run_models(use_cross_layer=True,
                           projection_dim=20,
                           deep_layer_sizes=[192, 192])



epochs 8 = 1.1451

In [225]:
dnn_result = run_models(use_cross_layer=False,
                        deep_layer_sizes=[192, 192, 192])



epochs 8 = 1.6964

In [226]:
print("DCN            RMSE mean: {:.4f}, stdv: {:.4f}".format(
    dcn_result["mean"], dcn_result["stdv"]))
print("DCN (low-rank) RMSE mean: {:.4f}, stdv: {:.4f}".format(
    dcn_lr_result["mean"], dcn_lr_result["stdv"]))
print("DNN            RMSE mean: {:.4f}, stdv: {:.4f}".format(
    dnn_result["mean"], dnn_result["stdv"]))

DCN            RMSE mean: 0.5895, stdv: 0.3081
DCN (low-rank) RMSE mean: 0.5301, stdv: 0.0982
DNN            RMSE mean: 1.1447, stdv: 0.1258


### Data fix 2
**Tanpa event_name feature**
- DCN            RMSE mean: 1.4269, stdv: 0.4206
- DCN (low-rank) RMSE mean: 1.3793, stdv: 0.2845
- DNN            RMSE mean: 2.2950, stdv: 0.6278

**Fetures lengkap**
- DCN            RMSE mean: 1.1822, stdv: 0.0348
- DCN (low-rank) RMSE mean: 1.1756, stdv: 0.0576
- DNN            RMSE mean: 1.9404, stdv: 0.4395

### Data fix 3

- DCN            RMSE mean: 1.3951, stdv: 0.0167
- DCN (low-rank) RMSE mean: 1.3745, stdv: 0.0323
- DNN            RMSE mean: 1.7456, stdv: 0.3024

### Data fix 4

- DCN            RMSE mean: 0.9175, stdv: 0.4032
- DCN (low-rank) RMSE mean: 0.7568, stdv: 0.0603
- DNN            RMSE mean: 2.2006, stdv: 0.4556

Data train 19.000
- DCN            RMSE mean: 0.5895, stdv: 0.3081
- DCN (low-rank) RMSE mean: 0.5301, stdv: 0.0982
- DNN            RMSE mean: 1.1447, stdv: 0.1258


In [227]:
model = DCN(use_cross_layer=True,
            projection_dim=20,
            deep_layer_sizes=[192, 192])
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate))
model.fit(cached_train, epochs=epochs, verbose=False)

<keras.callbacks.History at 0x15b91124b50>

In [228]:
model.predict(cached_test)



array([[1.9948149],
       [2.6911776],
       [4.1947417],
       [4.46236  ],
       [3.3874931],
       [4.270931 ],
       [2.546702 ],
       [4.6900525],
       [4.2120633],
       [2.6658823],
       [4.5601835],
       [4.5123267],
       [4.4087543],
       [4.288321 ],
       [4.4293656],
       [4.1918073],
       [4.282169 ],
       [4.6247497],
       [1.1142228],
       [4.105821 ],
       [4.657616 ],
       [4.2951264],
       [4.4589987],
       [4.394216 ],
       [4.2626863],
       [4.7157655],
       [4.2017646],
       [3.439419 ],
       [1.3915036],
       [4.3802595],
       [4.051696 ],
       [4.4706073],
       [3.3393378],
       [4.2438154],
       [3.382625 ],
       [4.2800393],
       [2.7197313],
       [4.436844 ],
       [4.176309 ],
       [3.3349433],
       [1.797564 ],
       [4.29916  ],
       [1.3418839],
       [4.347187 ],
       [2.6545024],
       [4.629436 ],
       [1.1504211],
       [2.6442513],
       [4.340005 ],
       [3.531597 ],


In [229]:
def get_column_value(row):
    return row['user_rating']

In [230]:
model.summary()

Model: "dcn_34"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_252 (Sequential)  (None, 32)               160       
                                                                 
 sequential_250 (Sequential)  (None, 32)               256       
                                                                 
 sequential_251 (Sequential)  (None, 32)               95680     
                                                                 
 sequential_248 (Sequential)  (None, 32)               95808     
                                                                 
 sequential_249 (Sequential)  (None, 32)               6400      
                                                                 
 sequential_247 (Sequential)  (None, 32)               67200     
                                                                 
 sequential_246 (Sequential)  (None, 32)               53920

# Testing

In [374]:
test_1 = ratings_df[ratings_df['user_rating']==5].drop(['user_rating'], axis=1)

In [375]:
ratings = tf.data.Dataset.from_tensor_slices(dict(test_1[['id_user', 'id_event', 'nama_event', 'harga_tiket', 'durasi', 'genre',  'id_event_organizer']]))
# events =  tf.data.Dataset.from_tensor_slices(dict(test_1[['nama_event']]))

# ratings = tf.data.Dataset.from_tensor_slices(dict(ratings_df[['id_user', 'id_event', 'nama_event', 'durasi', 'genre',  'id_event_organizer','user_rating']]))
# events =  tf.data.Dataset.from_tensor_slices(dict(events_df[['nama_event']]))

# ratings = tf.data.Dataset.from_tensor_slices(dict(ratings_df[['id_user', 'harga_tiket', 'durasi', 'genre',  'id_event_organizer','user_rating']]))
# events =  tf.data.Dataset.from_tensor_slices(dict(events_df[['nama_event']]))

ratings = ratings.map(lambda x:{
    'durasi' :float(x['durasi']),
    'genre' : int(x['genre']),
    'harga_tiket' : float(x['harga_tiket']),
    'id_event' : x['id_event'],
    'id_event_organizer' : x['id_event_organizer'],
    'id_user' : x['id_user'],
    'nama_event' : x['nama_event'],
#     'user_rating':float(x['user_rating'])
})

# events = events.map(lambda x: x['nama_event'])

{'durasi': TensorSpec(shape=(None,), dtype=tf.float32, name='durasi'),
 'genre': TensorSpec(shape=(None,), dtype=tf.int32, name='genre'),
 'harga_tiket': TensorSpec(shape=(None,), dtype=tf.float32, name='harga_tiket'),
 'id_event': TensorSpec(shape=(None,), dtype=tf.string, name='id_event'),
 'id_event_organizer': TensorSpec(shape=(None,), dtype=tf.string, name='id_event_organizer'),
 'id_user': TensorSpec(shape=(None,), dtype=tf.string, name='id_user'),
 'nama_event': TensorSpec(shape=(None,), dtype=tf.string, name='nama_event')}
  Keyword arguments: {'training': False}

In [376]:
test_1

Unnamed: 0,id_user,id_event,nama_event,harga_tiket,durasi,timestamp,genre,nama_musisi,id_event_organizer
0,528,2677,The Lonely Carnival,819816,2,1675728000,4,Padi,3
1,1164,1576,Acoustic Arena,944923,2,1676332800,4,Payung Teduh,11
2,1168,1737,Dance Drill,1407616,3,1677888000,4,Kangen Band,9
6,141,2522,The Ultimate Spring Fest,1294087,3,1680048000,4,HIVI!,5
7,1190,2121,Queen’s Music Mela,868304,2,1676592000,4,Sheila On 7,22
...,...,...,...,...,...,...,...,...,...
19995,2077,1765,Beats Bacchanal,356083,4,1678406400,1,Repvblik,103
19996,1378,2541,Old Spring Laneway Festival,97498,3,1672617600,0,Agnes Monica,115
19997,1053,1899,Zero Point Music,455380,1,1673654400,2,Gigi,69
19998,1153,1450,Lost Lands,146014,4,1677456000,6,Payung Teduh,171


In [377]:
# tf.random.set_seed(42)
# shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

# train = ratings.take(15_000)
# test = ratings.skip(15_000).take(5_000)

In [378]:
feature_names = ['nama_event', 'id_user', 'id_event', 'genre', 'harga_tiket',
                'durasi', 'id_event_organizer']

# feature_names = ['nama_event', 'id_user', 'genre', 
#                 'durasi', 'id_event_organizer']

# feature_names = ['id_user', 'genre', 'harga_tiket',
#                 'durasi', 'id_event_organizer']


vocabularies = {}

for feature_name in feature_names:
  vocab = ratings.batch(1_000_000).map(lambda x: x[feature_name])
  vocabularies[feature_name] = np.unique(np.concatenate(list(vocab)))

In [379]:
cached_test = ratings.batch(4096).cache()

In [380]:
model.predict(cached_test)



array([[4.3892994],
       [4.3258405],
       [4.2658224],
       ...,
       [4.2469993],
       [4.4971166],
       [4.2469788]], dtype=float32)

In [381]:
model.save("content_model", save_format='tf')



INFO:tensorflow:Assets written to: content_model\assets


INFO:tensorflow:Assets written to: content_model\assets


In [2]:
loaded_model = tf.keras.models.load_model('./content_model')

In [384]:
predicted_values = loaded_model.predict(cached_test)



In [385]:
predicted_values

array([[4.3893   ],
       [4.3258405],
       [4.265822 ],
       ...,
       [4.246999 ],
       [4.497116 ],
       [4.2469788]], dtype=float32)

In [394]:
len(predicted_values)

15525

In [395]:
predicted_values.flatten()

array([4.3893   , 4.3258405, 4.265822 , ..., 4.246999 , 4.497116 ,
       4.2469788], dtype=float32)

# Variable Importance


In [3]:
layer = loaded_model.layers[1]