In [None]:
import polars as pl
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
import numpy as np
import logging
import random

# --- Reproducibility -------------------------------------------------------
# Seed every random number generator used in this notebook (Python's
# `random`, NumPy's legacy global RNG, and TensorFlow) with the same value.
seed = 42
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

# --- Logging ---------------------------------------------------------------
# Keep TensorFlow quiet: autograph verbosity 0 and only ERROR-level log records.
tf.get_logger().setLevel(logging.ERROR)
tf.autograph.set_verbosity(0)

# Record the TensorFlow version used for this run.
print(tf.__version__)

In [None]:
from pathlib import Path

from polimi.utils.tf_models.utils.build_sequences import build_history_seq

# Root of the dataset on disk. This is the single place to edit when the data
# lives elsewhere; every parquet file below is resolved relative to it instead
# of repeating the absolute prefix in each read call.
DATA_DIR = Path('/home/ubuntu/dataset/ebnerd_small')

# Raw inputs. `behaviors` is loaded here but not used by the cells visible in
# this part of the notebook.
history = pl.read_parquet(DATA_DIR / 'train' / 'history.parquet')
behaviors = pl.read_parquet(DATA_DIR / 'train' / 'behaviors.parquet')
articles = pl.read_parquet(DATA_DIR / 'articles.parquet')

# Build the per-history sequence frame from the history and article tables
# (see build_history_seq for the exact output schema).
history_seq = build_history_seq(history, articles)

# Show the first rows as the cell output for a quick visual check.
history_seq.head(3)

In [None]:
from polimi.utils.tf_models.utils.build_sequences import build_sequences_seq

# Windowing parameters forwarded to build_sequences_seq.
# NOTE(review): going by the keyword names, `window` is the window length and
# `stride` the step between consecutive windows -- confirm in build_sequences_seq.
seq_window_kwargs = {'window': 20, 'stride': 5}

train_data = build_sequences_seq(history_seq=history_seq, **seq_window_kwargs)

# Quick look at the shape of the first 'topics' element.
train_data['topics'][0].shape

In [None]:
# List the entries available in `train_data` (displayed as the cell output).
train_data.keys()

In [None]:
# Pull out the sizes one at a time, then display them together as a tuple.
# NOTE(review): presumably these are the sizes that the embedding spec of the
# model has to match -- confirm against the model definition.
topics_dim = train_data['topics'][0][0].shape[1]
subcategory_dim = train_data['subcategory'][0][0].shape[1]
category_shape = train_data['category'][0].shape

topics_dim, subcategory_dim, category_shape

In [None]:
from polimi.utils.tf_models import TemporalHistorySequenceModel, TemporalHistoryClassificationModel
from polimi.utils._polars import reduce_polars_df_memory_size

# Specification of the categorical sequence inputs, one 3-tuple per feature.
# For the moment each first value includes one extra dimension to cover
# missing values in the non one-hot vectors.
# NOTE(review): the tuples look like (input cardinality, embedding size,
# multi-hot flag) -- confirm against TemporalHistorySequenceModel.
categorical_spec = {
    'topics': (78, 10, True),
    'subcategory': (174, 10, True),
    'category': (26, 10, False),
    'weekday': (8, 3, False),
    'hour_group': (7, 3, False),
    'sentiment_label': (4, 2, False),
}

# Names of the numerical sequence features given to the model.
numerical_inputs = ['scroll_percentage', 'read_time', 'premium']

model = TemporalHistorySequenceModel(
    seq_embedding_dims=categorical_spec,
    seq_numerical_features=numerical_inputs,
    n_recurrent_layers=1,
    recurrent_embedding_dim=64,
    l1_lambda=1e-4,
    l2_lambda=1e-4,
)

In [None]:
# Explicitly invoke the model's underscore-prefixed build step.
# NOTE(review): presumably this creates the underlying network so it can be
# plotted before training -- confirm in TemporalHistorySequenceModel.
model._build()

In [None]:
# Render the model via its plot() helper; the return value is the cell output.
model.plot()

In [None]:
# One loss per prediction target, in the order (topics, subcategory, category):
# binary cross-entropy for the first two, categorical cross-entropy for the last.
target_losses = [
    tfk.losses.BinaryCrossentropy(),
    tfk.losses.BinaryCrossentropy(),
    tfk.losses.CategoricalCrossentropy(),
]

# Relative weight of each loss above, in the same order.
target_loss_weights = [0.5, 0.1, 0.4]

adam = tfk.optimizers.Adam(learning_rate=1e-4)

model.fit(
    train_ds=train_data,
    batch_size=64,
    epochs=10,
    loss=target_losses,
    loss_weights=target_loss_weights,
    optimizer=adam,
)