# ***CNN + RNN***

## Import Required Libs

In [26]:
import pandas as pd
import numpy as np
import tensorflow as tf
import keras
from keras.preprocessing.sequence import pad_sequences
from keras import Model
from keras.models import Sequential
from keras.layers import LayerNormalization, Embedding, Layer, Conv1D, ReLU, Concatenate, MaxPooling1D, Flatten, Dense, GlobalMaxPooling1D, AveragePooling1D, GlobalAveragePooling1D, BatchNormalization, Activation, Dropout, LSTM, Bidirectional
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.initializers import HeNormal
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report
)
from keras import backend as K

In [2]:
import os
import logging
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
logging.getLogger("tensorflow").setLevel(logging.ERROR)

In [None]:
import tensorflow as tf
print(tf.config.list_logical_devices())

In [None]:
import tensorflow as tf

print("TPU devices:", tf.config.experimental.list_logical_devices('TPU'))

In [3]:
try:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu="local")
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
    print("✅ TPU initialized")
except Exception as e:
    print("⚠️ No TPU, fallback:", e)
    strategy = tf.distribute.get_strategy()

print("Số replicas:", strategy.num_replicas_in_sync)

I0000 00:00:1756278628.035224      10 service.cc:148] XLA service 0x584c40c61330 initialized for platform TPU (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1756278628.035280      10 service.cc:156]   StreamExecutor device (0): TPU, 2a886c8
I0000 00:00:1756278628.035285      10 service.cc:156]   StreamExecutor device (1): TPU, 2a886c8
I0000 00:00:1756278628.035288      10 service.cc:156]   StreamExecutor device (2): TPU, 2a886c8
I0000 00:00:1756278628.035292      10 service.cc:156]   StreamExecutor device (3): TPU, 2a886c8
I0000 00:00:1756278628.035297      10 service.cc:156]   StreamExecutor device (4): TPU, 2a886c8
I0000 00:00:1756278628.035300      10 service.cc:156]   StreamExecutor device (5): TPU, 2a886c8
I0000 00:00:1756278628.035303      10 service.cc:156]   StreamExecutor device (6): TPU, 2a886c8
I0000 00:00:1756278628.035305      10 service.cc:156]   StreamExecutor device (7): TPU, 2a886c8


✅ TPU initialized
Số replicas: 8


## Load Processed Data

In [4]:
train_df = pd.read_csv('/kaggle/input/sentiment-classification-nmq/train.csv')
valid_df = pd.read_csv('/kaggle/input/sentiment-classification-nmq/valid.csv')
test_df = pd.read_csv('/kaggle/input/sentiment-classification-nmq/test.csv')

In [None]:
train_df.isna().sum()

## Load Pretrained Word2Vec

In [5]:
word2vec_file_path = '/kaggle/input/pho-word2vec/word2vec_vi_words_300dims.txt'

In [6]:
class Vocab:
    def __init__(self, w2v_file_path: str):
        self.word_index = {}
        self.embedding_matrix = []
        self.embedding_dim = None
        self.load_word2vec(w2v_file_path)
        self.embedding_matrix = np.array(self.embedding_matrix, dtype='float32')
        self.unk_id = self.word_index['<unk>']
        self.pad_id = self.word_index['<pad>']

    def load_word2vec(self, file_path: str):
        with open(file_path, 'r', encoding='utf-8') as f:
            first_line = f.readline().strip().split()
            if (len(first_line) == 2 and first_line[0].isdigit()):
                self.embedding_dim = int(first_line[1])
            else:
                f.seek(0)
            for line in f:
                values = line.strip().split()
                if len(values) < self.embedding_dim + 1:
                    continue
                word = '_'.join(values[: - self.embedding_dim]).lower()
                if word in self.word_index:
                    continue
                try:
                    vector = np.asarray(values[- self.embedding_dim :], dtype='float32')
                except ValueError:
                    print(f'Error line: {line.strip()}')
                    continue
                self.word_index[word] = len(self.word_index)
                self.embedding_matrix.append(vector)

        self.word_index['<unk>'] = len(self.word_index)
        self.embedding_matrix.append(np.random.uniform(-0.05, 0.05, self.embedding_dim))

        self.word_index['<pad>'] = len(self.word_index)
        self.embedding_matrix.append(np.zeros(self.embedding_dim))

        for word in ['happy', 'love', 'sad', 'angry', 'surprise', 'thinking', 'neutral']:
            if f'<{word}>' not in self.word_index:
                dot_id = self.word_index.get('.')
                word_id = self.word_index.get(word)
                if dot_id is None or word_id is None:
                    continue
                dot_vector = self.embedding_matrix[dot_id]
                word_vector = self.embedding_matrix[word_id]
                vector = (dot_vector + word_vector) / 2

                self.word_index[f'<{word}>'] = len(self.word_index)
                self.embedding_matrix.append(vector)

    def get_index(self, word: str) -> int:
        return self.word_index.get(word, self.unk_id)

In [7]:
vocab = Vocab(word2vec_file_path)

## Tokenize and Pad Sequences

In [None]:
lengths = train_df['sent'].apply(lambda x: len(x.split()))
plt.hist(lengths, bins=50, color='green')
plt.xlabel('Lengths')
plt.ylabel('Frequency')
plt.show()

In [8]:
MAX_LENGTH_SENT = 512

In [9]:
class Tokenizer:
    def __init__(self, vocab: Vocab, max_len: int):
        self.vocab = vocab
        self.max_len = max_len

    def encode(self, text):
        tokens = text.split()
        ids = [self.vocab.get_index(token) for token in tokens]
        return ids[:self.max_len]

    def pad_sequence(self, seq: list[int]) -> list[int]:
        if len(seq) < self.max_len:
            seq = seq + [self.vocab.pad_id] * (self.max_len - len(seq))
        return seq[:self.max_len]

    def encode_batch(self, texts: list[str]) -> np.ndarray:
        return np.array([self.pad_sequence(self.encode(text)) for text in texts])

In [10]:
tokenizer = Tokenizer(vocab, MAX_LENGTH_SENT)

## Label Encoder

In [11]:
class LabelEncoder:
    def __init__(self, labels: list['str']):
        self.label2id = {}
        self.id2label = {}
        for label in labels:
            self.label2id[label] = len(self.label2id)
            self.id2label[len(self.id2label)] = label

    def encode_batch(self, labels: list[str]):
        return [self.label2id[label] for label in labels]

    def decode_batch(self, ids: list[int]):
        return [self.id2label[id] for id in ids]

In [12]:
label_encoder = LabelEncoder(['POS', 'NEG', 'NEU'])

## Create Dataset

In [13]:
class Dataset:
    def __init__(self, df: pd.DataFrame, tokenizer: Tokenizer, label_encoder: LabelEncoder):
        self.df = df
        self.tokenizer = tokenizer
        self.label_encoder = label_encoder

    def build(self, batch_size=32, shuffle=True):
        X = self.tokenizer.encode_batch(self.df['sent'].tolist())
        y = self.label_encoder.encode_batch(self.df['sentiment'].tolist())

        X = np.array(X, dtype=np.int32)
        y = np.array(y, dtype=np.int32)

        dataset = tf.data.Dataset.from_tensor_slices((X, y))
        if shuffle:
            dataset = dataset.shuffle(buffer_size=len(X))
        dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
        return dataset

In [14]:
train_dataset = Dataset(
    df=train_df,
    tokenizer=tokenizer,
    label_encoder=label_encoder
).build(batch_size=64)

valid_dataset = Dataset(
    df=valid_df,
    tokenizer=tokenizer,
    label_encoder=label_encoder
).build(batch_size=64, shuffle=False)

test_dataset = Dataset(
    df=test_df,
    tokenizer=tokenizer,
    label_encoder=label_encoder
).build(batch_size=64, shuffle=False)

## Define Model

In [15]:
class InceptionRes1D(Layer):
    def __init__(
        self, 
        branch_filters: list[int], 
        kernel_sizes: list[int], 
        output_dim: int
    ):
        super().__init__()
        assert len(branch_filters) == len(kernel_sizes), 'filters and kernel_sizes must have same length'
        self.output_dim = output_dim
        self.input_norm = LayerNormalization()
        self.branches = []
        for f, k in zip(branch_filters, kernel_sizes):
            self.branches.append(Conv1D(filters=f, kernel_size=k, padding='same', activation=None, kernel_initializer=HeNormal()))
        self.conv1x1 = Conv1D(filters=output_dim, kernel_size=1, padding='same', activation=None, kernel_initializer=HeNormal())

        self.res_conv = None
        self.out_norm = LayerNormalization(axis=-1)

    def build(self, input_shape):
        input_dim = input_shape[-1]
        if input_dim != self.output_dim:
            self.res_conv = Conv1D(self.output_dim, 1, padding='same', activation=None)
        super().build(input_shape)

    def call(self, x, training=False):
        x = self.input_norm(x)
        branch_outputs = []
        for conv in self.branches:
            out = conv(x)
            out = tf.nn.gelu(out)
            branch_outputs.append(out)

        x_concat = tf.concat(branch_outputs, axis=-1)

        out = self.conv1x1(x_concat)
        
        residual = self.res_conv(x) if self.res_conv else x
            
        return tf.nn.gelu(self.out_norm(residual + out))

class CNNEncoder(Layer):
    def __init__(
        self,
        inception_configs=[
            {'branch_filters': [128, 64, 32], 'kernel_sizes': [2, 3, 5]},
            {'branch_filters': [256, 128, 64], 'kernel_sizes': [2, 3, 5]},
            {'branch_filters': [512, 256, 128], 'kernel_sizes': [2, 3, 5]},
        ],
        pooling_configs=[
            {'type': 'max', 'size': 5, 'strides': 3, 'padding': 'same'},
            {'type': 'max', 'size': 3, 'strides': 2, 'padding': 'same'},
            {'type': 'global_max'},
        ],
        res_output_dims=[300, 300, 300],
        fc_configs=[
            {'dim': 512, 'activation': 'gelu', 'dropout': 0.3},
            {'dim': 256, 'activation': 'gelu', 'dropout': 0.3}
        ]
    ):
        super(CNNEncoder, self).__init__()
        self.layers = []
        for inception_config, pooling_config, output_dim in zip(inception_configs, pooling_configs, res_output_dims):
            self.layers.append(InceptionRes1D(
                branch_filters=inception_config['branch_filters'],
                kernel_sizes=inception_config['kernel_sizes'],
                output_dim = output_dim
            ))
            if pooling_config['type'] == 'max':
                self.layers.append(MaxPooling1D(
                    pool_size=pooling_config['size'],
                    strides=pooling_config['strides'],
                    padding=pooling_config['padding']
                ))
            elif pooling_config['type'] == 'global_max':
                self.layers.append(GlobalMaxPooling1D())
            elif pooling_config['type'] == 'average':
                self.layers.append(AveragePooling1D(
                    pool_size=pooling_config['size'],
                    strides=pooling_config['strides']
                ))
            elif pooling_config['type'] == 'global_average':
                self.layers.append(GlobalAveragePooling1D())
        self.fcs = []
        for config in fc_configs:
            self.fcs.append(Dense(config['dim'], activation=config['activation'], kernel_initializer=HeNormal()))
            if 'dropout' in config:
                self.fcs.append(Dropout(config['dropout']))
        self.fcs = Sequential(self.fcs)

    def call(self, x, training=False):
        for layer in self.layers:
            if isinstance(layer, InceptionRes1D):
                x = layer(x, training=training)
            else:
                x = layer(x)
        pattern = self.fcs(x)    
        return pattern

In [16]:
class RNNEncoder(Layer):
    def __init__(
        self,
        lstm_hidden_dim,
        fc_configs
    ):
        super().__init__()
        self.bilstm = Bidirectional(LSTM(lstm_hidden_dim, return_sequences=True, dropout=0.3))
        self.lstm_post_norm = LayerNormalization()
        self.attn_fc = Dense(1, activation=None)
        self.fcs = []
        for config in fc_configs:
            self.fcs.append(Dense(config['dim'], activation=config['activation']))
            if 'dropout' in config:
                self.fcs.append(Dropout(config['dropout']))
        self.fcs = Sequential(self.fcs)
    def call(self, x, mask):
        hiddens = self.bilstm(x, mask=mask)
        hiddens = self.lstm_post_norm(hiddens)
        
        scores = self.attn_fc(hiddens)
        scores = tf.squeeze(scores, axis=-1)
        weights = tf.nn.softmax(scores, axis=-1)
        context = tf.reduce_sum(hiddens * tf.expand_dims(weights, -1), axis=-2)
        
        context = self.fcs(context)
        return hiddens, context

In [17]:
class Attention(Layer):
    def __init__(self, attention_dim):
        super().__init__()
        self.dense = Dense(attention_dim, activation='tanh')
        self.v = Dense(1, activation=None)

    def call(self, hiddens, pattern, mask):
        """
        Args:
            hiddens: [B, S, 2H]
            pattern: [B, P]
        Returns:
            context: [B, 2H]
            attn_weights: [B, S, 1]
        """
        
        S = tf.shape(hiddens)[1]
        
        # [B, P] -> [B, 1, P] -> [B, S, P]
        exp_pattern = tf.tile(tf.expand_dims(pattern, 1), [1, S, 1])

        # [B, S, 2H] cat [B, S, P] -> [B, S, 2H + P]
        score_input = tf.concat([hiddens, exp_pattern], axis=-1)

        # [B, S, 2H + P] -> [B, S, A] -> [B, S, 1]
        score = self.v(self.dense(score_input))

        if mask is not None:
            mask = tf.cast(mask, tf.float32)
            mask = tf.expand_dims(mask, -1)
            score += (1.0 - mask) * -1e9
        
        attn_weights = tf.nn.softmax(score, axis=1)

        # [B, S, 1] * [B, S, 2H] -> [B, S, 2H] -> [B, 2H]
        context = tf.reduce_sum(attn_weights * hiddens, axis=1)

        return context

In [18]:
class HybridModel(Model):
    def __init__(
        self,
        vocab,
        cnn_inception_configs,
        cnn_pooling_configs,
        cnn_output_dims,
        cnn_fc_configs,
        lstm_hidden_dim,
        rnn_fc_configs,
        attention_dim,
        fc_out_configs,
        output_dim,
        trainable_embeddings=False,
    ):
        super().__init__()
        self.vocab = vocab

        self.embedding = Embedding(
            input_dim=len(vocab.word_index),
            output_dim=vocab.embedding_dim,
            weights=[vocab.embedding_matrix],
            trainable=trainable_embeddings,
            mask_zero=False
        )

        self.embedding_adapter = Dense(vocab.embedding_dim, activation='gelu')
        
        self.cnn_encoder = CNNEncoder(
            inception_configs=cnn_inception_configs,
            pooling_configs=cnn_pooling_configs,
            res_output_dims=cnn_output_dims,
            fc_configs=cnn_fc_configs
        )
        
        self.rnn_encoder = RNNEncoder(
            lstm_hidden_dim=lstm_hidden_dim,
            fc_configs=rnn_fc_configs
        )
        
        self.attention = Attention(attention_dim=attention_dim)

        self.rnn_context_norm = LayerNormalization()
        self.cnn_context_norm = LayerNormalization()
        self.pattern_norm = LayerNormalization()

        self.fcs = []
        for config in fc_out_configs:
            self.fcs.append(Dense(config['dim'], activation=config['activation']))
            if 'dropout' in config:
                self.fcs.append(Dropout(config['dropout']))
        self.fcs.append(Dense(output_dim, activation='softmax'))
        self.fcs = Sequential(self.fcs)

    def call(self, x, training=False):
        """
        Args:
            x: [B, S]
        Returns:
            out: [B, 3]
        """
        # [B, S] -> [B, S, D]
        embedded_x = self.embedding(x)
        embedded_x = self.embedding_adapter(embedded_x)

        mask = tf.not_equal(x, self.vocab.pad_id)
        
        # [B, S, 2H]
        rnn_hiddens, rnn_context = self.rnn_encoder(embedded_x, mask)
        rnn_context = self.rnn_context_norm(rnn_context)

        # [B, S, D] -> [B, P]
        pattern = self.cnn_encoder(rnn_hiddens, training=training)
        pattern = self.pattern_norm(pattern)

        # context: [B, 2H]
        cnn_context = self.attention(rnn_hiddens, pattern, mask)
        cnn_context = self.cnn_context_norm(cnn_context)

        out = tf.concat([cnn_context, rnn_context], axis=-1)

        out = self.fcs(out)

        return out

## Train

In [19]:
with strategy.scope():
    model = HybridModel(
        vocab=vocab,
        lstm_hidden_dim=256,
        rnn_fc_configs=[
            {'dim': 512, 'activation': 'gelu', 'dropout': 0.25},
            {'dim': 256, 'activation': 'gelu', 'dropout': 0.25},
        ],
        cnn_inception_configs=[
            {'branch_filters': [128, 64, 32], 'kernel_sizes': [2, 3, 5]},
            {'branch_filters': [256, 128, 64], 'kernel_sizes': [2, 3, 5]},
        ],
        cnn_pooling_configs=[
            {'type': 'max', 'size': 3, 'strides': 1, 'padding': 'same'},
            {'type': 'global_average'},
        ],
        cnn_output_dims=[512, 512],
        cnn_fc_configs=[
            {'dim': 512, 'activation': 'relu', 'dropout': 0.25},
            {'dim': 256, 'activation': 'relu', 'dropout': 0.25}
        ],
        attention_dim=512,
        fc_out_configs=[
            {'dim': 512, 'activation': 'gelu', 'dropout': 0.2},
            {'dim': 256, 'activation': 'gelu', 'dropout': 0.2},
            {'dim': 128, 'activation': 'gelu', 'dropout': 0.2},
        ],
        output_dim=3,
    )

    dummy_input = tf.zeros((1, 512), dtype=tf.int32)
    _ = model(dummy_input)
    
    model.compile(
        optimizer=keras.optimizers.AdamW(learning_rate=1e-4, weight_decay=5e-3),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        # metrics=['accuracy']
    )

I0000 00:00:1756278822.429003      10 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


In [20]:
class WeightedF1Checkpoint(tf.keras.callbacks.Callback):
    def __init__(self, val_dataset, save_path='best_model.weights.h5'):
        super().__init__()
        self.val_dataset = val_dataset
        self.save_path = save_path
        self.best_f1_weighted = 0.0

    def on_epoch_end(self, epoch, logs=None):
        y_pred_prob = self.model.predict(self.val_dataset)
        y_pred = np.argmax(y_pred_prob, axis=1)
        
        y_true = np.concatenate([y for x, y in self.val_dataset], axis=0)

        f1_weighted = f1_score(y_true, y_pred, average='weighted')

        print(f"val_weighted_f1 = {f1_weighted:.4f}")
        
        if f1_weighted > self.best_f1_weighted:
            self.best_f1_weighted = f1_weighted
            self.model.save_weights(self.save_path)

In [22]:
# early_stop = EarlyStopping(
#     monitor='val_loss',
#     patience=5,
#     restore_best_weights=False
# )

f1_callback = WeightedF1Checkpoint(
    valid_dataset, 
    save_path='best_model.weights.h5'
)

class_weight = {
    label_encoder.label2id['POS']: 1.0,
    label_encoder.label2id['NEG']: 1.0 / 0.7,
    label_encoder.label2id['NEU']: 1.0 / 0.55,
}

In [23]:
with strategy.scope():
    history = model.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=30,
        callbacks=[
            f1_callback, 
            # early_stop
        ],
        class_weight=class_weight
    )

Epoch 1/30


I0000 00:00:1756278918.606209      10 encapsulate_tpu_computations_pass.cc:266] Subgraph fingerprint:3088870120923251187
I0000 00:00:1756278921.014146     942 tpu_compilation_cache_interface.cc:442] TPU host compilation cache miss: cache_key(16586193888508352171), session_name()
I0000 00:00:1756278939.783540     942 tpu_compile_op_common.cc:245] Compilation of 16586193888508352171 with session name  took 18.76934551s and succeeded
I0000 00:00:1756278939.851894     942 tpu_compilation_cache_interface.cc:476] TPU host compilation cache: compilation complete for cache_key(16586193888508352171), session_name(), subgraph_key(std::string(property.function_name) = "cluster_one_step_on_data_3088870120923251187", property.function_library_fingerprint = 14440420843803009582, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap()

[1m3625/3626[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 198ms/step - loss: 0.9248

I0000 00:00:1756279655.939227     928 tpu_compilation_cache_interface.cc:442] TPU host compilation cache miss: cache_key(1823147710482391409), session_name()
I0000 00:00:1756279675.715718     928 tpu_compile_op_common.cc:245] Compilation of 1823147710482391409 with session name  took 19.776443137s and succeeded
I0000 00:00:1756279675.799355     928 tpu_compilation_cache_interface.cc:476] TPU host compilation cache: compilation complete for cache_key(1823147710482391409), session_name(), subgraph_key(std::string(property.function_name) = "cluster_one_step_on_data_3088870120923251187", property.function_library_fingerprint = 14440420843803009582, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "7,512,;7,;7,;", property.guaranteed_constants_size = 0, embedding_partit

[1m3626/3626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 203ms/step - loss: 0.9248

I0000 00:00:1756279679.459241      10 encapsulate_tpu_computations_pass.cc:266] Subgraph fingerprint:8890144241869602698
I0000 00:00:1756279679.954033     909 tpu_compilation_cache_interface.cc:442] TPU host compilation cache miss: cache_key(16989962942665468449), session_name()
I0000 00:00:1756279682.979983     909 tpu_compile_op_common.cc:245] Compilation of 16989962942665468449 with session name  took 3.025905765s and succeeded
I0000 00:00:1756279682.989588     909 tpu_compilation_cache_interface.cc:476] TPU host compilation cache: compilation complete for cache_key(16989962942665468449), session_name(), subgraph_key(std::string(property.function_name) = "cluster_one_step_on_data_8890144241869602698", property.function_library_fingerprint = 15896164004160230510, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap()

[1m292/293[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 55ms/step

I0000 00:00:1756279717.833607      10 encapsulate_tpu_computations_pass.cc:266] Subgraph fingerprint:18172037700898505644
I0000 00:00:1756279718.190131     880 tpu_compilation_cache_interface.cc:442] TPU host compilation cache miss: cache_key(6556355573928910235), session_name()
I0000 00:00:1756279720.094234     880 tpu_compile_op_common.cc:245] Compilation of 6556355573928910235 with session name  took 1.904049096s and succeeded
I0000 00:00:1756279720.102684     880 tpu_compilation_cache_interface.cc:476] TPU host compilation cache: compilation complete for cache_key(6556355573928910235), session_name(), subgraph_key(std::string(property.function_name) = "cluster_one_step_on_data_distributed_18172037700898505644", property.function_library_fingerprint = 3478284551076888438, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topol

[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 68ms/step
val_weighted_f1 = 0.8171
[1m3626/3626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m813s[0m 217ms/step - loss: 0.9248 - val_loss: 0.5054
Epoch 2/30
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 57ms/step
val_weighted_f1 = 0.8302
[1m3626/3626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m756s[0m 208ms/step - loss: 0.6621 - val_loss: 0.4717
Epoch 3/30
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 57ms/step
val_weighted_f1 = 0.8468
[1m3626/3626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m757s[0m 208ms/step - loss: 0.6073 - val_loss: 0.4110
Epoch 4/30
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 57ms/step
val_weighted_f1 = 0.8576
[1m3626/3626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m758s[0m 209ms/step - loss: 0.5637 - val_loss: 0.3918
Epoch 5/30
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 57ms/step
val_weighte

## Test

In [28]:
model.load_weights('/kaggle/working/best_model.weights.h5')

In [29]:
y_pred_prob = model.predict(test_dataset)
y_pred = np.argmax(y_pred_prob, axis=1)

y_true = np.concatenate([y for x, y in test_dataset], axis=0)

acc = accuracy_score(y_true, y_pred)
print(f'Accuracy: {acc:.4f}\n')

# F1 Score
print("F1 Scores:")
print(f"\tMacro:    {f1_score(y_true, y_pred, average='macro'):.4f}")
print(f"\tMicro:    {f1_score(y_true, y_pred, average='micro'):.4f}")
print(f"\tWeighted: {f1_score(y_true, y_pred, average='weighted'):.4f}\n")

# Precision
print("Precision Scores:")
print(f"\tMacro:    {precision_score(y_true, y_pred, average='macro'):.4f}")
print(f"\tMicro:    {precision_score(y_true, y_pred, average='micro'):.4f}")
print(f"\tWeighted: {precision_score(y_true, y_pred, average='weighted'):.4f}\n")

# Recall
print("Recall Scores:")
print(f"\tMacro:    {recall_score(y_true, y_pred, average='macro'):.4f}")
print(f"\tMicro:    {recall_score(y_true, y_pred, average='micro'):.4f}")
print(f"\tWeighted: {recall_score(y_true, y_pred, average='weighted'):.4f}\n")


[1m335/335[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 59ms/step
Accuracy: 0.8801

F1 Scores:
	Macro:    0.7385
	Micro:    0.8801
	Weighted: 0.8858

Precision Scores:
	Macro:    0.7176
	Micro:    0.8801
	Weighted: 0.8943

Recall Scores:
	Macro:    0.7690
	Micro:    0.8801
	Weighted: 0.8801

