In [2]:
!pip install torch

Collecting torch
  Downloading torch-2.7.0-cp39-none-macosx_11_0_arm64.whl.metadata (29 kB)
Collecting filelock (from torch)
  Using cached filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Using cached networkx-3.2.1-py3-none-any.whl.metadata (5.2 kB)
Collecting jinja2 (from torch)
  Using cached jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting fsspec (from torch)
  Using cached fsspec-2025.3.2-py3-none-any.whl.metadata (11 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torch-2.7.0-cp39-none-macosx_11_0_arm64.whl (68.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.6/68.6 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading sympy-1.14.0-py3-none-any.whl (6.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os
import random
import pandas as pd
import numpy as np
import torch as tc
from sklearn.metrics import f1_score
from models.rnn.simple_rnn_keras import SimpleRNNKeras
from models.rnn.simple_rnn_manual import SimpleRNNManual
from helper.text_vectorization import TextPreprocessor
import tensorflow as tf

# --- Reproducibility ---------------------------------------------------------
# Seed every RNG this notebook touches (Python, NumPy, TensorFlow, PyTorch) and
# request deterministic kernels from both deep-learning frameworks.
SEED = 42

# NOTE: the running interpreter fixed its hash seed at startup, so this export
# only reaches child processes spawned from the notebook.
os.environ['PYTHONHASHSEED'] = str(SEED)
os.environ['TF_DETERMINISTIC_OPS'] = '1'
# tc.use_deterministic_algorithms(True) makes cuBLAS-backed ops raise a
# RuntimeError on CUDA >= 10.2 unless this workspace setting is present.
# It must be in place before the first CUDA op runs. setdefault keeps any value
# the user already exported, and the variable is ignored when CUDA is not used.
os.environ.setdefault('CUBLAS_WORKSPACE_CONFIG', ':4096:8')

random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tc.manual_seed(SEED)

# cuDNN: pick deterministic kernels and disable the auto-tuner, whose choice of
# algorithm can differ from run to run.
tc.backends.cudnn.deterministic = True
tc.backends.cudnn.benchmark = False
tc.use_deterministic_algorithms(True)


# --- Load the nusaX-sentiment splits ------------------------------------------
# Directory holding train.csv / valid.csv / test.csv, relative to the notebook.
DATA_DIR = "../data/nusaX-sentiment"

train_df = pd.read_csv(f"{DATA_DIR}/train.csv")
valid_df = pd.read_csv(f"{DATA_DIR}/valid.csv")
test_df  = pd.read_csv(f"{DATA_DIR}/test.csv")

# String sentiment label -> integer class id.
label_map = {'negative': 0, 'neutral': 1, 'positive': 2}


def _encode_labels(df, split_name):
    """Return the ``label`` column of ``df`` as an int32 array of class ids.

    ``Series.map`` leaves NaN for every value that is not a key of
    ``label_map`` (including missing labels), and casting NaN to int32 would
    silently produce meaningless class ids. Such rows are reported instead.

    Args:
        df: DataFrame with a ``label`` column.
        split_name: Name of the split, used only in the error message.

    Returns:
        numpy.ndarray of dtype int32, one class id per row.

    Raises:
        ValueError: if any label is absent from ``label_map``.
    """
    encoded = df["label"].map(label_map)
    unmapped = encoded.isna()
    if unmapped.any():
        unknown = sorted(df.loc[unmapped, "label"].astype(str).unique())
        raise ValueError(
            f"{split_name} split has labels not in label_map: {unknown}"
        )
    return encoded.values.astype(np.int32)


y_train = _encode_labels(train_df, "train")
y_valid = _encode_labels(valid_df, "valid")
y_test  = _encode_labels(test_df, "test")


# --- Shared settings ---------------------------------------------------------
# The preprocessor and the Keras model both take max_vocab / max_len, and the
# weights file is written by one model and read by the other. Each value is
# defined once here so the two sides cannot drift apart.
MAX_VOCAB = 10000
MAX_LEN = 100
EPOCHS = 5
# NOTE(review): the file is written and read through *_npz helpers but carries a
# ".npy" extension; confirm the intended name against those helpers.
WEIGHTS_PATH = "model_simple_rnn.npy"

# --- Text vectorisation ------------------------------------------------------
# The preprocessor is adapted on the training text only, then the same fitted
# instance transforms every split.
prep = TextPreprocessor(max_vocab=MAX_VOCAB, max_len=MAX_LEN)
prep.adapt(train_df["text"])

X_train = prep.transform(train_df["text"])
X_valid = prep.transform(valid_df["text"])
X_test  = prep.transform(test_df["text"])

# --- Keras reference model ---------------------------------------------------
# One bidirectional recurrent layer of 64 units followed by two dense layers;
# the last dense layer has 3 softmax units, one per class in label_map.
model_keras = SimpleRNNKeras(
    max_vocab=MAX_VOCAB,
    max_len=MAX_LEN,
    rnn_units=[64],
    dense_units=[32, 3],
    dense_activations=['relu', 'softmax'],
    bidirectional=True
)

model_keras.set_vectorized_data(
    X_train=X_train,
    y_train=y_train,
    X_valid=X_valid,
    y_valid=y_valid,
    X_test=X_test,
    y_test=y_test
)

model_keras.build_model()
model_keras.train(epochs=EPOCHS)
model_keras.save_full_npz(WEIGHTS_PATH)

# --- Manual re-implementation ------------------------------------------------
# Load the weights just exported by the Keras model into the manual model so
# both can be evaluated on the same parameters.
model_manual = SimpleRNNManual()
model_manual.load_full_npz(WEIGHTS_PATH)

# --- Evaluation: Keras model vs. manual model on the test split --------------
# Torch copies of the test inputs and labels (int64) for the manual model.
X_test_tensor = tc.tensor(X_test.numpy(), dtype=tc.long)
y_test_tensor = tc.tensor(y_test, dtype=tc.long)

# Keras returns one score per class for each row; argmax over axis 1 picks the
# predicted class id.
y_pred_keras = model_keras.model.predict(X_test).argmax(axis=1)

# The manual model's predictions are moved to the CPU and converted to NumPy
# so scikit-learn can consume them.
manual_output = model_manual.predict(X_test_tensor)
y_pred_manual = manual_output.cpu().numpy()

# Macro-averaged F1 for both models against the same ground-truth labels.
f1_keras, f1_manual = (
    f1_score(y_test, predictions, average='macro')
    for predictions in (y_pred_keras, y_pred_manual)
)

print(f"Keras  F1-score:   {f1_keras:.4f}")
print(f"Manual F1-score:  {f1_manual:.4f}")


Epoch 1/5


2025-05-15 09:43:15.856347: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}
2025-05-15 09:43:15.856620: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - accuracy: 0.3871 - loss: 1.0881 - val_accuracy: 0.4900 - val_loss: 1.0498
Epoch 2/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.5115 - loss: 1.0047 - val_accuracy: 0.5300 - val_loss: 0.9728
Epoch 3/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.5869 - loss: 0.8676 - val_accuracy: 0.4200 - val_loss: 1.0322
Epoch 4/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.7352 - loss: 0.7475 - val_accuracy: 0.5500 - val_loss: 0.9809
Epoch 5/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.8537 - loss: 0.5651 - val_accuracy: 0.5600 - val_loss: 0.9117
Saved full model to model_simple_rnn.npy
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
Keras  F1-score:   0.5428
Manual F1-score:  0.5428
