In [None]:

import pandas as pd
import numpy as np
import torch as tc
from sklearn.metrics import f1_score
from models.rnn.simple_rnn_keras import SimpleRNNKeras
from models.rnn.simple_rnn_manual import SimpleRNNManual
from helper.text_vectorization import TextPreprocessor


# Load the train/validation/test splits from the nusaX-sentiment data directory.
train_df = pd.read_csv("../data/nusaX-sentiment/train.csv")
valid_df = pd.read_csv("../data/nusaX-sentiment/valid.csv")
test_df  = pd.read_csv("../data/nusaX-sentiment/test.csv")

# Integer class id assigned to each sentiment string in the "label" column.
label_map = {'negative': 0, 'neutral': 1, 'positive': 2}


def _encode_labels(frame, split_name):
    """Convert the "label" column of *frame* to int32 class ids via ``label_map``.

    Args:
        frame: DataFrame holding a "label" column of sentiment strings.
        split_name: Name of the split; used only in the error message.

    Returns:
        A NumPy int32 array with one class id per row of *frame*.

    Raises:
        ValueError: If any label is missing or is not a key of ``label_map``.
    """
    encoded = frame["label"].map(label_map)
    # Series.map leaves NaN for values that are not keys of the dict; casting
    # NaN to int32 would silently produce a meaningless class id, so fail
    # loudly instead of training/evaluating on corrupted targets.
    unmapped = encoded.isna()
    if unmapped.any():
        unknown = sorted(frame.loc[unmapped, "label"].astype(str).unique())
        raise ValueError(
            f"Unrecognized label(s) in {split_name} split: {unknown}; "
            f"expected one of {sorted(label_map)}"
        )
    return encoded.values.astype(np.int32)


y_train = _encode_labels(train_df, "train")
y_valid = _encode_labels(valid_df, "valid")
y_test  = _encode_labels(test_df, "test")


# Create the text preprocessor; adapt() is given the training texts only.
prep = TextPreprocessor(max_vocab=10000, max_len=100)
prep.adapt(train_df["text"])

# Run the adapted preprocessor over the "text" column of every split,
# in train -> valid -> test order.
X_train, X_valid, X_test = (
    prep.transform(split["text"]) for split in (train_df, valid_df, test_df)
)


# Constructor settings for the Keras RNN wrapper. max_vocab / max_len carry
# the same values that were passed to TextPreprocessor.
keras_hparams = {
    "max_vocab": 10000,
    "max_len": 100,
    "rnn_units": [64],
    "dense_units": [32, 3],
    "dense_activations": ['relu', 'softmax'],
    "bidirectional": True,
}
model_keras = SimpleRNNKeras(**keras_hparams)

# Hand the already-vectorized inputs and integer labels for all three splits
# to the model wrapper.
keras_splits = {
    "X_train": X_train,
    "y_train": y_train,
    "X_valid": X_valid,
    "y_valid": y_valid,
    "X_test": X_test,
    "y_test": y_test,
}
model_keras.set_vectorized_data(**keras_splits)

# Build, train for 5 epochs, then persist the model to disk.
model_keras.build_model()
model_keras.train(epochs=5)
# NOTE(review): the method name says "npz" but the path ends in ".npy" --
# confirm the intended file extension against save_full_npz/load_full_npz.
model_keras.save_full_npz("model_simple_rnn.npy")


# Load the file written by the Keras wrapper into the manual implementation.
model_manual = SimpleRNNManual()
model_manual.load_full_npz("model_simple_rnn.npy")

# Torch copies of the test inputs and labels (int64) for the manual model.
X_test_tensor = tc.tensor(X_test.numpy(), dtype=tc.long)
y_test_tensor = tc.tensor(y_test, dtype=tc.long)


def _macro_f1(y_pred):
    """Return the macro-averaged F1 of *y_pred* against ``y_test``."""
    return f1_score(y_test, y_pred, average='macro')


# Keras predict() output is reduced to a class id per row with argmax over
# axis 1; the manual model's predict() result is moved to CPU and converted
# to a NumPy array as-is.
y_pred_keras = model_keras.model.predict(X_test).argmax(axis=1)
y_pred_manual = model_manual.predict(X_test_tensor).cpu().numpy()

f1_keras = _macro_f1(y_pred_keras)
f1_manual = _macro_f1(y_pred_manual)

# Report both scores, one per line.
print(
    f"Keras  F1-score:   {f1_keras:.4f}",
    f"Manual F1-score:  {f1_manual:.4f}",
    sep="\n",
)


Epoch 1/5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 52ms/step - accuracy: 0.3602 - loss: 1.1090 - val_accuracy: 0.4200 - val_loss: 1.0601
Epoch 2/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.5512 - loss: 0.9720 - val_accuracy: 0.4800 - val_loss: 0.9684
Epoch 3/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.5701 - loss: 0.8857 - val_accuracy: 0.5000 - val_loss: 1.0225
Epoch 4/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.7719 - loss: 0.6851 - val_accuracy: 0.5000 - val_loss: 0.9642
Epoch 5/5
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.8999 - loss: 0.4893 - val_accuracy: 0.5200 - val_loss: 0.9659
Saved full model to model_simple_rnn.npy
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Keras  F1-score:   0.4929
Manual F1-score:  0.4929
