# In [None]:

import pandas as pd
import torch as tc
from sklearn.metrics import f1_score
from models.rnn.simple_rnn_keras import SimpleRNNKeras
from models.rnn.simple_rnn_manual import SimpleRNNManual
from helper.text_vectorization import TextPreprocessor


# --- Data ------------------------------------------------------------------
# Load the three NusaX sentiment splits. The paths are relative, so they are
# resolved against the current working directory of the notebook/script.
# Only the "text" and "label" columns are read further down.
train_df = pd.read_csv("../data/nusaX-sentiment/train.csv")
valid_df = pd.read_csv("../data/nusaX-sentiment/valid.csv")
test_df  = pd.read_csv("../data/nusaX-sentiment/test.csv")


# --- Vectorization ---------------------------------------------------------
# Single source of truth for the vocabulary size and sequence length.
# The same two values are handed to both the preprocessor and the model, so
# defining them once prevents the two from silently drifting apart.
MAX_VOCAB = 10000
MAX_LEN = 100

prep = TextPreprocessor(max_vocab=MAX_VOCAB, max_len=MAX_LEN)
# adapt() only ever sees the training text, so validation/test text cannot
# influence whatever state it fits (presumably the vocabulary -- confirm in
# helper.text_vectorization).
prep.adapt(train_df["text"])

X_train = prep.transform(train_df["text"])
X_valid = prep.transform(valid_df["text"])
X_test  = prep.transform(test_df["text"])


# --- Keras model -----------------------------------------------------------
# NOTE(review): the final dense layer has 3 units with softmax, which looks
# like one unit per sentiment class -- confirm against the label set.
model_keras = SimpleRNNKeras(
    max_vocab=MAX_VOCAB,
    max_len=MAX_LEN,
    rnn_units=[64],
    dense_units=[32, 3],
    dense_activations=['relu', 'softmax'],
    bidirectional=True
)

# Hand the pre-vectorized inputs and the raw "label" columns to the wrapper.
# NOTE(review): labels are passed as-is; this assumes they are already in the
# encoding the model's loss expects -- TODO confirm.
model_keras.set_vectorized_data(
    X_train=X_train,
    y_train=train_df["label"],
    X_valid=X_valid,
    y_valid=valid_df["label"],
    X_test=X_test,
    y_test=test_df["label"]
)

model_keras.build_model()
model_keras.train(epochs=5)
# Written to a relative path, i.e. into the current working directory.
model_keras.save_full_npz("model_simple_rnn.npz")


# --- Manual (from-scratch) model -------------------------------------------
# Rebuild the manual implementation from the .npz file and compare its
# macro-F1 on the test split against the Keras model's.
model_manual = SimpleRNNManual()
model_manual.load_full_npz("model_simple_rnn.npz")

# Torch copies of the test data. X_test must expose .numpy() for this to work.
# NOTE(review): casting the label column to long assumes it is numeric --
# TODO confirm the CSV labels are integer-encoded.
X_test_tensor = tc.tensor(X_test.numpy(), dtype=tc.long)
# Not used in the visible part of this section; kept for any later cells.
y_test_tensor = tc.tensor(test_df["label"].values, dtype=tc.long)


# Keras predict() output is reduced with argmax over axis 1 to get one class
# index per row.
y_pred_keras = model_keras.model.predict(X_test).argmax(axis=1)

# The manual model's output is moved to CPU and converted to NumPy without an
# argmax, so predict() is presumably already returning class ids -- confirm.
y_pred_manual = model_manual.predict(X_test_tensor).cpu().numpy()

# Macro-averaged F1 for both models against the same test labels.
f1_keras, f1_manual = (
    f1_score(test_df["label"], predictions, average='macro')
    for predictions in (y_pred_keras, y_pred_manual)
)

print(
    f"Keras F1-score:   {f1_keras:.4f}",
    f"Manual F1-score:  {f1_manual:.4f}",
    sep="\n",
)
