In [6]:
# === HEADER ===
import os, random, numpy as np, pandas as pd, tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from tensorflow.keras.layers import TextVectorization
from models.lstm.keras_model import build_lstm_model
from models.lstm.scratch_model import ScratchLSTMClassifier

# === SEEDING ===
os.environ['PYTHONHASHSEED'] = '42'
os.environ['TF_DETERMINISTIC_OPS'] = '1'
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# === DATA LOAD ===
train = pd.read_csv('./../data/nusaX-sentiment/train.csv')
valid = pd.read_csv('./../data/nusaX-sentiment/valid.csv')
test = pd.read_csv('./../data/nusaX-sentiment/test.csv')

le = LabelEncoder()
y_train = le.fit_transform(train['label'])
y_val = le.transform(valid['label'])
y_test = le.transform(test['label'])
num_classes = len(le.classes_)

texts_train = train['text'].astype(str).tolist()
texts_val = valid['text'].astype(str).tolist()
texts_test = test['text'].astype(str).tolist()

max_tokens = 20000
max_len = 100
embed_dim = 128

vectorizer = TextVectorization(max_tokens=max_tokens, output_sequence_length=max_len)
vectorizer.adapt(texts_train)

def make_dataset(texts, labels, batch=32, shuffle=True):
    x = vectorizer(tf.constant(texts))
    ds = tf.data.Dataset.from_tensor_slices((x, labels))
    if shuffle: ds = ds.shuffle(1024)
    return ds.batch(batch).prefetch(1)

print("Sample vocab:", vectorizer.get_vocabulary()[:10])
print("Max index:", len(vectorizer.get_vocabulary()))

ds_train = make_dataset(texts_train, y_train)
ds_val = make_dataset(texts_val, y_val, shuffle=False)
ds_test = make_dataset(texts_test, y_test, shuffle=False)

# === KERAS TRAINING ===
def train_and_eval(params, name):
    print(f"\n=== Running {name}: {params}")
    model = build_lstm_model(**params, max_len=max_len, max_tokens=max_tokens,
                             embed_dim=embed_dim, num_classes=num_classes)
    model.fit(ds_train, validation_data=ds_val, epochs=5)
    y_pred = np.argmax(model.predict(ds_test), axis=-1)
    f1 = f1_score(y_test, y_pred, average='macro')
    print(f"{name} macro-F1: {f1:.4f}")
    return model

params = {'n_layers': 3, 'units': [64, 128, 256], 'bidirectional': True}
best_model = train_and_eval(params, "Keras-Best")
best_model.save_weights("best_lstm_back.weights.h5")

# === UNPACK & CONVERT TO SCRATCH ===
def unpack_lstm(layer):
    if isinstance(layer, tf.keras.layers.Bidirectional):
        return_seq = layer.forward_layer.return_sequences
        Wf, Uf, bf = layer.forward_layer.get_weights()
        Wb, Ub, bb = layer.backward_layer.get_weights()
        return ('bidir', return_seq, (Wf, Uf, bf), (Wb, Ub, bb))
    else:
        return_seq = layer.return_sequences
        W, U, b = layer.get_weights()
        return ('unidir', return_seq, (W, U, b))

emb_w = best_model.layers[1].get_weights()[0]
d_w, d_b = best_model.layers[-1].get_weights()
lstm_layers = [ly for ly in best_model.layers if isinstance(ly, (tf.keras.layers.LSTM, tf.keras.layers.Bidirectional))]
scratch_specs = [unpack_lstm(ly) for ly in lstm_layers]

scratch_model = ScratchLSTMClassifier(emb_w, scratch_specs, d_w, d_b)

# === FORWARD SCRATCH EVALUATION ===
x_test_int = vectorizer(tf.constant(texts_test)).numpy()
pred_scratch = scratch_model.forward(x_test_int)
yhat_s = np.argmax(pred_scratch, axis=1)
f1_forward = f1_score(y_test, yhat_s, average='macro')
print(f"\nmacro-F1 (scratch - forward only): {f1_forward:.4f}")

# === BACKWARD MANUAL TRAINING SCRATCH ===
def softmax_cross_entropy_loss(logits, labels):
    y_onehot = np.eye(num_classes)[labels]
    exps = np.exp(logits - np.max(logits, axis=1, keepdims=True))
    probs = exps / exps.sum(axis=1, keepdims=True)
    loss = -np.sum(y_onehot * np.log(probs + 1e-8)) / len(labels)
    grad = (probs - y_onehot) / len(labels)
    return loss, grad

x_train_int = vectorizer(tf.constant(texts_train)).numpy()
batch_size = 32
lr = 0.01
epochs = 10

for ep in range(epochs):
    idx = np.random.permutation(len(x_train_int))
    x_train_int = x_train_int[idx]
    y_train = y_train[idx]
    ep_loss = 0
    for i in range(0, len(x_train_int), batch_size):
        xb = x_train_int[i:i+batch_size]
        yb = y_train[i:i+batch_size]
        logits = scratch_model.forward(xb)
        loss, grad = softmax_cross_entropy_loss(logits, yb)
        scratch_model.backward(grad)
        scratch_model.dense.W -= lr * scratch_model.dense.dW
        scratch_model.dense.b -= lr * scratch_model.dense.db
        for spec in scratch_model.lstm_specs:
            if spec[0] == 'unidir':
                lstm = scratch_model.lstm_layers.pop(0)[1]
                lstm.W -= lr * lstm.dW
                lstm.U -= lr * lstm.dU
                lstm.b -= lr * lstm.db
            else:
                f_lstm, b_lstm = scratch_model.lstm_layers.pop(0)[1:]
                for lstm in [f_lstm, b_lstm]:
                    lstm.W -= lr * lstm.dW
                    lstm.U -= lr * lstm.dU
                    lstm.b -= lr * lstm.db
        scratch_model.embedding.W -= lr * scratch_model.embedding.dW
        ep_loss += loss
    print(f"[Epoch {ep+1}] Scratch Loss: {ep_loss:.4f}")

# === FINAL EVALUATION ===
pred_final = scratch_model.forward(x_test_int)
yhat_final = np.argmax(pred_final, axis=1)
f1_final = f1_score(y_test, yhat_final, average='macro')
print(f"\nmacro-F1 (scratch - trained): {f1_final:.4f}")

Sample vocab: ['', '[UNK]', np.str_('yang'), np.str_('di'), np.str_('dan'), np.str_('tidak'), np.str_('saya'), np.str_('dengan'), np.str_('enak'), np.str_('ini')]
Max index: 2836

=== Running Keras-Best: {'n_layers': 3, 'units': [64, 128, 256], 'bidirectional': True}
Epoch 1/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 172ms/step - loss: 1.0486 - val_loss: 1.0145
Epoch 2/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 160ms/step - loss: 0.8537 - val_loss: 0.8182
Epoch 3/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 155ms/step - loss: 0.5560 - val_loss: 0.9192
Epoch 4/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 152ms/step - loss: 0.3558 - val_loss: 1.1204
Epoch 5/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 153ms/step - loss: 0.2509 - val_loss: 1.1578
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 62ms/step
Keras-Best macro-F1: 0.6050


  z = xt @ self.W + H @ self.U + self.b
  z = xt @ self.W + H @ self.U + self.b
  z = xt @ self.W + H @ self.U + self.b
  z = x @ self.W + self.b
  z = x @ self.W + self.b
  z = x @ self.W + self.b
  z = xt @ self.W + H @ self.U + self.b
  z = xt @ self.W + H @ self.U + self.b
  z = xt @ self.W + H @ self.U + self.b
  z = x @ self.W + self.b
  z = x @ self.W + self.b
  z = x @ self.W + self.b
  self.dW = x.T @ dL_dz
  self.dW = x.T @ dL_dz
  self.dW = x.T @ dL_dz
  dL_dx = dL_dz @ self.W.T
  dL_dx = dL_dz @ self.W.T
  dL_dx = dL_dz @ self.W.T
  dW += xt.T @ dz
  dW += xt.T @ dz
  dW += xt.T @ dz
  dU += self.cache[t - 1][1].T @ dz if t > 0 else 0
  dU += self.cache[t - 1][1].T @ dz if t > 0 else 0
  dU += self.cache[t - 1][1].T @ dz if t > 0 else 0
  dX_step = dz @ self.W.T
  dX_step = dz @ self.W.T
  dX_step = dz @ self.W.T
  dH_next = dz @ self.U.T
  dH_next = dz @ self.U.T
  dH_next = dz @ self.U.T



macro-F1 (scratch - forward only): 0.6050
[Epoch 1] Scratch Loss: 10.2723
[Epoch 2] Scratch Loss: 10.2415
[Epoch 3] Scratch Loss: 10.2081
[Epoch 4] Scratch Loss: 10.1949
[Epoch 5] Scratch Loss: 10.1580
[Epoch 6] Scratch Loss: 10.1229
[Epoch 7] Scratch Loss: 10.1169
[Epoch 8] Scratch Loss: 10.0812
[Epoch 9] Scratch Loss: 10.0625
[Epoch 10] Scratch Loss: 10.0600

macro-F1 (scratch - trained): 0.6104
