### Import Libraries

In [1]:
import os
import numpy as np
import pandas as pd
import keras
import matplotlib.pyplot as plt
from data_preprocessing import NusaXSentimentDataProcessor
from LSTM.lstm import LSTMModel
from RNN.rnn import RNNModel



### Import Dataset

In [2]:
# Directory handed to the data processor (relative to this notebook).
data_dir = '../indonesian'

# Every sample is processed with a sequence length of 50 timesteps.
data_processor = NusaXSentimentDataProcessor(data_dir, sequence_length=50)

print("Preparing data...")
# prepare_data() returns three (features, labels) pairs:
# train, validation and test, in that order.
(
    (x_train, y_train),
    (x_val, y_val),
    (x_test, y_test),
) = data_processor.prepare_data()



Preparing data...
Unique labels found: {'positive', 'neutral', 'negative'}
Train data: 500 samples
Validation data: 100 samples
Test data: 400 samples


### Building the Keras Model Architecture

In [3]:
from tensorflow import keras

def create_and_train_model(x_train, y_train, x_val, y_val, vocab_size, num_classes, model_type='lstm',
                           embedding_dim=1, hidden_units=10, dropout_rate=0.2,
                           epochs=2, batch_size=32, seed=None):
    """Build, compile and train a small Keras sequence classifier.

    The network is ``Embedding -> recurrent layer -> Dropout -> Dense(softmax)``.
    The defaults reproduce the deliberately tiny configuration used in this
    notebook (1-dimensional embedding, 10 hidden units, 2 epochs).

    Args:
        x_train, y_train: Training inputs and labels passed to ``model.fit``.
        x_val, y_val: Validation inputs and labels passed as ``validation_data``.
        vocab_size: Input dimension of the embedding layer.
        num_classes: Number of units in the softmax output layer.
        model_type: Recurrent layer to use, case-insensitive:
            ``'lstm'``, ``'simplernn'`` or ``'gru'``.
        embedding_dim: Output dimension of the embedding layer.
        hidden_units: Number of units in the recurrent layer.
        dropout_rate: Rate of the dropout layer placed before the output layer.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used during training.
        seed: Optional integer; when given, ``keras.utils.set_random_seed`` is
            called before the model is built so runs are repeatable.

    Returns:
        A ``(model, history)`` tuple: the trained model and the object
        returned by ``model.fit``.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    # Map each supported name to its Keras layer class. 'gru' is included
    # because the error message below has always advertised it.
    recurrent_layers = {
        'lstm': keras.layers.LSTM,
        'simplernn': keras.layers.SimpleRNN,
        'gru': keras.layers.GRU,
    }

    # Validate first so an unsupported type fails before any layer is created.
    recurrent_cls = recurrent_layers.get(model_type.lower())
    if recurrent_cls is None:
        raise ValueError("model_type harus salah satu dari: 'lstm', 'simplernn', atau 'gru'")

    if seed is not None:
        keras.utils.set_random_seed(seed)

    model = keras.Sequential()
    model.add(keras.layers.Embedding(vocab_size, embedding_dim))
    model.add(recurrent_cls(hidden_units))
    model.add(keras.layers.Dropout(dropout_rate))
    model.add(keras.layers.Dense(num_classes, activation='softmax'))

    # The sparse variant of the loss takes integer class ids rather than
    # one-hot vectors as labels.
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    print(f"\nTraining {model_type.upper()} model with {num_classes} classes...")
    history = model.fit(
        x_train, y_train,
        validation_data=(x_val, y_val),
        epochs=epochs,
        batch_size=batch_size
    )

    return model, history


In [4]:
# Sizes reported by the data processor; they are passed to the model
# builder as the embedding input dimension and the output layer width.
vocab_size = data_processor.get_vocabulary_size()
num_classes = data_processor.get_num_classes()

# One call, two output lines: the summary, then the bare vocabulary size.
print(
    f"Vocab Size: {vocab_size} Num Classes {num_classes}",
    vocab_size,
    sep="\n",
)

Vocab Size: 2836 Num Classes 3
2836


#### LSTM

In [5]:
# Train the LSTM variant and show the Keras layer/parameter summary.
model_lstm, hist = create_and_train_model(
    x_train, y_train,
    x_val, y_val,
    vocab_size, num_classes,
    model_type='lstm',
)
model_lstm.summary()
# Expected parameter counts with d = 1 (embedding dim), h = 10 (hidden units):
#   LSTM:         4 * h * (d + h + 1) = 4 * 10 * (1 + 10 + 1) = 480
#   Dense output: (h + 1) * num_classes = (10 + 1) * 3 = 33



Training LSTM model with 3 classes...
Epoch 1/2
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 34ms/step - accuracy: 0.3022 - loss: 1.0985 - val_accuracy: 0.4800 - val_loss: 1.0957
Epoch 2/2
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.3914 - loss: 1.0942 - val_accuracy: 0.3800 - val_loss: 1.0923


In [6]:
# Wrap the trained Keras model in the project's LSTMModel (imported from
# LSTM.lstm) and print the architecture / weight information it reports.
custom_model_lstm = LSTMModel(model_lstm) 
custom_model_lstm.print_info()


Model Architecture Information:

Layer 0: Embedding
------------------------
E (Embedding Matrix):
  Shape: (2836, 1)
  - rows: vocabulary size (|V|)
  - cols: embedding dimension (d)

Config:
{'name': 'embedding', 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}, 'input_dim': 2836, 'output_dim': 1, 'embeddings_initializer': {'module': 'keras.initializers', 'class_name': 'RandomUniform', 'config': {'seed': None, 'minval': -0.05, 'maxval': 0.05}, 'registered_name': None}, 'embeddings_regularizer': None, 'activity_regularizer': None, 'embeddings_constraint': None, 'mask_zero': False}

Layer 1: LSTM
------------------------
Weight Matrices:
W (Input Weight Matrix):
  Shape: (1, 40)
  - rows: input dimension (d)
  - cols: 4*h where h is hidden size (for i,f,g,o gates)

U (Recurrent Weight Matrix):
  Shape: (10, 40)
  - rows: hidden size (h)
  - cols: 4*h (for i,f,g,o gates)

b (Bias Vector):
  Shape: (40,)

In [7]:
# Reference prediction from Keras for the first test sample only
# (the slice keeps the leading batch axis).
y_keras = model_lstm.predict(x_test[:1])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 257ms/step


In [8]:
# Same single test sample, run through the LSTMModel wrapper's forward pass.
y_scratch = custom_model_lstm.forward(x_test[:1])

total timestep: 50
x_t di t-0  =  [[0.03093815]]
x_t di t-1  =  [[-0.03148082]]
x_t di t-2  =  [[0.02662777]]
x_t di t-3  =  [[-0.02059043]]
x_t di t-4  =  [[-0.02477199]]
x_t di t-5  =  [[0.03858622]]
x_t di t-6  =  [[0.02897973]]
x_t di t-7  =  [[-0.02068324]]
x_t di t-8  =  [[-0.00640893]]
x_t di t-9  =  [[-0.06960691]]
x_t di t-10  =  [[-0.04805113]]
x_t di t-11  =  [[0.01642464]]
x_t di t-12  =  [[-0.01961239]]
x_t di t-13  =  [[-0.01761623]]
x_t di t-14  =  [[-0.04129658]]
x_t di t-15  =  [[0.02962817]]
x_t di t-16  =  [[-0.02187863]]
x_t di t-17  =  [[0.03592454]]
x_t di t-18  =  [[-0.07429778]]
x_t di t-19  =  [[-0.01257172]]
x_t di t-20  =  [[-0.01139451]]
x_t di t-21  =  [[-0.01139451]]
x_t di t-22  =  [[-0.01139451]]
x_t di t-23  =  [[-0.01139451]]
x_t di t-24  =  [[-0.01139451]]
x_t di t-25  =  [[-0.01139451]]
x_t di t-26  =  [[-0.01139451]]
x_t di t-27  =  [[-0.01139451]]
x_t di t-28  =  [[-0.01139451]]
x_t di t-29  =  [[-0.01139451]]
x_t di t-30  =  [[-0.01139451]]
x_t di

In [9]:
# Show both outputs for the same test sample: Keras first, then the
# LSTMModel wrapper.
print(y_keras)
print(y_scratch)

# Quantify the agreement instead of comparing the arrays by eye; a large
# value indicates the two forward passes are not computing the same thing.
max_abs_diff = np.max(np.abs(np.asarray(y_keras) - np.asarray(y_scratch)))
print(f"max |keras - scratch| = {max_abs_diff:.3e}")

[[0.3360068  0.31692553 0.34706768]]
[[0.3375466  0.31892905 0.34352437]]


#### SimpleRNN

In [10]:

# Train the SimpleRNN variant with the same data and settings, then show
# the Keras layer/parameter summary.
model_rnn, hist = create_and_train_model(
    x_train, y_train,
    x_val, y_val,
    vocab_size, num_classes,
    model_type='simplernn',
)
model_rnn.summary()



Training SIMPLERNN model with 3 classes...


Epoch 1/2
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - accuracy: 0.3068 - loss: 1.1102 - val_accuracy: 0.3600 - val_loss: 1.0927
Epoch 2/2
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.4087 - loss: 1.0913 - val_accuracy: 0.4300 - val_loss: 1.0885


In [11]:
# Wrap the trained Keras model in the project's RNNModel (imported from
# RNN.rnn) and print the architecture / weight information it reports.
custom_model_rnn = RNNModel(model_rnn) 
custom_model_rnn.print_info()


Model Architecture Information:

Layer 0: Embedding
------------------------
E (Embedding Matrix):
  Shape: (2836, 1)
  - rows: vocabulary size (|V|)
  - cols: embedding dimension (d)

Config:
{'name': 'embedding_2', 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}, 'input_dim': 2836, 'output_dim': 1, 'embeddings_initializer': {'module': 'keras.initializers', 'class_name': 'RandomUniform', 'config': {'seed': None, 'minval': -0.05, 'maxval': 0.05}, 'registered_name': None}, 'embeddings_regularizer': None, 'activity_regularizer': None, 'embeddings_constraint': None, 'mask_zero': False}

Layer 1: SimpleRNN
------------------------
Weight Matrices:
W (Input Weight Matrix):
  Shape: (1, 10)
  - rows: input dimension (d)
  - cols: hidden size (h)

U (Recurrent Weight Matrix):
  Shape: (10, 10)
  - rows: hidden size (h)
  - cols: hidden size (h)

b (Bias Vector):
  Shape: (10,)
  - size: hidden size (h)

Simp

In [12]:
# Reference prediction from the Keras SimpleRNN model for the first test
# sample only (the slice keeps the leading batch axis).
y_keras = model_rnn.predict(x_test[:1])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step


In [13]:
# TODO: the from-scratch RNN prediction still needs to be run here and compared with y_keras
# y_scratch = custom_model_rnn.predict(x_test[0:1])