In [1]:
import os
import numpy as np
import logging
import inspect
import re

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import accuracy_score, f1_score

from SASentimentModel import SASentimentModel
from kaggle_dataset import KaggleDataSet

from sa_model_pipeline import SAModelPipeline
from sa_model_config_loader import SAModelConfigLoader
from sa_data_loader import SADataLoader
from sa_model_params import SAModelParams
from SASelfAttentionModel import SASelfAttentionModel
from SARnnModel import SARnnModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

In [3]:
### Extract epoch number from checkpoint file
### example: 'epoch03' will return 3, else 0 if no number is found

def extract_initial_epoch_from_checkpoint(path):
    match = re.search(r"epoch(\d+)", path)
    return int(match.group(1)) if match else 0

In [4]:
def main():

    ### TOGGLE
    
    ### If no existing model to load, set LOAD_EXISTING_MODEL to False (train from scratch)
    ### If there is an existing model to load, set LOAD_EXISTING_MODEL to True and load the CHECKPOINT_PATH
    
    LOAD_EXISTING_MODEL = False
    CHECKPOINT_PATH = ""  # adjust to the saved keras model
    
    ### 
    ### To run your model, 
    ###
    ### 1) Change model_module_name to match your model's module name, i.e., the file name
    ### 2) Change model_class_name to match  your model's class name like SASelfAttentionModel
    ### 3) Look for this line at the bottom of the method,
    ###    sa_sentiment_model = SASelfAttentionModel(sa_model_params)
    ###    Change SASelfAttentionModel to match your model's class.
    ###    That's it.
    ### 4) Then run this notebook from start to finish.
    ###


    ###
    ### Modify model_module_name and model_class_name to match your model's 
    ### module and class name and then change the model instance to match your model class below
    ###
    model_module_name = "SARnnModel"
    model_class_name = "SARnnModel"

    ### Load the model configuration and then find the model specified above
    sa_model_config_loader = SAModelConfigLoader()
    list_of_model_config = sa_model_config_loader.load_model_config()
    model_config = sa_model_config_loader.find_model_config(model_module_name, model_class_name)
    
    ###
    ### Construct the path to the train, test and validation CSV file specified in the model config file
    ### for the given model
    ###
    cwd = os.getcwd()        
    train_file = model_config.get_model_train_csv_file_name()
    test_file = model_config.get_model_test_csv_file_name()
    validation_file = model_config.get_model_validation_csv_file_name()

    path_to_train_csv_file = os.path.join(cwd, train_file)
    path_to_test_csv_file = os.path.join(cwd, test_file)
    path_to_validation_csv_file = os.path.join(cwd, validation_file)

    ### Load the train, test and validation data file specific to the model
    sa_data_loader = SADataLoader(path_to_train_csv_file, path_to_test_csv_file, path_to_validation_csv_file)
    sa_data_loader.load_data(KaggleDataSet.get_kaggle_column_names())

    ### Construct the model parameter object
    sa_model_params = SAModelParams(sa_data_loader, model_config)
    logger.info(f"Start running model: {model_module_name}:{model_class_name}") 

    ###
    ### CHANGE THE MODEL TO YOUR MODEL CLASS!!
    ### 

    sa_sentiment_model = SARnnModel(sa_model_params)
    
    ### Register model/preprocessing
    sa_sentiment_model.register(sa_model_params)
    sa_sentiment_model.preprocess(sa_model_params)
    
    ### Checkpoint
    if LOAD_EXISTING_MODEL:
        initial_epoch = extract_initial_epoch_from_checkpoint(CHECKPOINT_PATH) ### epoch number from checkpoint filename
        sa_sentiment_model.load_model_from_file(CHECKPOINT_PATH) ### load saved model weights
        logger.info(f"Resuming training from epoch {initial_epoch}")
        sa_sentiment_model.fit(sa_model_params, resume_training=True, initial_epoch=initial_epoch) ### continue training
        sa_sentiment_model.predict(sa_model_params)
        sa_sentiment_model.evaluate(sa_model_params)
    ### Call the SASentimentModel's run() which will run the model pipeline
    else:
        sa_sentiment_model.run(sa_model_params)
        
    ###
    ###
    ###


    logger.info(f"Finished running model: {sa_sentiment_model.__class__.__name__}") 


In [5]:
if __name__ == "__main__":
    main()

Transform set to: <bound method BinaryLabelTransformer.transform of <binary_label_transformer.BinaryLabelTransformer object at 0x0000019A0F230400>>, Type: <class 'method'>
Path to train csv file: C:\Users\dingg\IE 7500\IE7500-GroupProject-main\train_60K.csv
Path to test csv file: C:\Users\dingg\IE 7500\IE7500-GroupProject-main\test_20K.csv
Path to validation csv file: C:\Users\dingg\IE 7500\IE7500-GroupProject-main\validate_20K.csv


2025-06-26 16:58:49,075 - INFO - Start running model: SARnnModel:SARnnModel
2025-06-26 16:58:49,076 - INFO - Calling SARnnModel.register(): RNN Sentiment Model/SARnnModel/SARnnModel/train_60K.csv/test_20K.csv/validate_20K.csv/{'vocab_size': 10000, 'sequence_length': 500, 'embedding_dim': 100, 'epoch': 5, 'batch_size': 32}
2025-06-26 16:58:49,077 - INFO - SARnnModel.register(): Completed
2025-06-26 16:58:49,078 - INFO - Calling SARnnModel.preprocess(): RNN Sentiment Model/SARnnModel/SARnnModel/train_60K.csv/test_20K.csv/validate_20K.csv/{'vocab_size': 10000, 'sequence_length': 500, 'embedding_dim': 100, 'epoch': 5, 'batch_size': 32}


Train size: 60000
Test size: 20000
Validation size: 20000


2025-06-26 16:58:57,625 - INFO - SARnnModel.preprocess(): Completed
2025-06-26 16:58:57,627 - INFO - Calling SARnnModel.register(): RNN Sentiment Model/SARnnModel/SARnnModel/train_60K.csv/test_20K.csv/validate_20K.csv/{'vocab_size': 10000, 'sequence_length': 500, 'embedding_dim': 100, 'epoch': 5, 'batch_size': 32}
2025-06-26 16:58:57,628 - INFO - SARnnModel.register(): Completed
2025-06-26 16:58:57,629 - INFO - Calling SARnnModel.preprocess(): RNN Sentiment Model/SARnnModel/SARnnModel/train_60K.csv/test_20K.csv/validate_20K.csv/{'vocab_size': 10000, 'sequence_length': 500, 'embedding_dim': 100, 'epoch': 5, 'batch_size': 32}
2025-06-26 16:59:06,506 - INFO - SARnnModel.preprocess(): Completed
2025-06-26 16:59:06,508 - INFO - Starting fit; resume_training=False, initial_epoch=0
2025-06-26 16:59:06,567 - INFO - Built and compiled new model


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step - accuracy: 0.4984 - loss: 0.7058
Epoch 1: saving model to model_checkpoint_epoch01_valacc0.5116.keras
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 137ms/step - accuracy: 0.4984 - loss: 0.7058 - val_accuracy: 0.5116 - val_loss: 0.6959
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step - accuracy: 0.5154 - loss: 0.6952
Epoch 2: saving model to model_checkpoint_epoch02_valacc0.5286.keras
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 136ms/step - accuracy: 0.5154 - loss: 0.6952 - val_accuracy: 0.5286 - val_loss: 0.6925
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step - accuracy: 0.5254 - loss: 0.6925
Epoch 3: saving model to model_checkpoint_epoch03_valacc0.5293.keras
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 137ms/step - accuracy: 0.5254 - loss: 0.

2025-06-26 17:20:33,151 - INFO - Best val_accuracy: 0.5293 at epoch 5
2025-06-26 17:20:33,153 - INFO - SARnnModel.fit(): Model fitted
2025-06-26 17:20:33,154 - INFO - SARnnModel.fit(): Completed


2025-06-26 17:20:33,183 - INFO - Calling SARnnModel.predict(): RNN Sentiment Model/SARnnModel/SARnnModel/train_60K.csv/test_20K.csv/validate_20K.csv/{'vocab_size': 10000, 'sequence_length': 500, 'embedding_dim': 100, 'epoch': 5, 'batch_size': 32}


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 36ms/step


2025-06-26 17:20:56,368 - INFO - SARnnModel.predict(): Completed
2025-06-26 17:20:56,368 - INFO - Calling SARnnModel.evaluate(): RNN Sentiment Model/SARnnModel/SARnnModel/train_60K.csv/test_20K.csv/validate_20K.csv/{'vocab_size': 10000, 'sequence_length': 500, 'embedding_dim': 100, 'epoch': 5, 'batch_size': 32}


              precision    recall  f1-score   support

           0       0.52      0.63      0.57     10000
           1       0.53      0.41      0.46     10000

    accuracy                           0.52     20000
   macro avg       0.52      0.52      0.51     20000
weighted avg       0.52      0.52      0.51     20000

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 36ms/step - accuracy: 0.6008 - loss: 0.7090


2025-06-26 17:21:19,149 - INFO - SARnnModel.evaluate(): Accuracy: 0.5201500058174133, Loss: 0.6935680508613586
2025-06-26 17:21:19,150 - INFO - SARnnModel.evaluate(): Completed
2025-06-26 17:21:19,151 - INFO - Finished running model: SARnnModel
