In [1]:
import os
import numpy as np
import logging
import inspect

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import accuracy_score, f1_score

from SASentimentModel import SASentimentModel
from kaggle_dataset import KaggleDataSet

from sa_model_pipeline import SAModelPipeline
from sa_model_config_loader import SAModelConfigLoader
from sa_data_loader import SADataLoader
from sa_model_params import SAModelParams
from SASelfAttentionModel import SASelfAttentionModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Logging setup for the notebook: INFO-level records formatted as
# "<timestamp> - <level> - <message>", and a module-named logger that the
# cells below use for their own progress messages.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

In [3]:
def main():
    """Run one sentiment-analysis model from configuration lookup through run().

    Steps performed, in order:
      1. Load the model configuration and find the entry matching
         ``model_module_name`` / ``model_class_name``.
      2. Join the train, test and validation CSV file names from that entry
         onto the current working directory and load them with ``SADataLoader``.
      3. Build the ``SAModelParams`` object, instantiate the model class and
         call its ``run()`` method, which runs the model pipeline.

    To run your own model:
      1. Change ``model_module_name`` to your model's module name, i.e., the
         file name.
      2. Change ``model_class_name`` to your model's class name, like
         ``SASelfAttentionModel``.
      3. Change ``model_class`` to your model's class (it must already be
         imported in the notebook's import cell).
      4. Run this notebook from start to finish.
    """

    ###
    ### MODEL SELECTION -- these three settings must all refer to the same model.
    ### CHANGE THEM TO MATCH YOUR MODEL'S MODULE NAME, CLASS NAME AND CLASS!!
    ###
    model_module_name = "SASelfAttentionModel"
    model_class_name = "SASelfAttentionModel"
    model_class = SASelfAttentionModel

    # The configuration (and therefore the data files) is looked up by name,
    # while the model is built from the class object. Warn when the two have
    # drifted apart so a config/class mix-up does not go unnoticed.
    if model_class.__name__ != model_class_name:
        logger.warning(
            f"model_class_name '{model_class_name}' does not match "
            f"model class '{model_class.__name__}'; the configuration and data files "
            f"are selected by name and may belong to a different model"
        )

    ### Load the model configuration and then find the model specified above
    sa_model_config_loader = SAModelConfigLoader()
    # The return value is not used here; the call is kept because
    # find_model_config() presumably searches what this call loaded -- TODO confirm.
    sa_model_config_loader.load_model_config()
    model_config = sa_model_config_loader.find_model_config(model_module_name, model_class_name)

    ###
    ### Construct the path to the train, test and validation CSV file specified
    ### in the model config file for the given model. The file names are joined
    ### onto the current working directory.
    ###
    cwd = os.getcwd()
    path_to_train_csv_file = os.path.join(cwd, model_config.get_model_train_csv_file_name())
    path_to_test_csv_file = os.path.join(cwd, model_config.get_model_test_csv_file_name())
    path_to_validation_csv_file = os.path.join(cwd, model_config.get_model_validation_csv_file_name())

    ### Load the train, test and validation data file specific to the model
    sa_data_loader = SADataLoader(path_to_train_csv_file, path_to_test_csv_file, path_to_validation_csv_file)
    sa_data_loader.load_data(KaggleDataSet.get_kaggle_column_names())

    ### Construct the model parameter object
    sa_model_params = SAModelParams(sa_data_loader, model_config)

    logger.info(f"Start running model: {model_module_name}:{model_class_name}")

    ### Instantiate the selected model class
    sa_sentiment_model = model_class(sa_model_params)

    ###
    ### Call the SASentimentModel's run() which will run the model pipeline
    ###
    sa_sentiment_model.run(sa_model_params)

    logger.info(f"Finished running model: {sa_sentiment_model.__class__.__name__}")


In [4]:
# Entry point: call main() only when this code runs as the top-level module
# (__name__ == "__main__"), not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

Transform set to: <bound method BinaryLabelTransformer.transform of <binary_label_transformer.BinaryLabelTransformer object at 0x000001341E5AD1C0>>, Type: <class 'method'>
Path to train csv file: z:\life\edu\NU-DAE\IE 7500 - NLP\Proj\src\SA_NLP\train_60K.csv
Path to test csv file: z:\life\edu\NU-DAE\IE 7500 - NLP\Proj\src\SA_NLP\test_20K.csv
Path to validation csv file: z:\life\edu\NU-DAE\IE 7500 - NLP\Proj\src\SA_NLP\validate_20K.csv


2025-06-24 15:21:26,407 - INFO - Start running model: SASelfAttentionModel:SASelfAttentionModel
2025-06-24 15:21:26,408 - INFO - Calling SASelfAttentionModel.register(): Self Attention Model/SASelfAttentionModel/SASelfAttentionModel/train_60K.csv/test_20K.csv/validate_20K.csv/{'vocab_size': 10000, 'max_features': 20000, 'sequence_length': 500, 'embedding_dim': 20000, 'embed_output': 128, 'epoch': 5, 'batch_size': 32}
2025-06-24 15:21:26,408 - INFO - SASelfAttentionModel.register(): Completed
2025-06-24 15:21:26,410 - INFO - Calling SASelfAttentionModel.preprocess(): Self Attention Model/SASelfAttentionModel/SASelfAttentionModel/train_60K.csv/test_20K.csv/validate_20K.csv/{'vocab_size': 10000, 'max_features': 20000, 'sequence_length': 500, 'embedding_dim': 20000, 'embed_output': 128, 'epoch': 5, 'batch_size': 32}


Train size: 60000
Test size: 20000
Validation size: 20000


2025-06-24 15:21:27,624 - INFO - SASelfAttentionModel.preprocess(): Completed
2025-06-24 15:21:27,624 - INFO - Calling SASelfAttentionModel.fit(): Self Attention Model/SASelfAttentionModel/SASelfAttentionModel/train_60K.csv/test_20K.csv/validate_20K.csv/{'vocab_size': 10000, 'max_features': 20000, 'sequence_length': 500, 'embedding_dim': 20000, 'embed_output': 128, 'epoch': 5, 'batch_size': 32}
2025-06-24 15:21:27,664 - INFO - Calling SASelfAttentionModel.fit(): Model compiled
2025-06-24 15:21:27,669 - INFO - Calling SASelfAttentionModel.fit(): Fitting model: X_train: 60000, y_train: 60000, X_val: 20000, y_val: 20000,


Epoch 1/5






[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 164ms/step - accuracy: 0.7669 - loss: 0.4570 - val_accuracy: 0.8784 - val_loss: 0.2838
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m306s[0m 163ms/step - accuracy: 0.9183 - loss: 0.2129 - val_accuracy: 0.8892 - val_loss: 0.2676
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m316s[0m 168ms/step - accuracy: 0.9522 - loss: 0.1363 - val_accuracy: 0.8829 - val_loss: 0.2946
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m301s[0m 161ms/step - accuracy: 0.9742 - loss: 0.0776 - val_accuracy: 0.8778 - val_loss: 0.3760
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m302s[0m 161ms/step - accuracy: 0.9855 - loss: 0.0456 - val_accuracy: 0.8729 - val_loss: 0.4551


2025-06-24 15:47:03,963 - INFO - Calling SASelfAttentionModel.fit(): Model fitted
2025-06-24 15:47:03,963 - INFO - SASelfAttentionModel.fit(): Completed
2025-06-24 15:47:03,963 - INFO - Calling SASelfAttentionModel.summary(): Self Attention Model/SASelfAttentionModel/SASelfAttentionModel/train_60K.csv/test_20K.csv/validate_20K.csv/{'vocab_size': 10000, 'max_features': 20000, 'sequence_length': 500, 'embedding_dim': 20000, 'embed_output': 128, 'epoch': 5, 'batch_size': 32}


2025-06-24 15:47:03,995 - INFO - SASelfAttentionModel.summary(): Completed
2025-06-24 15:47:03,995 - INFO - Calling SASelfAttentionModel.predict(): Self Attention Model/SASelfAttentionModel/SASelfAttentionModel/train_60K.csv/test_20K.csv/validate_20K.csv/{'vocab_size': 10000, 'max_features': 20000, 'sequence_length': 500, 'embedding_dim': 20000, 'embed_output': 128, 'epoch': 5, 'batch_size': 32}


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 42ms/step


2025-06-24 15:47:45,127 - INFO - SASelfAttentionModel.predict(): Completed
2025-06-24 15:47:45,127 - INFO - Calling SASelfAttentionModel.evaluate(): Self Attention Model/SASelfAttentionModel/SASelfAttentionModel/train_60K.csv/test_20K.csv/validate_20K.csv/{'vocab_size': 10000, 'max_features': 20000, 'sequence_length': 500, 'embedding_dim': 20000, 'embed_output': 128, 'epoch': 5, 'batch_size': 32}


              precision    recall  f1-score   support

           0       0.86      0.89      0.88     10000
           1       0.89      0.85      0.87     10000

    accuracy                           0.87     20000
   macro avg       0.87      0.87      0.87     20000
weighted avg       0.87      0.87      0.87     20000

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 43ms/step - accuracy: 0.8838 - loss: 0.3982


2025-06-24 15:48:11,922 - INFO - SASelfAttentionModel.evaluate(): Completed
2025-06-24 15:48:11,922 - INFO - Finished running model: SASelfAttentionModel
