## Importing Libraries

In [20]:
import pandas as pd
import numpy as np
import torch
import random
from numpy import array
from numpy import argmax

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model

from transformers import BertTokenizer, TFBertForSequenceClassification

import utils
import config

def set_seed(seed_value=42):
    """Seed every random number generator this notebook depends on.

    Seeds Python's ``random`` module, NumPy's global generator, PyTorch
    (CPU and every CUDA device) and TensorFlow's global generator.
    The TensorFlow seed is required because the model used here is a
    TensorFlow/Keras one (``TFBertForSequenceClassification`` is imported
    above); seeding only PyTorch leaves the TensorFlow side unseeded.

    Args:
        seed_value: Integer seed applied to every generator. Defaults to 42.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    # Per the PyTorch docs this is silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed_value)
    # Global TensorFlow seed; `tf` is imported at the top of the notebook.
    tf.random.set_seed(seed_value)
    
# Seed the random number generators once, right after the imports.
set_seed(seed_value=42)

import os
# Last expression of the cell, so the kernel's working directory is displayed.
os.getcwd()

'/home/jupyter/NLP_Track/Deepak/Git'

## Configure

In [21]:
# Copy the run settings out of the project's `config` module into notebook
# globals; the cells below hand these names to the `utils` helpers.

# Forwarded to utils.pipeline_run.
input_path = config.input_path
target = config.target
feature = config.feature
num_labels = config.num_labels
learning_rate = config.learning_rate
epochs = config.epochs
batch_size = config.batch_size
loss = config.loss
metric = config.metric
model_name = config.model_name

# Forwarded to utils.data_prep_inference_pipeline.
file_for_pred_input_path = config.file_for_pred_input_path

# File the classification reports are written to.
final_classification_report_output = config.final_classification_report_output

## Run Pipeline

### Training the BERT model with `augm_text_default_bn_gpt_neo`

In [13]:
# Value handed to utils.pipeline_run as `aug_column_val` for this run.
aug_column_val = "augm_text_default_bn_gpt_neo"
# Model directory for this run (passed to utils.pipeline_run, later to
# load_model). Built from the working directory instead of a hard-coded
# home-directory path so the notebook is not tied to one machine's layout.
path = os.path.join(os.getcwd(), 'Files', 'nlp_task_bert_model_aug_text_default_bn_gpt_neo')

In [6]:
# Run the training pipeline for the selected `aug_column_val`; the saved
# model ends up under `path` (see the "Assets written to" log lines below).
utils.pipeline_run(
    input_path,
    aug_column_val,
    target,
    feature,
    num_labels,
    learning_rate,
    epochs,
    batch_size,
    loss,
    metric,
    model_name,
    path,
)

(3059,)
(765,)
(3059, 2)
(765, 2)


2024-07-21 07:56:37.710348: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-07-21 07:56:38.276004: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14651 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0001:00:00.0, compute capability: 7.5
All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


2024-07-21 07:57:17.399723: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 2/3
Epoch 3/3
Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bert (TFBertMainLayer)       multiple                  108310272 
_________________________________________________________________
dropout_37 (Dropout)         multiple                  0         
_________________________________________________________________
classifier (Dense)           multiple                  1538      
Total params: 108,311,810
Trainable params: 108,311,810
Non-trainable params: 0
_________________________________________________________________
None


2024-07-21 08:06:20.157249: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /home/jupyter/NLP_Track/Deepak/Git/Files/nlp_task_bert_model_aug_text_default_bn_gpt_neo/assets


INFO:tensorflow:Assets written to: /home/jupyter/NLP_Track/Deepak/Git/Files/nlp_task_bert_model_aug_text_default_bn_gpt_neo/assets


Training of bert model is done
Train classification report
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      2435
           1       0.99      0.98      0.99       624

    accuracy                           0.99      3059
   macro avg       0.99      0.99      0.99      3059
weighted avg       0.99      0.99      0.99      3059

Val classification report
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       609
           1       0.97      0.98      0.98       156

    accuracy                           0.99       765
   macro avg       0.98      0.99      0.99       765
weighted avg       0.99      0.99      0.99       765



### Inference using the BERT model trained with `augm_text_default_bn_gpt_neo`

In [14]:
# Reload the model that the training cell saved under `path`.
savedModel = load_model(path)

In [15]:
# Prepare the inference inputs from `file_for_pred_input_path`.
# The names suggest tokenised inputs and one-hot labels for a train and a
# test split -- confirm against utils.data_prep_inference_pipeline.
(
    X_train_tokens_new,
    Y_train_one_hot_encoded,
    X_test_tokens_new,
    Y_test_one_hot_encoded,
) = utils.data_prep_inference_pipeline(
    file_for_pred_input_path,
    feature,
    target,
    model_name,
)


In [16]:
# Classification report of the reloaded model on the training inputs.
report = utils.class_report(
    savedModel=savedModel,
    X_tokens=X_train_tokens_new,
    y=Y_train_one_hot_encoded,
)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3044
           1       0.98      0.98      0.98       419

    accuracy                           0.99      3463
   macro avg       0.99      0.99      0.99      3463
weighted avg       0.99      0.99      0.99      3463

Balanced Accuracy:  0.9877818251954626
F2 score:  [0.99704336 0.97852029]


In [23]:
# Mode 'w' truncates the report file: this is the first write of the run,
# and every later report cell appends to the same file.
# The encoding is pinned so the file does not depend on the machine's locale.
with open(final_classification_report_output, 'w', encoding='utf-8') as f:
    f.write("\n------ Classification report of Training Data using augm_text_default_bn_gpt_neo ------\n")
    # str() matches the test-data cell for this model and is a no-op when
    # `report` is already a string.
    f.write(str(report))

In [24]:
# Classification report of the reloaded model on the test inputs.
# Note: this rebinds `report`, replacing the training report.
report = utils.class_report(
    savedModel=savedModel,
    X_tokens=X_test_tokens_new,
    y=Y_test_one_hot_encoded,
)
print(report)

              precision    recall  f1-score   support

           0       0.99      1.00      0.99      1472
           1       0.97      0.96      0.96       234

    accuracy                           0.99      1706
   macro avg       0.98      0.98      0.98      1706
weighted avg       0.99      0.99      0.99      1706

Balanced Accuracy:  0.9762547612411743
F2 score:  [0.99483906 0.95972579]


In [25]:
# Append the test-data report to the shared report file.
# The encoding is pinned so the file does not depend on the machine's locale.
with open(final_classification_report_output, 'a', encoding='utf-8') as f:
    f.write("\n------ Classification report of Test Data using augm_text_default_bn_gpt_neo ------\n")
    f.write(str(report))

### Training the BERT model with `augm_text_default_mn_gpt_neo`

In [26]:
# Value handed to utils.pipeline_run as `aug_column_val` for this run.
aug_column_val = "augm_text_default_mn_gpt_neo"
# Model directory for this run (passed to utils.pipeline_run, later to
# load_model). Built from the working directory instead of a hard-coded
# home-directory path so the notebook is not tied to one machine's layout.
path = os.path.join(os.getcwd(), 'Files', 'nlp_task_bert_model_aug_text_default_mn_gpt_neo')

In [11]:
# Run the training pipeline for the selected `aug_column_val`; the saved
# model ends up under `path` (see the "Assets written to" log lines below).
utils.pipeline_run(
    input_path,
    aug_column_val,
    target,
    feature,
    num_labels,
    learning_rate,
    epochs,
    batch_size,
    loss,
    metric,
    model_name,
    path,
)

(3059,)
(765,)
(3059, 2)
(765, 2)


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3
Epoch 2/3
Epoch 3/3
Model: "tf_bert_for_sequence_classification_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bert (TFBertMainLayer)       multiple                  108310272 
_________________________________________________________________
dropout_75 (Dropout)         multiple                  0         
_________________________________________________________________
classifier (Dense)           multiple                  1538      
Total params: 108,311,810
Trainable params: 108,311,810
Non-trainable params: 0
_________________________________________________________________
None




INFO:tensorflow:Assets written to: /home/jupyter/NLP_Track/Deepak/Git/Files/nlp_task_bert_model_aug_text_default_mn_gpt_neo/assets


INFO:tensorflow:Assets written to: /home/jupyter/NLP_Track/Deepak/Git/Files/nlp_task_bert_model_aug_text_default_mn_gpt_neo/assets


Training of bert model is done
Train classification report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2435
           1       1.00      0.98      0.99       624

    accuracy                           1.00      3059
   macro avg       1.00      0.99      0.99      3059
weighted avg       1.00      1.00      1.00      3059

Val classification report
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       609
           1       0.98      0.96      0.97       156

    accuracy                           0.99       765
   macro avg       0.98      0.98      0.98       765
weighted avg       0.99      0.99      0.99       765



### Inference using the BERT model trained with `augm_text_default_mn_gpt_neo`

In [27]:
# Reload the model that the training cell saved under `path`.
savedModel = load_model(path)

In [28]:
# Prepare the inference inputs from `file_for_pred_input_path`.
# The names suggest tokenised inputs and one-hot labels for a train and a
# test split -- confirm against utils.data_prep_inference_pipeline.
(
    X_train_tokens_new,
    Y_train_one_hot_encoded,
    X_test_tokens_new,
    Y_test_one_hot_encoded,
) = utils.data_prep_inference_pipeline(
    file_for_pred_input_path,
    feature,
    target,
    model_name,
)


In [29]:
# Classification report of the reloaded model on the training inputs.
report = utils.class_report(
    savedModel=savedModel,
    X_tokens=X_train_tokens_new,
    y=Y_train_one_hot_encoded,
)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3044
           1       0.99      0.97      0.98       419

    accuracy                           0.99      3463
   macro avg       0.99      0.98      0.99      3463
weighted avg       0.99      0.99      0.99      3463

Balanced Accuracy:  0.9823080107508334
F2 score:  [0.9975046  0.97029229]


In [30]:
# Append the training-data report to the shared report file.
# The encoding is pinned so the file does not depend on the machine's locale.
with open(final_classification_report_output, 'a', encoding='utf-8') as f:
    f.write("\n------ Classification report of Training Data using augm_text_default_mn_gpt_neo ------\n")
    # str() is a no-op when `report` is already a string and keeps the
    # write from raising TypeError otherwise.
    f.write(str(report))

In [31]:
# Classification report of the reloaded model on the test inputs.
# Note: this rebinds `report`, replacing the training report.
report = utils.class_report(
    savedModel=savedModel,
    X_tokens=X_test_tokens_new,
    y=Y_test_one_hot_encoded,
)
print(report)

              precision    recall  f1-score   support

           0       0.99      1.00      0.99      1472
           1       0.97      0.95      0.96       234

    accuracy                           0.99      1706
   macro avg       0.98      0.97      0.98      1706
weighted avg       0.99      0.99      0.99      1706

Balanced Accuracy:  0.9723209308807135
F2 score:  [0.99511268 0.95360825]


In [32]:
# Append the test-data report to the shared report file.
# The encoding is pinned so the file does not depend on the machine's locale.
with open(final_classification_report_output, 'a', encoding='utf-8') as f:
    f.write("\n------ Classification report of Test Data using augm_text_default_mn_gpt_neo ------\n")
    # str() is a no-op when `report` is already a string and keeps the
    # write from raising TypeError otherwise.
    f.write(str(report))

### Training the BERT model with `augm_text_bn_gpt_neo_arg`

In [17]:
# aug_column_val="augm_text_bn_gpt_neo_arg"
# path='/home/jupyter/NLP_Track/Deepak/Git/Files/nlp_task_bert_model_aug_text_bn_gpt_neo_arg'

In [18]:
# utils.pipeline_run(input_path,aug_column_val,target, feature, num_labels,learning_rate,epochs,batch_size,loss,metric,model_name,path)

### Inference using the BERT model trained with `augm_text_bn_gpt_neo_arg`

In [19]:
# savedModel=load_model(path)

In [20]:
# X_train_tokens_new, Y_train_one_hot_encoded, X_test_tokens_new, Y_test_one_hot_encoded = utils.data_prep_inference_pipeline(file_for_pred_input_path,feature,target,model_name)


In [21]:
# report=utils.class_report(X_tokens=X_train_tokens_new,savedModel=savedModel, y=Y_train_one_hot_encoded)
# print(report)

In [33]:
# with open(final_classification_report_output, 'a') as f:
#     f.write("\n------ Classification report of Training Data using augm_text_bn_gpt_neo_arg ------\n")
#     f.write(report)

In [34]:
# report=utils.class_report(X_tokens=X_test_tokens_new,savedModel=savedModel, y=Y_test_one_hot_encoded)
# print(report)

In [35]:
# with open(final_classification_report_output, 'a') as f:
#     f.write("\n------ Classification report of Test Data using augm_text_bn_gpt_neo_arg ------\n")
#     f.write(report)

### Training the BERT model with `augm_text_cstm_mn_gpt_neo`

In [36]:
# aug_column_val="augm_text_cstm_mn_gpt_neo"
# path='/home/jupyter/NLP_Track/Deepak/Git/Files/nlp_task_bert_model_aug_text_cstm_mn_gpt_neo'

In [37]:
# utils.pipeline_run(input_path,aug_column_val,target, feature, num_labels,learning_rate,epochs,batch_size,loss,metric,model_name,path)

### Inference using the BERT model trained with `augm_text_cstm_mn_gpt_neo`

In [38]:
# savedModel=load_model(path)

In [39]:
# X_train_tokens_new, Y_train_one_hot_encoded, X_test_tokens_new, Y_test_one_hot_encoded = utils.data_prep_inference_pipeline(file_for_pred_input_path,feature,target,model_name)


In [40]:
# report=utils.class_report(X_tokens=X_train_tokens_new,savedModel=savedModel, y=Y_train_one_hot_encoded)
# print(report)

In [41]:
# with open(final_classification_report_output, 'a') as f:
#     f.write("\n------ Classification report of Training Data using augm_text_cstm_mn_gpt_neo ------\n")
#     f.write(report)

In [42]:
# report=utils.class_report(X_tokens=X_test_tokens_new,savedModel=savedModel, y=Y_test_one_hot_encoded)
# print(report)

In [43]:
# with open(final_classification_report_output, 'a') as f:
#     f.write("\n------ Classification report of Test Data using augm_text_cstm_mn_gpt_neo ------\n")
#     f.write(report)