In [None]:
!pip3 install scikit-learn

In [1]:
import os, sys
sys.path.insert(1, '../dataset')
import numpy as np
import pandas as pd
import warnings
import sklearn

warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import seaborn as sns

from transformers import BertTokenizer
import torch
from utils import tag_to_word_df

Test the ATE model on the testing data to check whether the trained model works for our dataset. This process may require some modifications later.

In [None]:
import torch

# Environment sanity check: show the installed torch build, then whether the
# MPS backend is reported as available on this machine.
torch_version = torch.__version__
print(torch_version)
mps_ready = torch.backends.mps.is_available()
print(mps_ready)

In [2]:
from abte import ABTEModel
from utils import convert_to_array,replace_SEP,replace_tags,replace_sentiment_tags

# Run settings. Inside run_ABTE_test_train these are only interpolated into the
# names of the result files that get written.
batch = 8
# Keep this expression exactly as written: its float repr is what lands in the
# file names, and the checkpoint names used in this notebook contain
# "lr3.0000000000000004e-05" rather than "lr3e-05".
lr = 3*1e-5
# NOTE(review): the checkpoint file names used below say "epochs4" while this is
# 5 -- presumably the saved epoch index is 0-based; confirm against the training code.
epochs = 5

def _prepare_absa_frame(csv_path):
    """Read one ABSA CSV and decode its serialized tag/token columns.

    The 'bio_tags', 'sentiment_tags' and 'tokens' columns are passed through
    ``convert_to_array``; the two tag columns are then passed through
    ``replace_SEP`` followed by their respective tag-replacement helper.
    """
    frame = pd.read_csv(csv_path)
    for column in ('bio_tags', 'sentiment_tags', 'tokens'):
        frame[column] = frame[column].apply(convert_to_array)
    frame['bio_tags'] = frame['bio_tags'].apply(replace_SEP).apply(replace_tags)
    frame['sentiment_tags'] = frame['sentiment_tags'].apply(replace_SEP).apply(replace_sentiment_tags)
    return frame


def _write_report(path, report):
    """Write a multi-line report string to ``path``, one '\\n'-terminated line per row."""
    with open(path, 'w') as f:
        for line in report.split('\n'):
            f.write(line + '\n')


def run_ABTE_test_train(adapter, lr_schedule):
    """Evaluate a saved ABTE checkpoint on the train and test CSVs and save the results.

    Parameters
    ----------
    adapter : bool
        Forwarded to ``ABTEModel(..., adapter=adapter)``; also selects the
        ``*_adapter*`` model directories.
    lr_schedule : bool
        Selects the ``*_scheduler`` variant of the model directory.

    Side effects
    ------------
    Writes report, prediction and accuracy CSV files under
    ``<model dir>/results/``. The file names embed the module-level ``lr``,
    ``epochs`` and ``batch`` values. Returns ``None``.
    """
    if adapter:
        dir_name = "model_ABTE_adapter_scheduler" if lr_schedule else "model_ABTE_adapter"
    else:
        dir_name = "model_ABTE_scheduler" if lr_schedule else "model_ABTE"

    # load and decode both splits
    data = _prepare_absa_frame('../dataset/absa_train_df_20240625.csv')
    data_test = _prepare_absa_frame('../dataset/absa_test_df_20240625.csv')

    # define parameters for model
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    DEVICE = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

    # define model
    modelABTE = ABTEModel(tokenizer, adapter=adapter)

    # load model and predict
    # NOTE(review): the checkpoint name is hard-coded ("epochs4", batch 8) and is
    # not derived from the module-level lr/epochs/batch values -- confirm intended.
    model_path = dir_name+'/model_lr3.0000000000000004e-05_epochs4_batch8.pkl'
    test_accuracy, test_report = modelABTE.test(data_test, load_model=model_path, device=DEVICE)
    test_pred, test_targets = modelABTE.predict_batch(data_test, load_model=model_path, device=DEVICE)

    train_accuracy, train_report = modelABTE.test(data, load_model=model_path, device=DEVICE)
    train_pred, train_targets = modelABTE.predict_batch(data, load_model=model_path, device=DEVICE)

    # save results
    # The directory that must exist is <dir_name>/results. The previous check
    # looked at the filesystem root ('/results'), so a second run crashed in
    # os.makedirs with FileExistsError.
    results_dir = dir_name + '/results'
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir, exist_ok=True)
        print('folder not exists, created result folder')

    # Shared suffix of every output file name.
    run_tag = 'lr{}_epochs{}_batch{}.csv'.format(lr, epochs, batch)

    # report
    _write_report(results_dir + '/test_report_' + run_tag, test_report)
    _write_report(results_dir + '/train_report_' + run_tag, train_report)

    # predictions
    data_test['Predicted'] = test_pred
    data_test['Actual'] = test_targets
    data_test.to_csv(results_dir + '/test_pred_' + run_tag, index=False)

    data['Predicted'] = train_pred
    data['Actual'] = train_targets
    data.to_csv(results_dir + '/train_pred_' + run_tag, index=False)

    # accuracy
    test_accuracy = np.array(test_accuracy)
    train_accuracy = np.array(train_accuracy)

    with open(results_dir + '/test_accuracy_' + run_tag, 'w') as f:
        f.write(str(test_accuracy))
    with open(results_dir + '/train_accuracy_' + run_tag, 'w') as f:
        f.write(str(train_accuracy))

    print('completed!!!!! huurayyyyyy!!!!')

In [3]:
# No adapter, scheduler variant: this selects the "model_ABTE_scheduler" directory.
run_ABTE_test_train(adapter=False, lr_schedule=True)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|          | 0/2 [00:00<?, ?it/s] 50%|█████     | 1/2 [00:02<00:02,  2.03s/it]100%|██████████| 2/2 [00:02<00:00,  1.14s/it]100%|██████████| 2/2 [00

folder not exists, created result folder
completed!!!!! huurayyyyyy!!!!





In [5]:
# Single-sentence check of the saved scheduler checkpoint.
model_path = 'model_ABTE_scheduler/model_lr3.0000000000000004e-05_epochs4_batch8.pkl'

# Use the MPS backend when it is available, otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
else:
    DEVICE = torch.device("cpu")

# define model
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
modelABTE = ABTEModel(tokenizer, False)

sample_review = 'It was a little tighter in the chest area than i thought it would be, but i still like it very much!'
# `outputs` is unpacked but not used further in this cell.
word_pieces, predictions, outputs = modelABTE.predict(
    sample_review,
    load_model=model_path,
    device=DEVICE,
)

for shown in (word_pieces, predictions):
    print(shown)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


['good', 'fit', 'and', 'length']
[0, 0, 0, 0]
