# Pre-trained Model

### Select a pre-trained model for your project and perform data preprocessing

In [20]:
from functions_variables import *

In [21]:
#Load the dataset
path = '../data/preprocessed/'
files = {name: f'{path}{name}.csv' for name in set_names}
dataset = load_dataset('csv', data_files=files)
limit = 1000  # 25000 for the full dataset
train = dataset["train"].shuffle(seed=42).select(range(limit))
reviews = train['text']
labels = train['label']
test = dataset["test"].shuffle(seed=42).select(range(limit))
unsupervised = dataset["unsupervised"].shuffle(seed=42).select(range(limit))

In [22]:
# Load the tokenizer and model
model_names = {
    'robert': 'aychang/roberta-base-imdb',
    'finetuned-sst-2-english': 'distilbert-base-uncased-finetuned-sst-2-english',
    'bert': 'bert-base-uncased',
    'distilbert': 'distilbert-base-uncased'
}
models = {}
tokenizers = {}
nlps = {}
predictions = {}
prediction_labels = {}
labels_true = {}
reports = {}
for key, name in model_names.items():
    models[key] = AutoModelForQuestionAnswering.from_pretrained(name)
    tokenizers[key] = AutoTokenizer.from_pretrained(name)
    # Use pipeline
    nlps[key] = pipeline("sentiment-analysis", model=name, tokenizer=name)


Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at aychang/roberta-base-imdb and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use mps:0
Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use mps:0
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassifica

In [23]:
for key, value in model_names.items():
    print(f"\nModel: {key}")
    predictions[key] = nlps[key](reviews, truncation=True)
    labels_true[key] = labels
    for label, result in zip(labels, predictions[key]):
        print(f"Label: {label} - Sentiment: {result['label']}, Confidence: {result['score']:.4f}")


Model: robert
Label: 1 - Sentiment: pos, Confidence: 0.9991
Label: 1 - Sentiment: pos, Confidence: 0.9991
Label: 0 - Sentiment: neg, Confidence: 0.9908
Label: 1 - Sentiment: neg, Confidence: 0.9149
Label: 0 - Sentiment: neg, Confidence: 0.9984
Label: 1 - Sentiment: pos, Confidence: 0.9991
Label: 1 - Sentiment: pos, Confidence: 0.9986
Label: 0 - Sentiment: neg, Confidence: 0.9985
Label: 0 - Sentiment: neg, Confidence: 0.9985
Label: 1 - Sentiment: pos, Confidence: 0.9991
Label: 1 - Sentiment: pos, Confidence: 0.9991
Label: 0 - Sentiment: neg, Confidence: 0.9985
Label: 0 - Sentiment: neg, Confidence: 0.9985
Label: 0 - Sentiment: neg, Confidence: 0.9963
Label: 1 - Sentiment: pos, Confidence: 0.9990
Label: 1 - Sentiment: pos, Confidence: 0.9991
Label: 0 - Sentiment: neg, Confidence: 0.9985
Label: 0 - Sentiment: neg, Confidence: 0.9985
Label: 1 - Sentiment: pos, Confidence: 0.9991
Label: 1 - Sentiment: pos, Confidence: 0.9991
Label: 0 - Sentiment: neg, Confidence: 0.9985
Label: 0 - Sentimen

In [24]:
# Ensure the dataset is balanced between positive and negative samples to prevent bias.
pd.Series(labels).value_counts()

0    512
1    488
Name: count, dtype: int64

In [25]:
for key, value in model_names.items():
    print(f"\nModel: {key}")
    counts = pd.DataFrame(predictions[key])['label'].value_counts()
    print(counts)


Model: robert
label
neg    522
pos    478
Name: count, dtype: int64

Model: finetuned-sst-2-english
label
NEGATIVE    589
POSITIVE    411
Name: count, dtype: int64

Model: bert
label
LABEL_1    993
LABEL_0      7
Name: count, dtype: int64

Model: distilbert
label
LABEL_1    696
LABEL_0    304
Name: count, dtype: int64


In [26]:
predictions['robert'][0], predictions['distilbert'][0]

({'label': 'pos', 'score': 0.9990720748901367},
 {'label': 'LABEL_0', 'score': 0.5068899393081665})

In [27]:
# Evaluate: Confusion Matrix, Accuracy, Precision, Recall, F1 Score
for key, value in model_names.items():
    # converting "pos" into a number... 1 = positive, 0 = negative
    prediction_labels[key] = [1 if prediction['label'].lower()[:3] == 'pos' else 0 for prediction in predictions[key]]
    reports[key] = classification_report(labels_true[key], prediction_labels[key])

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [28]:
# Display the classification report
for key, value in model_names.items():
    print(f"\nModel: {key}")
    print(reports[key])


Model: robert
              precision    recall  f1-score   support

           0       0.96      0.98      0.97       512
           1       0.98      0.96      0.97       488

    accuracy                           0.97      1000
   macro avg       0.97      0.97      0.97      1000
weighted avg       0.97      0.97      0.97      1000


Model: finetuned-sst-2-english
              precision    recall  f1-score   support

           0       0.82      0.94      0.88       512
           1       0.93      0.78      0.85       488

    accuracy                           0.86      1000
   macro avg       0.87      0.86      0.86      1000
weighted avg       0.87      0.86      0.86      1000


Model: bert
              precision    recall  f1-score   support

           0       0.51      1.00      0.68       512
           1       0.00      0.00      0.00       488

    accuracy                           0.51      1000
   macro avg       0.26      0.50      0.34      1000
weighted avg  