# Sentiment Analysis with 9 Transformer Models and Ensemble
This notebook runs 9 pre-trained transformer models on a 200-review subset of the IMDB test set and compares them on the following evaluation metrics: sensitivity, specificity, precision, recall, F1, and accuracy. Finally, the individual model predictions are combined into an ensemble using majority voting.

In [None]:
# Install dependencies
!pip install transformers datasets scikit-learn
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from collections import Counter
import torch

# Checkpoint identifiers to evaluate. Each entry is passed to
# AutoTokenizer.from_pretrained and
# AutoModelForSequenceClassification.from_pretrained(..., num_labels=2)
# in the evaluation loop further down.
# NOTE(review): these look like base checkpoints rather than sentiment
# fine-tuned ones, so the 2-label classification head is presumably newly
# initialized at load time -- confirm whether fine-tuned variants were
# intended before trusting the reported metrics.
models = [
    'bert-base-uncased',
    'distilbert-base-uncased',
    'roberta-base',
    'albert-base-v2',
    'xlnet-base-cased',
    'google/electra-base-discriminator',
    # NOTE(review): appears to be a French-language model; IMDB reviews are
    # English -- confirm this entry is intentional.
    'camembert-base',
    'microsoft/deberta-base',
    # NOTE(review): also appears to be a French-language model -- confirm.
    'flaubert/flaubert_base_cased'
]

# Load IMDB dataset and reduce size for speed.
# The IMDB splits are stored grouped by label, so taking the first 200 rows
# as-is yields reviews from a single class and makes the per-class metrics
# (sensitivity, specificity, ...) meaningless. Shuffle with a fixed seed
# before selecting so the subset mixes both classes and stays reproducible.
dataset = load_dataset("imdb")
test_data = dataset['test'].shuffle(seed=42).select(range(200))

def get_metrics(y_true, y_pred):
    """Compute binary classification metrics with label 1 as the positive class.

    Args:
        y_true: Sequence of ground-truth labels (0 or 1).
        y_pred: Sequence of predicted labels (0 or 1), same length as y_true.

    Returns:
        Tuple ``(sensitivity, specificity, precision, recall, f1, accuracy)``.
        A ratio whose denominator is zero (for example sensitivity when
        y_true contains no positives) is reported as 0.
    """
    # Pin the label order so the matrix is always 2x2; without it, input that
    # contains a single class produces a 1x1 matrix and the unpack below fails.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

    # Guard the hand-computed ratios against an absent class (0 / 0).
    actual_positives = tp + fn
    actual_negatives = tn + fp
    sensitivity = tp / actual_positives if actual_positives else 0.0
    specificity = tn / actual_negatives if actual_negatives else 0.0

    # zero_division=0 keeps the sklearn scores consistent with the guarded
    # ratios above instead of emitting undefined-metric warnings.
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    accuracy = accuracy_score(y_true, y_pred)
    return sensitivity, specificity, precision, recall, f1, accuracy

results = []
predictions_all = []

# Inputs shared by every model: fetch the labels and build the truncated
# texts once instead of on every loop iteration. The slice keeps the first
# 512 characters (not tokens) of each review.
y_true = list(test_data['label'])
texts = [text[:512] for text in test_data['text']]

# Run on the first GPU when one is available; -1 selects the CPU.
device = 0 if torch.cuda.is_available() else -1

for model_name in models:
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
        pipe = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, device=device)

        # Map the pipeline's label string to 1 (positive) or 0 (anything else).
        preds = []
        for text in texts:
            label = pipe(text)[0]['label']
            preds.append(1 if 'POS' in label or 'LABEL_1' in label else 0)

        metrics = get_metrics(y_true, preds)
    except Exception as e:
        # Best effort: one failing checkpoint must not abort the comparison.
        print(f"Model {model_name} failed: {e}")
        continue

    # Record predictions and metrics together, only after both succeeded, so
    # every model that votes in the ensemble also has a row in the results.
    predictions_all.append(preds)
    results.append((model_name, *metrics))

# Ensemble Voting
# For each review, take the label predicted by the most models. On a tie
# Counter.most_common keeps the label that was seen first, i.e. the vote of
# the earliest model in predictions_all.
ensemble_preds = []
if predictions_all:
    # zip(*...) regroups the per-model prediction lists into per-review votes.
    ensemble_preds = [
        Counter(votes).most_common(1)[0][0]
        for votes in zip(*predictions_all)
    ]
    ensemble_metrics = get_metrics(test_data['label'], ensemble_preds)
    results.append(("Ensemble", *ensemble_metrics))
else:
    # Every model failed to load or predict: there is nothing to vote on, so
    # skip the ensemble row instead of failing on an empty vote list.
    print("No model predictions available; skipping ensemble.")

# Tabulate one row per model (plus the ensemble row, when present).
metric_columns = [
    'Model',
    'Sensitivity',
    'Specificity',
    'Precision',
    'Recall',
    'F1',
    'Accuracy',
]
df_results = pd.DataFrame(results, columns=metric_columns)
# Bare expression so the notebook renders the table as the cell output.
df_results