# Analysis of Transformer-based Models

This notebook analyzes the sentiment analysis results from pre-trained transformer models (BERT, RoBERTa, ALBERT, XLNet) on both the gold and premo datasets.

Load the libraries required by this notebook.

In [None]:
import json
import numpy as np
import pandas as pd
from pathlib import Path

## Gold Dataset Analysis

Load results from transformer models on the gold dataset

In [None]:
# Transformer models whose result files are expected on disk.
MODELS = ["bert", "roberta", "albert", "xlnet"]

# Maps model name -> parsed contents of output_transformers/<model>_gold.json.
# Models whose file is absent are reported and left out of the mapping.
gold_results = {}
for model in MODELS:
    filepath = Path(f"output_transformers/{model}_gold.json")
    try:
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(filepath, encoding="utf-8") as f:
            gold_results[model] = json.load(f)
    except FileNotFoundError:
        print(f"Missing file: {filepath}")

print("Loaded gold dataset results for models:", list(gold_results.keys()))

Generate LaTeX Table for Gold Dataset

In [None]:
# Gather the scores of every loaded model on the gold dataset and record the
# highest value of each table column so it can be highlighted later.
sorted_models = sorted(gold_results)

all_metrics = {}
for model in sorted_models:
    data = gold_results[model]
    metrics = data['metrics']
    per_class = metrics['per_class']

    all_metrics[model] = {
        # Per-class lists are ordered: positive, negative, neutral.
        'precision': [per_class['precision'][c] for c in ('positive', 'negative', 'neutral')],
        'recall': [per_class['recall'][c] for c in ('positive', 'negative', 'neutral')],
        'f1': [per_class['f1_score'][c] for c in ('positive', 'negative', 'neutral')],
        'macro_precision': metrics['macro']['precision'],
        'micro_precision': metrics['micro']['precision'],
        'macro_recall': metrics['macro']['recall'],
        'micro_recall': metrics['micro']['recall'],
        'macro_f1': metrics['macro']['f1_score'],
        'micro_f1': metrics['micro']['f1_score'],
        # 'n' is optional in the result file; fall back to a placeholder.
        'n': data.get('n', 'N/A'),
    }

# Fail with an actionable message instead of max()'s
# "arg is an empty sequence" when no result file was loaded.
if not all_metrics:
    raise ValueError(
        "No gold dataset results loaded; cannot compute best values for the table."
    )

# Best score per column. Keys are '<row>_<metric>' with row in
# pos/neg/neu/macro/micro and metric in prec/rec/f1.
best_values = {}
for row_prefix, class_idx in (('pos', 0), ('neg', 1), ('neu', 2)):
    for short, key in (('prec', 'precision'), ('rec', 'recall'), ('f1', 'f1')):
        best_values[f'{row_prefix}_{short}'] = max(
            m[key][class_idx] for m in all_metrics.values()
        )
for row_prefix in ('macro', 'micro'):
    for short, key in (('prec', 'precision'), ('rec', 'recall'), ('f1', 'f1')):
        best_values[f'{row_prefix}_{short}'] = max(
            m[f'{row_prefix}_{key}'] for m in all_metrics.values()
        )

def fmt(value, best):
    """Format a score as a LaTeX percentage, bold when it equals the best.

    Args:
        value: Score to display (presumably a fraction in [0, 1] -- confirm
            against the result files).
        best: Best score of the same column across all models.

    Returns:
        A string such as ``89\\%``, wrapped in ``\\textbf{...}`` when
        ``value`` is within 1e-9 of ``best``.
    """
    # round() rather than int(): truncation turns e.g. 0.29 * 100
    # (28.999999999999996 in binary floating point) into 28.
    val_str = f"{round(value * 100)}\\%"
    if abs(value - best) < 1e-9:
        return f"\\textbf{{{val_str}}}"
    return val_str


# One table row per (model, class/average) pair: five rows per model.
table_data = []
for model in sorted_models:
    metrics = all_metrics[model]
    model_display = model.upper()
    row_label = f"{model_display} (n={metrics['n']})"

    # (row title, best_values key prefix, precision, recall, F1)
    rows = [
        ("Positive", 'pos',
         metrics['precision'][0], metrics['recall'][0], metrics['f1'][0]),
        ("Negative", 'neg',
         metrics['precision'][1], metrics['recall'][1], metrics['f1'][1]),
        ("Neutral", 'neu',
         metrics['precision'][2], metrics['recall'][2], metrics['f1'][2]),
        ("Macro Avg.", 'macro',
         metrics['macro_precision'], metrics['macro_recall'], metrics['macro_f1']),
        ("Micro Avg.", 'micro',
         metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1']),
    ]
    for title, prefix, precision, recall, f1 in rows:
        table_data.append([
            row_label,
            title,
            fmt(precision, best_values[f'{prefix}_prec']),
            fmt(recall, best_values[f'{prefix}_rec']),
            fmt(f1, best_values[f'{prefix}_f1']),
        ])

df = pd.DataFrame(table_data, columns=['Model', 'Class', 'Precision', 'Recall', 'F1-score'])

# escape=False keeps the LaTeX markup produced by fmt() intact.
latex_table = df.to_latex(
    index=False,
    escape=False,
    column_format='|l|l|r|r|r|',
    caption="Performance of transformer-based models on the gold dataset",
    label="tab:transformers_gold"
)

# Post-process the generated LaTeX line by line: bold the header cells,
# merge the repeated model label of each five-row group into a \multirow
# cell, separate rows with \cline / \hline, and replace the
# \toprule / \midrule / \bottomrule rules with plain \hline.
lines = latex_table.split('\n')
processed_lines = []
current_model = None   # label of the model group currently being emitted
row_count = 0          # rows emitted so far for current_model
first_model = True     # no \hline is inserted before the first group

for line in lines:
    # Header row: wrap every column title in \textbf{}.
    if 'Model &' in line and 'Class &' in line:
        for heading in ('Model', 'Class', 'Precision', 'Recall', 'F1-score'):
            line = line.replace(heading, f'\\textbf{{{heading}}}')
        processed_lines.append(line)
        continue

    parts = line.split('&')
    is_rule = any(rule in line for rule in ('toprule', 'midrule', 'bottomrule'))
    model_name = parts[0].strip()

    # Data rows have five '&'-separated cells and a non-empty model label.
    if not is_rule and len(parts) >= 5 and model_name:
        if model_name == current_model:
            # Continuation row: the \multirow cell above covers this one.
            row_count += 1
            parts[0] = " "
        else:
            # First row of a new model group.
            if not first_model:
                processed_lines.append('\\hline')
            first_model = False

            current_model = model_name
            row_count = 1
            parts[0] = f" \\multirow{{5}}{{*}}{{{model_name}}}"

        line = ' & '.join(parts)

        # Inside a group, rule only under columns 2-5 so the merged model
        # cell is not cut; the fifth row gets no \cline.
        if row_count < 5 and line.strip().endswith('\\\\'):
            line = line.rstrip() + ' \\cline{2-5}'

    processed_lines.append(line)

latex_table = '\n'.join(processed_lines)

for rule in ('\\toprule', '\\midrule', '\\bottomrule'):
    latex_table = latex_table.replace(rule, '\\hline')

print(latex_table)

## Premo Dataset Analysis

Load results from transformer models on the premo dataset

In [None]:
# Maps model name -> parsed contents of output_transformers/<model>_premo.json.
# Models whose file is absent are reported and left out of the mapping.
premo_results = {}
for model in MODELS:
    filepath = Path(f"output_transformers/{model}_premo.json")
    try:
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(filepath, encoding="utf-8") as f:
            premo_results[model] = json.load(f)
    except FileNotFoundError:
        print(f"Missing file: {filepath}")

print("Loaded premo dataset results for models:", list(premo_results.keys()))

Generate LaTeX Table for Premo Dataset

In [None]:
# Gather the scores of every loaded model on the premo dataset and record the
# highest value of each table column so it can be highlighted later.
sorted_models_premo = sorted(premo_results)

all_metrics_premo = {}
for model in sorted_models_premo:
    data = premo_results[model]
    metrics = data['metrics']
    per_class = metrics['per_class']

    all_metrics_premo[model] = {
        # Per-class lists are ordered: positive, negative, neutral.
        'precision': [per_class['precision'][c] for c in ('positive', 'negative', 'neutral')],
        'recall': [per_class['recall'][c] for c in ('positive', 'negative', 'neutral')],
        'f1': [per_class['f1_score'][c] for c in ('positive', 'negative', 'neutral')],
        'macro_precision': metrics['macro']['precision'],
        'micro_precision': metrics['micro']['precision'],
        'macro_recall': metrics['macro']['recall'],
        'micro_recall': metrics['micro']['recall'],
        'macro_f1': metrics['macro']['f1_score'],
        'micro_f1': metrics['micro']['f1_score'],
        # 'n' is optional in the result file; fall back to a placeholder.
        'n': data.get('n', 'N/A'),
    }

# Fail with an actionable message instead of max()'s
# "arg is an empty sequence" when no result file was loaded.
if not all_metrics_premo:
    raise ValueError(
        "No premo dataset results loaded; cannot compute best values for the table."
    )

# Best score per column. Keys are '<row>_<metric>' with row in
# pos/neg/neu/macro/micro and metric in prec/rec/f1.
best_values_premo = {}
for row_prefix, class_idx in (('pos', 0), ('neg', 1), ('neu', 2)):
    for short, key in (('prec', 'precision'), ('rec', 'recall'), ('f1', 'f1')):
        best_values_premo[f'{row_prefix}_{short}'] = max(
            m[key][class_idx] for m in all_metrics_premo.values()
        )
for row_prefix in ('macro', 'micro'):
    for short, key in (('prec', 'precision'), ('rec', 'recall'), ('f1', 'f1')):
        best_values_premo[f'{row_prefix}_{short}'] = max(
            m[f'{row_prefix}_{key}'] for m in all_metrics_premo.values()
        )

def fmt(value, best):
    """Format a score as a LaTeX percentage, bold when it equals the best.

    Args:
        value: Score to display (presumably a fraction in [0, 1] -- confirm
            against the result files).
        best: Best score of the same column across all models.

    Returns:
        A string such as ``89\\%``, wrapped in ``\\textbf{...}`` when
        ``value`` is within 1e-9 of ``best``.
    """
    # round() rather than int(): truncation turns e.g. 0.29 * 100
    # (28.999999999999996 in binary floating point) into 28.
    val_str = f"{round(value * 100)}\\%"
    if abs(value - best) < 1e-9:
        return f"\\textbf{{{val_str}}}"
    return val_str


# One table row per (model, class/average) pair: five rows per model.
table_data_premo = []
for model in sorted_models_premo:
    metrics = all_metrics_premo[model]
    model_display = model.upper()
    row_label = f"{model_display} (n={metrics['n']})"

    # (row title, best_values_premo key prefix, precision, recall, F1)
    rows = [
        ("Positive", 'pos',
         metrics['precision'][0], metrics['recall'][0], metrics['f1'][0]),
        ("Negative", 'neg',
         metrics['precision'][1], metrics['recall'][1], metrics['f1'][1]),
        ("Neutral", 'neu',
         metrics['precision'][2], metrics['recall'][2], metrics['f1'][2]),
        ("Macro Avg.", 'macro',
         metrics['macro_precision'], metrics['macro_recall'], metrics['macro_f1']),
        ("Micro Avg.", 'micro',
         metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1']),
    ]
    for title, prefix, precision, recall, f1 in rows:
        table_data_premo.append([
            row_label,
            title,
            fmt(precision, best_values_premo[f'{prefix}_prec']),
            fmt(recall, best_values_premo[f'{prefix}_rec']),
            fmt(f1, best_values_premo[f'{prefix}_f1']),
        ])

df_premo = pd.DataFrame(table_data_premo, columns=['Model', 'Class', 'Precision', 'Recall', 'F1-score'])

# escape=False keeps the LaTeX markup produced by fmt() intact.
latex_table_premo = df_premo.to_latex(
    index=False,
    escape=False,
    column_format='|l|l|r|r|r|',
    caption="Performance of transformer-based models on the premo dataset",
    label="tab:transformers_premo"
)

# Post-process the generated LaTeX line by line: bold the header cells,
# merge the repeated model label of each five-row group into a \multirow
# cell, separate rows with \cline / \hline, and replace the
# \toprule / \midrule / \bottomrule rules with plain \hline.
lines = latex_table_premo.split('\n')
processed_lines = []
current_model = None   # label of the model group currently being emitted
row_count = 0          # rows emitted so far for current_model
first_model = True     # no \hline is inserted before the first group

for line in lines:
    # Header row: wrap every column title in \textbf{}.
    if 'Model &' in line and 'Class &' in line:
        for heading in ('Model', 'Class', 'Precision', 'Recall', 'F1-score'):
            line = line.replace(heading, f'\\textbf{{{heading}}}')
        processed_lines.append(line)
        continue

    parts = line.split('&')
    is_rule = any(rule in line for rule in ('toprule', 'midrule', 'bottomrule'))
    model_name = parts[0].strip()

    # Data rows have five '&'-separated cells and a non-empty model label.
    if not is_rule and len(parts) >= 5 and model_name:
        if model_name == current_model:
            # Continuation row: the \multirow cell above covers this one.
            row_count += 1
            parts[0] = " "
        else:
            # First row of a new model group.
            if not first_model:
                processed_lines.append('\\hline')
            first_model = False

            current_model = model_name
            row_count = 1
            parts[0] = f" \\multirow{{5}}{{*}}{{{model_name}}}"

        line = ' & '.join(parts)

        # Inside a group, rule only under columns 2-5 so the merged model
        # cell is not cut; the fifth row gets no \cline.
        if row_count < 5 and line.strip().endswith('\\\\'):
            line = line.rstrip() + ' \\cline{2-5}'

    processed_lines.append(line)

latex_table_premo = '\n'.join(processed_lines)

for rule in ('\\toprule', '\\midrule', '\\bottomrule'):
    latex_table_premo = latex_table_premo.replace(rule, '\\hline')

print(latex_table_premo)