In [3]:
import json
import os

In [31]:
data_dir = "../../downstream_tasks_training/sst2/data"
models_dir = "../../downstream_tasks_training/sst2/models"

# load train, val and test data from the data directory in json format
train, val, test = [json.load(open(os.path.join(data_dir, file_name), 'r')) for file_name in ["train.json", "val.json", "test.json"]]

# change key from "labels" to "label" for train, val and test data
for data in [train, val, test]:
    data["label"] = data.pop("labels")

In [33]:
for data in [train, val, test]:
    data["label"] = data.pop("labels")

In [35]:
print(train.keys())

dict_keys(['text', 'labels'])


## Evaluation for individual models

In [46]:
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, roc_curve, auc, precision_recall_curve
from transformers import BertTokenizer, BertForSequenceClassification
import torch
from torch.utils.data import DataLoader, TensorDataset
from datasets import load_dataset

# Function to load data
def load_data(file_name):
    with open(file_name, 'r') as file:
        return json.load(file)

# Function to save metrics and plots
def save_metrics_and_plots(metrics, plots, model_name, models_dir):
    # Save metrics as CSV
    metrics_df = pd.DataFrame(metrics, index=[0])
    metrics_df.to_csv(os.path.join(models_dir, model_name, f"{model_name}_metrics.csv"), index=False)

    # Save plots
    for plot_name, plot in plots.items():
        plot.savefig(os.path.join(models_dir, model_name, f"{model_name}_{plot_name}.png"))

# Function to create a DataLoader
def create_dataloader(data, tokenizer, max_length=80, batch_size=32):
    input_ids = []
    attention_masks = []
    labels = []

    for sentence, label in zip(data['sentence'], data['label']):
        encoded_dict = tokenizer.encode_plus(
            sentence,
            add_special_tokens=True,
            max_length=max_length,
            truncation=True,  # Explicitly activate truncation
            padding='max_length',  # Replace deprecated pad_to_max_length with padding='max_length'
            return_attention_mask=True,
            return_tensors='pt'
        )
        input_ids.append(encoded_dict['input_ids'])
        attention_masks.append(encoded_dict['attention_mask'])
        labels.append(label)

    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    print(labels)
    labels = torch.tensor(labels)

    dataset = TensorDataset(input_ids, attention_masks, labels)
    return DataLoader(dataset, batch_size=batch_size)

# Load data
data_dir = "../../downstream_tasks_training/sst2/data"
models_dir = "../../downstream_tasks_training/sst2/models"
#train, val, test = [load_data(os.path.join(data_dir, file_name)) for file_name in ["train.json", "val.json", "test.json"]]

# Load the SST-2 dataset
dataset = load_dataset("glue", "sst2")

# Access the train, validation, and test splits
train = dataset["train"]
val = dataset["validation"]
test = dataset["test"]


# change key from "labels" to "label" for train, val and test data
#for data in [train, val, test]:
#    data["label"] = data.pop("labels")

# Tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

# Process and tokenize the data
test_dataloader = create_dataloader(test, tokenizer)

# Load models and evaluate
model_names = os.listdir(models_dir)
for model_name in model_names:
    model_path = os.path.join(models_dir, model_name)
    model = BertForSequenceClassification.from_pretrained(model_path)
    model.eval()

    predictions, true_labels = [], []
    with torch.no_grad():
        for batch in test_dataloader:
            inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}
            outputs = model(**inputs)
            logits = outputs[1]
            predictions.extend(torch.argmax(logits, dim=1).tolist())
            true_labels.extend(batch[2].tolist())

    # Calculate metrics
    metrics = {
        'Accuracy': accuracy_score(true_labels, predictions),
        'Precision': precision_score(true_labels, predictions),
        'Recall': recall_score(true_labels, predictions),
        'F1 Score': f1_score(true_labels, predictions)
    }

    # Confusion Matrix
    cm = confusion_matrix(true_labels, predictions)
    fig_cm, ax = plt.subplots()
    ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    ax.set_title('Confusion Matrix')
    # more code for a beautiful confusion matrix

    # ROC Curve and AUC
    fpr, tpr, _ = roc_curve(true_labels, predictions)
    roc_auc = auc(fpr, tpr)
    fig_roc, ax = plt.subplots()
    ax.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.2f})')
    ax.set_title('ROC Curve')
    # more code for a beautiful ROC curve

    # Precision-Recall Curve
    precision, recall, _ = precision_recall_curve(true_labels, predictions)
    fig_prc, ax = plt.subplots()
    ax.plot(recall, precision)
    ax.set_title('Precision-Recall Curve')
    # more code for a beautiful Precision-Recall curve

    # Classification Report
    report = classification_report(true_labels, predictions)

    # Save metrics and plots
    plots = {'confusion_matrix': fig_cm, 'roc_curve': fig_roc, 'precision_recall_curve': fig_prc}
    save_metrics_and_plots(metrics, plots, model_name, models_dir)

    # Save classification report
    with open(os.path.join(models_dir, model_name, f"{model_name}_classification_report.txt"), 'w') as file:
        file.write(report)

[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

HFValidationError: Repo id must be in the form 'repo_name' or 'namespace/repo_name': '../../downstream_tasks_training/sst2/models/.DS_Store'. Use `repo_type` argument if needed.