# Imports

In [None]:
!pip install datasets evaluate

Collecting datasets
  Downloading datasets-2.14.5-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.6/519.6 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m18.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━

In [None]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.33.2-py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m90.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m70.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, safetensors, transformers
Successfully installed safetensors-0.3.3 tokenizers-0.13.3 transformers-4.33.2


In [None]:
# Mount Google Drive so features, models and datasets can be saved and reloaded
# across Colab sessions.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Project root on Drive. Built from the absolute mount point so every path derived
# from it keeps working even if the notebook's working directory changes.
# The trailing slash is required: later cells append sub-paths by string concatenation.
root_path = DRIVE_MOUNT_POINT + '/My Drive/Colab Notebooks/'

Mounted at /content/drive


In [None]:
from datasets import load_dataset
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms
import torchvision.models as models
from transformers import MobileNetV2FeatureExtractor, MobileNetV2ForImageClassification
from transformers import TrainingArguments, Trainer

# Load the Dataset

In [None]:
# Read the NIH chest X-ray sample through the Hugging Face "imagefolder" builder.
image_folder_path = f"{root_path}/NIH_Chest_XRay/sample/images"
dataset = load_dataset("imagefolder", data_dir=image_folder_path)

# The builder only produces a "train" split, so carve a held-out test set from it.
# `train_val_split` is the fraction held out; the seed keeps the split repeatable.
train_val_split = 0.2
split = dataset["train"].train_test_split(test_size=train_val_split, seed=42)
train_dataset, test_dataset = split["train"], split["test"]

Resolving data files:   0%|          | 0/5607 [00:00<?, ?it/s]

Downloading data files:   0%|          | 0/5607 [00:00<?, ?it/s]

Downloading data files: 0it [00:00, ?it/s]

Extracting data files: 0it [00:00, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

## Preprocess the dataset

In [None]:
# Preprocessing applied to every image: resize to 224x224, convert to a float tensor,
# then normalise each channel with the mean/std conventionally used for
# ImageNet-pretrained torchvision models.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


def _split_to_tensors(split, image_transform):
    """Turn one dataset split into a pair of tensors.

    Args:
        split: iterable of examples, each exposing an "image" (PIL image) and
            a "labels" entry.
        image_transform: callable mapping an RGB PIL image to a tensor.

    Returns:
        (features, labels): `features` stacks the transformed images along a new
        leading dimension; `labels` is `torch.tensor` of the collected label entries.
    """
    images = []
    targets = []
    for example in split:
        # Force three channels so the 3-channel normalisation applies to every image.
        images.append(image_transform(example["image"].convert("RGB")))
        targets.append(example["labels"])
    return torch.stack(images, dim=0), torch.tensor(targets)


# Training split: shuffled every epoch.
train_features, train_labels = _split_to_tensors(train_dataset, transform)
train_dataset_cnn = TensorDataset(train_features, train_labels)
train_loader = DataLoader(train_dataset_cnn, batch_size=32, shuffle=True)

# Test split: kept in a fixed order, since shuffling evaluation data serves no purpose.
test_features, test_labels = _split_to_tensors(test_dataset, transform)
test_dataset_cnn = TensorDataset(test_features, test_labels)
test_loader = DataLoader(test_dataset_cnn, batch_size=32, shuffle=False)

In [None]:
# Cache the preprocessed tensors on Drive so the slow image pass can be skipped later.
_feature_dir = root_path + "features/"
for _stem, _tensor in (
    ("train_features", train_features),
    ("train_labels", train_labels),
    ("test_features", test_features),
    ("test_labels", test_labels),
):
    torch.save(_tensor, f"{_feature_dir}{_stem}_plain_cnn.pt")

In [None]:
# Restore the cached tensors written by the save cell.
# torch.load unpickles its input, so only point it at files this notebook produced.
train_features, train_labels, test_features, test_labels = (
    torch.load(root_path + f"features/{stem}_plain_cnn.pt")
    for stem in ("train_features", "train_labels", "test_features", "test_labels")
)

## Minority class selection

In [None]:
# Display names for the 15 label columns, in column order.
unique_labels = [
    'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion',
    'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass',
    'No Finding', 'Nodule', 'Pleural_Thickening', 'Pneumonia', 'Pneumothorax',
]

# Stack the per-image label vectors into one array (one row per image, one column per class).
labels = dataset['train']['labels']
label_data = np.array(labels)

# Summing down each column gives the number of images tagged with that class.
class_counts = label_data.sum(axis=0)

num_classes = len(labels[0])

# Report the per-class sample counts.
for class_name, n_samples in zip(unique_labels, class_counts):
    print(f"{class_name}: {n_samples} samples")

Atelectasis: 508.0 samples
Cardiomegaly: 141.0 samples
Consolidation: 226.0 samples
Edema: 118.0 samples
Effusion: 644.0 samples
Emphysema: 127.0 samples
Fibrosis: 84.0 samples
Hernia: 13.0 samples
Infiltration: 967.0 samples
Mass: 284.0 samples
No Finding: 3044.0 samples
Nodule: 313.0 samples
Pleural_Thickening: 176.0 samples
Pneumonia: 62.0 samples
Pneumothorax: 271.0 samples


In [None]:
# Classes with fewer than `min_samples` positive images are considered too rare to keep.
min_samples = 100
labels_to_drop = [idx for idx, count in enumerate(class_counts) if count < min_samples]

total_length = len(labels[0])
# Keep-mask over the label columns: 1 = class is kept, 0 = class is dropped as too rare.
# (Despite the name, it is the zeros that mark the minority classes.)
minority_classes = [0 if idx in labels_to_drop else 1 for idx in range(total_length)]

# Print the keep-mask
print(minority_classes)

# Down-sample the dominant 'No Finding' class (11th label column, index 10).
no_finding_index = 10
no_finding_samples = 500
no_finding = [idx for idx, label in enumerate(label_data) if label[no_finding_index] == 1]
# Dedicated seeded generator: the subset is reproducible across kernel restarts
# without disturbing the global `random` state.
no_finding_indices = random.Random(42).sample(no_finding, no_finding_samples)

# Dataset row indices of every image that is NOT tagged 'No Finding', and their label rows.
indices_without_nofinding = [idx for idx, label in enumerate(label_data) if label[no_finding_index] == 0]
labels_without_nofinding = [label_data[idx] for idx in indices_without_nofinding]

# Among those, keep the images that carry none of the dropped (rare) classes.
# The *dataset* row index is recorded, not the position inside the filtered list:
# the next cell uses these values to index dataset['train'] directly.
majority_indices = [
    dataset_idx
    for dataset_idx, label in zip(indices_without_nofinding, labels_without_nofinding)
    if not any(label[drop_idx] == 1 for drop_idx in labels_to_drop)
]

combined_indices = no_finding_indices + majority_indices
# Print how many non-'No Finding' images were kept
print(len(majority_indices))

[1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1]
2404


In [None]:
# Fetch every selected row once; each access decodes the image, so reading the row
# separately for the image and for the labels would double the work.
selected_examples = [dataset['train'][idx] for idx in combined_indices]
selected_features = [example['image'] for example in selected_examples]
selected_labels = [example['labels'] for example in selected_examples]

# Remove the label columns of the dropped (rare) classes from every label list.
# The columns come from `labels_to_drop` so they stay in sync with the threshold.
dropped_columns = set(labels_to_drop)
updated_labels = [
    [value for column, value in enumerate(label) if column not in dropped_columns]
    for label in selected_labels
]

# Split the selected subset into train and test sets.
# NOTE(review): the training cells visible in this notebook build their loaders from
# train_features/train_labels, not from these four lists - confirm that is intended.
train_feat, test_feat, train_lab, test_lab = train_test_split(
    selected_features, updated_labels, test_size=0.2, random_state=42
)

# Print the sizes of the train and test sets
print("Train features shape:", len(train_feat))
print("Train labels shape:", len(train_lab))
print("Test features shape:", len(test_feat))
print("Test labels shape:", len(test_lab))

Train features shape: 2323
Train labels shape: 2323
Test features shape: 581
Test labels shape: 581


# MobileNet Training

## Initial model and hyperparameters

In [None]:
# --- Hyperparameters ---
learning_rate = 0.001
num_epochs = 15
batch_size = 32
lr_scheduler_step = 3     # StepLR: number of scheduler steps between decays
lr_scheduler_gamma = 0.5  # StepLR: multiplicative decay factor

# --- Model: pretrained MobileNetV2 with a new output layer ---
model = models.mobilenet_v2(pretrained=True)

# One output per label column; the final classifier layer is swapped for a fresh
# Linear layer of matching input width.
num_classes = len(train_labels[0])
in_features = model.classifier[-1].in_features
model.classifier[-1] = nn.Linear(in_features, num_classes)

# --- Optimiser and learning-rate schedule ---
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=lr_scheduler_step,
    gamma=lr_scheduler_gamma,
)

# --- Device placement: first GPU when available, CPU otherwise ---
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# Independent sigmoid/BCE term per class (multi-label objective).
# A weighted alternative was considered:
#criterion = WeightedBinaryCrossEntropyLoss(pos_weights, neg_weights)
criterion = nn.BCEWithLogitsLoss()
criterion.to(device)



BCEWithLogitsLoss()

In [None]:
# Wrap the feature/label tensors as datasets.
# Note: this rebinds train_dataset / test_dataset, which previously held the image splits.
train_dataset = TensorDataset(train_features, train_labels)
test_dataset = TensorDataset(test_features, test_labels)

# Shuffle only the training batches; evaluation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

## Training Loop NIH Original

In [None]:
# Per-epoch average losses, one entry appended per completed epoch.
test_losses = []
train_losses = []

for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    running_train_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)  # Move data to the model's device
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion averages over the batch; weight by batch size so the
        # epoch figure is a true per-sample mean even with a short last batch.
        running_train_loss += loss.item() * inputs.size(0)

    # Advance the learning-rate schedule once per epoch, after the optimizer updates.
    # Without this call the StepLR scheduler never changes the learning rate.
    scheduler.step()

    train_epoch_loss = running_train_loss / len(train_loader.dataset)
    train_losses.append(train_epoch_loss)

    # ---- Evaluation on the test set ----
    model.eval()
    running_test_loss = 0.0
    running_test_preds = []
    running_test_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_test_loss += loss.item() * inputs.size(0)

            # Threshold the per-class sigmoid probabilities at 0.5 to get binary predictions
            test_preds = torch.sigmoid(outputs) > 0.5
            running_test_preds.append(test_preds.cpu().numpy())
            running_test_labels.append(labels.cpu().numpy())

    test_epoch_loss = running_test_loss / len(test_loader.dataset)
    test_losses.append(test_epoch_loss)

    # Stack the per-batch arrays; scikit-learn consumes NumPy arrays directly.
    all_test_preds = np.concatenate(running_test_preds, axis=0)
    all_test_labels = np.concatenate(running_test_labels, axis=0)

    # Multi-label metrics: accuracy_score is exact-match (every class of a sample must
    # be right); precision/recall/F1 are micro-averaged over all (sample, class) pairs.
    test_accuracy = accuracy_score(all_test_labels, all_test_preds)
    test_precision = precision_score(all_test_labels, all_test_preds, average='micro')
    test_recall = recall_score(all_test_labels, all_test_preds, average='micro')
    test_f1 = f1_score(all_test_labels, all_test_preds, average='micro')

    print(f"Epoch [{epoch+1}/{num_epochs}] - "
          f"Train Loss: {train_epoch_loss:.4f} - "
          f"Test Loss: {test_epoch_loss:.4f} - "
          f"Accuracy: {test_accuracy:.4f} - "
          f"Precision: {test_precision:.4f} - "
          f"Recall: {test_recall:.4f} - "
          f"F1: {test_f1:.4f}")


Epoch [1/15] - Train Loss: 0.2159 - Test Loss: 0.1980 - Accuracy: 0.3868 - Precision: 0.7057 - Recall: 0.3120 - F1: 0.4327
Epoch [2/15] - Train Loss: 0.1976 - Test Loss: 0.2131 - Accuracy: 0.2264 - Precision: 0.7910 - Recall: 0.1905 - F1: 0.3071
Epoch [3/15] - Train Loss: 0.1932 - Test Loss: 0.2024 - Accuracy: 0.4831 - Precision: 0.6385 - Recall: 0.3911 - F1: 0.4851
Epoch [4/15] - Train Loss: 0.1893 - Test Loss: 0.2030 - Accuracy: 0.3458 - Precision: 0.6985 - Recall: 0.2948 - F1: 0.4146
Epoch [5/15] - Train Loss: 0.1839 - Test Loss: 0.2035 - Accuracy: 0.4198 - Precision: 0.6341 - Recall: 0.3738 - F1: 0.4704
Epoch [6/15] - Train Loss: 0.1822 - Test Loss: 0.1990 - Accuracy: 0.4447 - Precision: 0.6537 - Recall: 0.3731 - F1: 0.4751
Epoch [7/15] - Train Loss: 0.1712 - Test Loss: 0.2053 - Accuracy: 0.3396 - Precision: 0.6145 - Recall: 0.3048 - F1: 0.4075
Epoch [8/15] - Train Loss: 0.1653 - Test Loss: 0.2085 - Accuracy: 0.4127 - Precision: 0.6536 - Recall: 0.3472 - F1: 0.4535
Epoch [9/15] - T

In [None]:
# Inspect the raw (pre-sigmoid) model outputs for the first sample of the most recent batch.
# `outputs` is whatever the last executed training/evaluation step left in the kernel,
# so this cell only works after a training cell has run.
outputs[0]

tensor([ 0.2758, -0.1331, -0.2119,  0.0497, -0.0336, -0.2017, -0.4633,  0.1044,
         0.5138,  0.2606,  0.1077,  0.2004,  0.1389, -0.0854,  0.1152],
       device='cuda:0', grad_fn=<SelectBackward0>)

## Training Loop NIH selected

In [None]:
# Per-epoch average losses, one entry appended per completed epoch.
# NOTE(review): this cell iterates the same train_loader/test_loader and keeps updating
# the same model/optimizer as the previous section; nothing visible in this notebook
# builds loaders from the "selected" subset - confirm before comparing the two runs.
test_losses = []
train_losses = []

for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    running_train_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)  # Move data to the model's device
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion averages over the batch; weight by batch size so the
        # epoch figure is a true per-sample mean even with a short last batch.
        running_train_loss += loss.item() * inputs.size(0)

    # Advance the learning-rate schedule once per epoch, after the optimizer updates.
    # Without this call the StepLR scheduler never changes the learning rate.
    scheduler.step()

    train_epoch_loss = running_train_loss / len(train_loader.dataset)
    train_losses.append(train_epoch_loss)

    # ---- Evaluation on the test set ----
    model.eval()
    running_test_loss = 0.0
    running_test_preds = []
    running_test_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move data to the model's device
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_test_loss += loss.item() * inputs.size(0)

            # Threshold the per-class sigmoid probabilities at 0.5 to get binary predictions
            test_preds = torch.sigmoid(outputs) > 0.5
            running_test_preds.append(test_preds.cpu().numpy())
            running_test_labels.append(labels.cpu().numpy())

    test_epoch_loss = running_test_loss / len(test_loader.dataset)
    test_losses.append(test_epoch_loss)

    # Stack the per-batch arrays; scikit-learn consumes NumPy arrays directly.
    all_test_preds = np.concatenate(running_test_preds, axis=0)
    all_test_labels = np.concatenate(running_test_labels, axis=0)

    # Multi-label metrics: accuracy_score is exact-match (every class of a sample must
    # be right); precision/recall/F1 are micro-averaged over all (sample, class) pairs.
    test_accuracy = accuracy_score(all_test_labels, all_test_preds)
    test_precision = precision_score(all_test_labels, all_test_preds, average='micro')
    test_recall = recall_score(all_test_labels, all_test_preds, average='micro')
    test_f1 = f1_score(all_test_labels, all_test_preds, average='micro')

    print(f"Epoch [{epoch+1}/{num_epochs}] - "
          f"Train Loss: {train_epoch_loss:.4f} - "
          f"Test Loss: {test_epoch_loss:.4f} - "
          f"Accuracy: {test_accuracy:.4f} - "
          f"Precision: {test_precision:.4f} - "
          f"Recall: {test_recall:.4f} - "
          f"F1: {test_f1:.4f}")


Epoch [1/15] - Train Loss: 0.2148 - Test Loss: 0.1979 - Accuracy: 0.4554 - Precision: 0.6750 - Recall: 0.3674 - F1: 0.4758
Epoch [2/15] - Train Loss: 0.2001 - Test Loss: 0.2005 - Accuracy: 0.2674 - Precision: 0.7246 - Recall: 0.2308 - F1: 0.3501
Epoch [3/15] - Train Loss: 0.1941 - Test Loss: 0.1985 - Accuracy: 0.3601 - Precision: 0.7047 - Recall: 0.2933 - F1: 0.4142
Epoch [4/15] - Train Loss: 0.1899 - Test Loss: 0.1975 - Accuracy: 0.4020 - Precision: 0.6780 - Recall: 0.3436 - F1: 0.4561
Epoch [5/15] - Train Loss: 0.1860 - Test Loss: 0.1981 - Accuracy: 0.3761 - Precision: 0.7097 - Recall: 0.3199 - F1: 0.4410
Epoch [6/15] - Train Loss: 0.1800 - Test Loss: 0.2088 - Accuracy: 0.1925 - Precision: 0.6487 - Recall: 0.1646 - F1: 0.2626
Epoch [7/15] - Train Loss: 0.1734 - Test Loss: 0.2053 - Accuracy: 0.4002 - Precision: 0.6565 - Recall: 0.3408 - F1: 0.4487
Epoch [8/15] - Train Loss: 0.1660 - Test Loss: 0.2270 - Accuracy: 0.4091 - Precision: 0.6496 - Recall: 0.3372 - F1: 0.4439
Epoch [9/15] - T

In [None]:
def compute_metrics(preds, labels):
    """Score predictions against reference labels.

    Args:
        preds: predicted scores; must expose ``.round()``.
            Presumably per-class probabilities in [0, 1] - TODO confirm with callers.
        labels: reference labels in a form accepted by the scikit-learn metrics.

    Returns:
        dict with keys 'accuracy', 'f1', 'precision' and 'recall'. Everything except
        accuracy is micro-averaged.
    """
    # Snap each score to the nearest integer to obtain hard predictions.
    binarized = preds.round()

    metrics = {'accuracy': accuracy_score(labels, binarized)}

    # The remaining scores share one call signature; micro-averaging suits multilabel data.
    micro_scorers = (
        ('f1', f1_score),
        ('precision', precision_score),
        ('recall', recall_score),
    )
    for key, scorer in micro_scorers:
        metrics[key] = scorer(labels, binarized, average='micro')

    return metrics

In [None]:
# Plot the per-epoch losses recorded by the manual training loop (train_losses / test_losses).
# No Hugging Face `Trainer` object is created in this notebook, and `Trainer` exposes its
# logs through `trainer.state.log_history` rather than a `history` mapping, so the curves
# are taken from the lists the loop fills in.
# The held-out test split plays the role of the validation set here.
eval_losses = test_losses

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(train_losses, label="Training Loss")
ax.plot(eval_losses, label="Validation Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training and Validation Loss")
ax.legend()
plt.show()

# **MobileNet for Dataverse**

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt

# Preprocessing applied to every image: resize to 224x224, convert to a tensor and
# normalise each channel with the mean/std conventionally used for ImageNet-pretrained models.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load the three splits; ImageFolder derives the class of each image from its sub-directory.
train_dataset = torchvision.datasets.ImageFolder(root_path+'data/train', transform=transform)
test_dataset = torchvision.datasets.ImageFolder(root_path+'data/test', transform=transform)
val_dataset = torchvision.datasets.ImageFolder(root_path+'data/validation', transform=transform)

# Create data loaders (only the training batches are shuffled)
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

# Pretrained MobileNetV2 with its final classifier layer replaced by one sized for this dataset.
# The input width is read from the existing layer instead of being hard-coded.
model = models.mobilenet_v2(pretrained=True)
num_classes = len(train_dataset.classes)
in_features = model.classifier[-1].in_features
model.classifier[-1] = nn.Linear(in_features, num_classes)

# Single-label classification objective and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training configuration and device placement (GPU when available)
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

In [None]:
# Per-epoch history: training loss, validation loss, validation accuracy (%) and AUC-ROC
train_losses = []
val_losses = []
val_accuracies = []
val_aurocs = []

for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    train_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Weight the batch-mean loss by batch size so the epoch figure is a per-sample mean.
        train_loss += loss.item() * images.size(0)

    # Calculate average training loss for the epoch
    train_loss /= len(train_loader.dataset)
    train_losses.append(train_loss)

    # ---- Validation pass ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    all_predictions = []  # per-sample class probabilities
    all_labels = []

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)

            # Predicted class = index of the largest logit
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # roc_auc_score needs probabilities, so store softmax outputs rather than raw logits.
            all_predictions.extend(torch.softmax(outputs, dim=1).cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Calculate average validation loss for the epoch
    val_loss /= len(val_loader.dataset)
    val_losses.append(val_loss)

    # Calculate validation accuracy (percentage)
    val_accuracy = 100 * correct / total
    val_accuracies.append(val_accuracy)

    # Calculate validation AUC-ROC.
    # Binary targets take a 1-D score for the positive class (class index 1);
    # multiclass targets take the full probability matrix plus an explicit
    # `multi_class` strategy, otherwise roc_auc_score raises.
    if num_classes == 2:
        positive_scores = [probs[1] for probs in all_predictions]
        val_auroc = roc_auc_score(all_labels, positive_scores)
    else:
        val_auroc = roc_auc_score(
            all_labels,
            all_predictions,
            average='macro',
            multi_class='ovr',
            labels=list(range(num_classes)),
        )
    val_aurocs.append(val_auroc)

    print(f"Epoch [{epoch+1}/{num_epochs}] - "
          f"Train Loss: {train_loss:.4f}, "
          f"Val Loss: {val_loss:.4f}, "
          f"Val Acc: {val_accuracy:.2f}%, "
          f"Val AUC-ROC: {val_auroc:.4f}")

In [None]:
# Plot the Dataverse training history: losses on the left, validation metrics on the right.


def _epoch_axis(series):
    """Return 1-based epoch numbers matching the length of `series`."""
    return range(1, len(series) + 1)


fig, (loss_ax, metric_ax) = plt.subplots(1, 2, figsize=(10, 5))

# The x-axis follows the recorded history, so a run stopped before `num_epochs`
# epochs still plots instead of failing on a length mismatch.
loss_ax.plot(_epoch_axis(train_losses), train_losses, label='Train')
loss_ax.plot(_epoch_axis(val_losses), val_losses, label='Validation')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()

# Accuracy is stored as a percentage; rescale it to a 0-1 fraction so it shares
# a readable axis with AUC-ROC.
accuracy_fraction = [acc / 100 for acc in val_accuracies]
metric_ax.plot(_epoch_axis(accuracy_fraction), accuracy_fraction, label='Accuracy')
metric_ax.plot(_epoch_axis(val_aurocs), val_aurocs, label='AUC-ROC')
metric_ax.set_xlabel('Epoch')
metric_ax.set_ylabel('Metric')
metric_ax.set_title('Validation Accuracy and AUC-ROC')
metric_ax.legend()

fig.tight_layout()
plt.show()