In [1]:
!pip install medmnist

Collecting medmnist
  Downloading medmnist-3.0.2-py3-none-any.whl.metadata (14 kB)
Collecting fire (from medmnist)
  Downloading fire-0.7.0.tar.gz (87 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/87.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->medmnist)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->medmnist)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->medmnist)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->m

In [2]:
from medmnist import PneumoniaMNIST
import collections

# Load the datasets
train = PneumoniaMNIST(split='train', download=True)
val = PneumoniaMNIST(split='val', download=True)
test = PneumoniaMNIST(split='test', download=True)

# Count the number of samples per class in each split
print("Train class distribution:", collections.Counter([int(x[1]) for x in train]))
print("Val class distribution:", collections.Counter([int(x[1]) for x in val]))
print("Test class distribution:", collections.Counter([int(x[1]) for x in test]))


100%|██████████| 4.17M/4.17M [00:01<00:00, 3.71MB/s]


Train class distribution: Counter({1: 3494, 0: 1214})
Val class distribution: Counter({1: 389, 0: 135})
Test class distribution: Counter({1: 390, 0: 234})


  print("Train class distribution:", collections.Counter([int(x[1]) for x in train]))
  print("Val class distribution:", collections.Counter([int(x[1]) for x in val]))
  print("Test class distribution:", collections.Counter([int(x[1]) for x in test]))


In [4]:
!pip install torch



In [11]:
from medmnist import PneumoniaMNIST
from transformers import (
    AutoImageProcessor,
    ResNetForImageClassification,
    TrainingArguments,
    Trainer
)
from datasets import Dataset
from torchvision import transforms
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
import collections

# 1. Load data (images are PIL)
raw_train = PneumoniaMNIST(split='train', download=True)
raw_val = PneumoniaMNIST(split='val', download=True)
raw_test = PneumoniaMNIST(split='test', download=True)

# 2. Calculate class weights (for training set)
class_counts = torch.tensor([
    collections.Counter([int(x[1]) for x in raw_train])[0],  # normal
    collections.Counter([int(x[1]) for x in raw_train])[1]   # pneumonia
])
class_weights = 1. / class_counts
class_weights = class_weights / class_weights.sum()

# 3. Data augmentation and preprocessing
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),  # PIL to tensor
])
val_test_transform = transforms.Compose([
    transforms.ToTensor(),  # PIL to tensor
])

feature_extractor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")

def preprocess_train(example):
    image = train_transform(example[0])
    image = image.repeat(3, 1, 1)
    encoding = feature_extractor(images=[image], return_tensors="pt")
    label = example[1]
    if isinstance(label, list):
        label = label[0]
    return {
        'pixel_values': encoding['pixel_values'].squeeze(0),
        'label': int(label)
    }

def preprocess_val_test(example):
    image = val_test_transform(example[0])
    image = image.repeat(3, 1, 1)
    encoding = feature_extractor(images=[image], return_tensors="pt")
    label = example[1]
    if isinstance(label, list):
        label = label[0]
    return {
        'pixel_values': encoding['pixel_values'].squeeze(0),
        'label': int(label)
    }


def to_hf_dataset(raw_dataset, preprocess_fn):
    data = {'image': [x[0] for x in raw_dataset], 'label': [x[1] for x in raw_dataset]}
    hf_dataset = Dataset.from_dict(data)
    hf_dataset = hf_dataset.map(lambda x: preprocess_fn((x['image'], x['label'])), remove_columns=['image'])
    return hf_dataset

train_dataset = to_hf_dataset(raw_train, preprocess_train)
val_dataset = to_hf_dataset(raw_val, preprocess_val_test)
test_dataset = to_hf_dataset(raw_test, preprocess_val_test)


  collections.Counter([int(x[1]) for x in raw_train])[0],  # normal
  collections.Counter([int(x[1]) for x in raw_train])[1]   # pneumonia


Map:   0%|          | 0/4708 [00:00<?, ? examples/s]

Map:   0%|          | 0/524 [00:00<?, ? examples/s]

Map:   0%|          | 0/624 [00:00<?, ? examples/s]

In [12]:
# 2. Load model
model = ResNetForImageClassification.from_pretrained(
    "microsoft/resnet-50",
    num_labels=2,
    ignore_mismatched_sizes=True
)

config.json:   0%|          | 0.00/69.6k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([2, 2048]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
# 3. Metrics: Accuracy, F1-score, ROC-AUC
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    try:
        probs = torch.softmax(torch.tensor(logits), dim=1)[:,1].numpy()
        auc = roc_auc_score(labels, probs)
    except:
        auc = 0.0
    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'roc_auc': auc
    }


In [22]:
# 4. Custom Trainer with class weighting
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):  # <-- Add **kwargs
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights.to(model.device))
        loss = loss_fct(logits, labels)
        return (loss, outputs) if return_outputs else loss



In [36]:
# 5. Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    logging_dir='./logs',
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=50,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=5,
    learning_rate=2e-5,
    weight_decay=0.05,
    warmup_ratio=0.1,
    lr_scheduler_type="linear",
    do_eval=True,
    load_best_model_at_end=True,
    metric_for_best_model='eval_f1',
    greater_is_better=True,
    report_to="none",
    seed=42
)


In [37]:
# 6. Trainer setup
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=feature_extractor,
    compute_metrics=compute_metrics,
)

  trainer = CustomTrainer(


In [38]:
# 7. Train and automatically save best model based on validation metric
trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Roc Auc
1,0.2278,0.489276,0.862595,0.864368,0.966581,0.912621,0.911035
2,0.2204,0.269051,0.902672,0.961749,0.904884,0.93245,0.966219
3,0.1845,0.206292,0.923664,0.965333,0.930591,0.947644,0.975512
4,0.1482,0.186207,0.942748,0.973615,0.948586,0.960938,0.980558
5,0.1724,0.187059,0.944656,0.973684,0.951157,0.962289,0.979434


TrainOutput(global_step=740, training_loss=0.19614985182478623, metrics={'train_runtime': 2884.7144, 'train_samples_per_second': 8.16, 'train_steps_per_second': 0.257, 'total_flos': 4.998813966913536e+17, 'train_loss': 0.19614985182478623, 'epoch': 5.0})

In [44]:
val_results = trainer.evaluate(val_dataset)
print(val_results)

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Roc Auc
0,No log,0.188777,0.944656,0.971204,0.953728,0.962387,0.979644


{'eval_loss': 0.18877656757831573, 'eval_accuracy': 0.9446564885496184, 'eval_precision': 0.9712041884816754, 'eval_recall': 0.9537275064267352, 'eval_f1': 0.9623865110246433, 'eval_roc_auc': 0.9796439112634485}


In [39]:
# 8. Evaluate on test set
test_results = trainer.evaluate(test_dataset)
print(test_results)


{'eval_loss': 0.45805954933166504, 'eval_accuracy': 0.8541666666666666, 'eval_precision': 0.8359550561797753, 'eval_recall': 0.9538461538461539, 'eval_f1': 0.8910179640718563, 'eval_roc_auc': 0.942855577470962, 'eval_runtime': 62.4003, 'eval_samples_per_second': 10.0, 'eval_steps_per_second': 0.321, 'epoch': 5.0}


In [41]:
# After training is comple

# Save the model
trainer.save_model("./my_model_directory")


In [None]:
from transformers import AutoModelForImageClassification

model = AutoModelForImageClassification.from_pretrained("./my_model_directory")


In [47]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# After training is complete
model.save_pretrained("results")



In [49]:
    from google.colab import drive
    drive.mount('/content/drive')

Mounted at /content/drive


In [55]:
feature_extractor.save_pretrained("./my_model_directory")


['./my_model_directory/preprocessor_config.json']

In [52]:
    model_save_name = '/content/my_model_directory'
    path = f'/content/drive/MyDrive{model_save_name}'
    torch.save(model.state_dict(), path)

RuntimeError: Parent directory /content/drive/MyDrive/content does not exist.

In [48]:
import torch
from transformers import AutoModelForImageClassification, AutoImageProcessor
from sklearn.metrics import f1_score, accuracy_score, matthews_corrcoef
import numpy as np

# 1. Load your saved model and processor
model_dir = "./my_model_directory"
model = AutoModelForImageClassification.from_pretrained("results")
processor = AutoImageProcessor.from_pretrained(model_dir)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# 2. Prepare test data (assuming test_dataset is already prepared as before)
# If not, you can use the same preprocessing as during training:
def preprocess_test(example):
    image = val_test_transform(example[0])  # PIL to tensor
    image = image.repeat(3, 1, 1)          # To 3 channels
    encoding = processor(images=[image], return_tensors="pt")
    return {'pixel_values': encoding['pixel_values'].squeeze(0), 'label': int(example[1])}

# If you have test_dataset (from before), use that:
# test_dataset = ... (from your previous code)
# If not, prepare test_dataset as in your training code

# 3. Run inference and collect predictions and labels
preds = []
labels = []
model.eval()
with torch.no_grad():
    for example in test_dataset:
        pixel_values = torch.tensor(example['pixel_values']).unsqueeze(0).to(device)
        output = model(pixel_values)
        pred = torch.argmax(output.logits, dim=1).item()
        preds.append(pred)
        labels.append(example['label'])

# 4. Calculate metrics
f1 = f1_score(labels, preds, average='binary')
accuracy = accuracy_score(labels, preds)
mcc = matthews_corrcoef(labels, preds)

print(f"F1-score: {f1:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Matthews Correlation Coefficient: {mcc:.4f}")


F1-score: 0.8892
Accuracy: 0.8510
Matthews Correlation Coefficient: 0.6807
