In [1]:
pip install wandb



In [2]:
import os
import requests
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoImageProcessor, AutoModel, AutoModelForImageClassification, Trainer, TrainingArguments
from PIL import Image
from sklearn.metrics import accuracy_score, f1_score
from io import BytesIO

In [3]:
# Remote parquet files of the Alzheimer MRI dataset and their local cache names.
urltest = "https://huggingface.co/datasets/Falah/Alzheimer_MRI/resolve/main/data/test-00000-of-00001-44110b9df98c5585.parquet"
urltrain = "https://huggingface.co/datasets/Falah/Alzheimer_MRI/resolve/main/data/train-00000-of-00001-c08a401c53fe5312.parquet"
output_file_test = "test_data.parquet"
output_file_train = "train_data.parquet"


def download_if_missing(url, destination, timeout=60):
    """Download ``url`` to ``destination`` unless that file already exists.

    Args:
        url: HTTP(S) address of the file to fetch.
        destination: Local path the payload is written to.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    if os.path.exists(destination):
        print("File already exists")
        return

    print("Downloading the dataset")
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of caching an HTTP error page as if it were the dataset.
    response.raise_for_status()

    # Write to a temporary name first so an interrupted write is never mistaken
    # for a complete cached file on the next run.
    partial_path = destination + ".part"
    with open(partial_path, 'wb') as f:
        f.write(response.content)
    os.replace(partial_path, destination)
    print("Download complete")


download_if_missing(urltrain, output_file_train)
download_if_missing(urltest, output_file_test)

data_train = pd.read_parquet(output_file_train)
data_test = pd.read_parquet(output_file_test)

print(data_train.head())

Downloading the dataset
Download complete
Downloading the dataset
Download complete
                                               image  label
0  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...      2
1  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...      0
2  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...      3
3  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...      3
4  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...      2


In [6]:
pip install wandb



In [7]:
class AlzheimerDataset(Dataset):
    """Dataset that decodes and preprocesses one image per row on access.

    Each row of the wrapped frame must provide an ``'image'`` entry that is a
    mapping with a ``'bytes'`` key (the encoded image) and a ``'label'`` entry.
    """

    def __init__(self, dataframe, processor):
        """Keep references to the source frame and the image processor."""
        self.processor = processor
        self.data = dataframe

    def __len__(self):
        """Return the number of rows in the wrapped frame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{"pixel_values", "labels"}`` for the row at position ``idx``."""
        row = self.data.iloc[idx]
        raw_bytes, target = row['image']['bytes'], row['label']

        # Decode the stored bytes and force three channels.
        rgb_image = Image.open(BytesIO(raw_bytes)).convert("RGB")
        processed = self.processor(images=rgb_image, return_tensors="pt")

        # squeeze(0) drops the leading axis of the processor output
        # (presumably a batch axis of size 1 — confirm against the processor).
        return {
            "pixel_values": processed["pixel_values"].squeeze(0),
            "labels": torch.tensor(target, dtype=torch.long),
        }

# The image processor and the classifier are loaded from the same Hub checkpoint.
checkpoint_name = "Falah/Alzheimer_classification_model"
processor = AutoImageProcessor.from_pretrained(checkpoint_name)

# Wrap both splits so that images are decoded and preprocessed lazily.
train_dataset, test_dataset = (
    AlzheimerDataset(frame, processor) for frame in (data_train, data_test)
)

model = AutoModelForImageClassification.from_pretrained(
    checkpoint_name,
    num_labels=4,
    ignore_mismatched_sizes=True,
)

# Freeze every parameter of the base model so that only the remaining
# (non-base) parameters receive gradient updates.
model.base_model.requires_grad_(False)

# Hyperparameters and bookkeeping options for the Trainer run.
# NOTE(review): newer transformers releases spell `evaluation_strategy` as
# `eval_strategy` — confirm against the installed version before renaming.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=5,
    weight_decay=0.005,
    logging_dir="./logs",
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    # Mixed precision is only requested when a CUDA device is present, so the
    # notebook also runs (in full precision) on CPU-only machines.
    fp16=torch.cuda.is_available(),
    gradient_accumulation_steps=1,
    # Supported way to switch off logging integrations such as W&B; replaces
    # reliance on the deprecated WANDB_DISABLED environment variable.
    report_to="none",
)

def compute_metrics(pred):
    """Compute accuracy and weighted F1 from a prediction object.

    Args:
        pred: Object exposing ``predictions`` (per-class scores, arg-maxed
            along axis 1) and ``label_ids`` (reference class ids).

    Returns:
        dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    predicted_classes = np.argmax(pred.predictions, axis=1)
    true_classes = pred.label_ids
    return {
        "accuracy": accuracy_score(true_classes, predicted_classes),
        "f1": f1_score(true_classes, predicted_classes, average="weighted"),
    }

# Switch off the Weights & Biases integration through its environment flag.
os.environ.update(WANDB_DISABLED="true")

# Collect the Trainer inputs in one place before constructing it.
# NOTE(review): the test split is also the evaluation set used during training,
# and the training arguments select the best checkpoint on it, so the metrics
# printed below are not from data unseen during model selection.
trainer_config = dict(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=processor,
    compute_metrics=compute_metrics,
)
trainer = Trainer(**trainer_config)

trainer.train()

print("Evaluating the model on test data")
metrics = trainer.evaluate(eval_dataset=test_dataset)
print("Test Metrics:", metrics)

# Persist the model weights and the preprocessing config.
trainer.save_model("./fine_tuned_model")
processor.save_pretrained("./fine_tuned_processor")


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.1377,0.280242,0.903125,0.902699


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.1377,0.280242,0.903125,0.902699
2,0.0927,0.282033,0.905469,0.905381
3,0.0886,0.284234,0.90625,0.906182
4,0.1192,0.284501,0.907813,0.907808
5,0.0793,0.28506,0.907813,0.907808


Evaluating the model on test data


Test Metrics: {'eval_loss': 0.2845005393028259, 'eval_accuracy': 0.9078125, 'eval_f1': 0.9078079269375354, 'eval_runtime': 770.8801, 'eval_samples_per_second': 1.66, 'eval_steps_per_second': 0.415, 'epoch': 5.0}


['./fine_tuned_processor/preprocessor_config.json']