<a href="https://colab.research.google.com/github/ssudhanshu488/Alziehmer_Disease_Classification/blob/main/Alziehmer_Disease_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import os
import pandas as pd

# Path to your dataset directory
dataset_dir = '/content/All_img_diff_name'

# List all image files
image_files = [f for f in os.listdir(dataset_dir) if f.endswith('.jpg')]

# Extract class labels from filenames and create a DataFrame
data = []
for img_file in image_files:
    class_label = img_file.split('_')[0]  # Extract class from filename
    data.append({'image_path': os.path.join(dataset_dir, img_file), 'label': class_label})

# Create a DataFrame
df = pd.DataFrame(data)

# Map class labels to integers
class_to_id = {'AD': 0, 'CN': 1, 'MCI': 2}
df['label'] = df['label'].map(class_to_id)

# Save the DataFrame (optional)
df.to_csv('dataset.csv', index=False)

# Verify the DataFrame
print(df.head())
print(df.columns)

                               image_path  label
0  /content/All_img_diff_name/MCI_623.jpg      2
1  /content/All_img_diff_name/MCI_390.jpg      2
2   /content/All_img_diff_name/CN_731.jpg      1
3   /content/All_img_diff_name/AD_454.jpg      0
4    /content/All_img_diff_name/CN_75.jpg      1
Index(['image_path', 'label'], dtype='object')


In [11]:
from datasets import Dataset

# Load the dataset from the DataFrame
dataset = Dataset.from_pandas(df)

# Split the dataset into train and validation sets
dataset = dataset.train_test_split(test_size=0.2, seed=42)

In [12]:
from transformers import ViTFeatureExtractor
from PIL import Image

# Load the feature extractor
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')

# Preprocess the dataset
def preprocess_images(examples):
    images = [Image.open(img_path).convert('RGB') for img_path in examples['image_path']]
    examples['pixel_values'] = feature_extractor(images, return_tensors='pt')['pixel_values']
    return examples

dataset = dataset.map(preprocess_images, batched=True, batch_size=32)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]



Map:   0%|          | 0/3209 [00:00<?, ? examples/s]

Map:   0%|          | 0/803 [00:00<?, ? examples/s]

In [13]:
from transformers import ViTForImageClassification

# Load the pretrained ViT model with ignore_mismatched_sizes=True
model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224',
    num_labels=3,  # Number of classes in your dataset
    id2label={0: 'AD', 1: 'CN', 2: 'MCI'},
    label2id={'AD': 0, 'CN': 1, 'MCI': 2},
    ignore_mismatched_sizes=True  # Ignore size mismatch in the final layer
)

config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([3, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    push_to_hub=False,
)



In [15]:
from sklearn.metrics import accuracy_score, f1_score, precision_score

def compute_metrics(p):
    preds = p.predictions.argmax(-1)
    labels = p.label_ids

    # Calculate metrics
    accuracy = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average='weighted')  # Use 'weighted' for multi-class
    precision = precision_score(labels, preds, average='weighted')  # Use 'weighted' for multi-class

    return {
        'accuracy': accuracy,
        'f1_score': f1,
        'precision': precision
    }

In [16]:
from transformers import TrainerCallback
import matplotlib.pyplot as plt

# Custom callback to log training accuracy
class LogTrainingAccuracyCallback(TrainerCallback):
    def __init__(self):
        self.train_metrics = []
        self.eval_metrics = []

    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs is not None:
            if 'loss' in logs and 'epoch' in logs:  # Logs during training
                self.train_metrics.append(logs)
            if 'eval_accuracy' in logs:  # Logs during evaluation
                self.eval_metrics.append(logs)

    def on_train_end(self, args, state, control, **kwargs):
        # Plot training and validation accuracy
        train_epochs = [m['epoch'] for m in self.train_metrics]
        train_acc = [m.get('accuracy', None) for m in self.train_metrics]
        eval_epochs = [m['epoch'] for m in self.eval_metrics]
        eval_acc = [m['eval_accuracy'] for m in self.eval_metrics]

        plt.figure(figsize=(10, 6))
        plt.plot(train_epochs, train_acc, label='Training Accuracy', marker='o')
        plt.plot(eval_epochs, eval_acc, label='Validation Accuracy', marker='o')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.title('Training and Validation Accuracy')
        plt.legend()
        plt.grid()
        plt.show()

In [17]:
from transformers import Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    compute_metrics=compute_metrics,  # Use the updated function
    callbacks=[LogTrainingAccuracyCallback()]  # Add the custom callback
)

In [18]:
trainer.train()



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mssudhanshu488[0m ([33mssudhanshu488-iiit-guwahati[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Score,Precision
1,0.539,0.610323,0.731009,0.722601,0.737967
2,0.2577,0.324946,0.865504,0.86599,0.86702
3,0.0587,0.272393,0.899128,0.899286,0.900113


KeyError: 'epoch'

In [19]:
trainer.evaluate()

Epoch,Training Loss,Validation Loss,Accuracy,F1 Score,Precision
1,0.539,0.610323,0.731009,0.722601,0.737967
2,0.2577,0.324946,0.865504,0.86599,0.86702
3,0.0587,0.272393,0.899128,0.899286,0.900113


{'eval_loss': 0.27239346504211426,
 'eval_accuracy': 0.8991282689912827,
 'eval_f1_score': 0.8992855729137234,
 'eval_precision': 0.9001132833193014}

In [20]:
model.save_pretrained('./alzheimer_vit_model')
feature_extractor.save_pretrained('./alzheimer_vit_model')

['./alzheimer_vit_model/preprocessor_config.json']