<a href="https://colab.research.google.com/github/obete/ClassifierDINOv3/blob/main/ClassifierDINOv3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **DINOv3 as base with a classifier head**


In [None]:
#@title Dependencies and Modules
!pip install tensorflow pillow requests

import os
import pandas as pd
import requests
from PIL import Image
from io import BytesIO


**Dataset Used**

The data used in this project is the Fitzpatrick17k dataset, available on Kaggle. It is a CSV comprising approximately 17,000 records, each with an image URL, a Fitzpatrick scale and a skin-condition label for the image.

In [None]:
#@title Setting Data Path
from google.colab import drive
# Mount Google Drive so files stored there are reachable from the Colab VM.
drive.mount('/content/drive')
# Location of the Fitzpatrick17k metadata CSV on the mounted drive. Uses the
# "MyDrive" spelling so it agrees with the DRIVE_ROOT used by the later cells.
file_path = "/content/drive/MyDrive/Colab Notebooks/AI_ML_EXAM/fitzpatrick17k.csv"

In [None]:
#@title Loading Dataset Google drive onto this notebook
# Read the metadata CSV (path set in the previous cell) into a DataFrame.
skin_data = pd.read_csv(filepath_or_buffer=file_path)
# Preview the first five rows as a quick sanity check of the columns.
skin_data.head(n=5)

**Filtering the Data**

This study focuses on African skin, which falls under Fitzpatrick scale 4, 5 and 6. Therefore, not all of the 17,000 images in the dataset are useful for this project. We filtered the data to keep only the desired scales.
This leaves us with 4934 images corresponding to the desired skin color.

In [None]:
#@title Filtering Data for Fitzpatrick Scale 5 and 6 and dropping the empty urls
# NOTE(review): the notebook text preceding this cell mentions Fitzpatrick
# scales 4, 5 and 6, but only 5 and 6 are kept here — confirm which is intended.
is_target_scale = skin_data['fitzpatrick_scale'].isin([5, 6])
# Rows without a URL cannot be downloaded later, so they are excluded as well.
has_url = skin_data['url'].notna()
filtered_data = skin_data.loc[is_target_scale & has_url]
# Summary statistics of the columns used downstream.
filtered_data[['url', 'label', 'fitzpatrick_scale']].describe()

In [None]:
# Target folder under My Drive
DRIVE_ROOT = '/content/drive/MyDrive/Colab Notebooks/AI_ML_EXAM/'
# DRIVE_ROOT already ends with '/', so join the parts instead of interpolating
# another '/' (which produced ".../AI_ML_EXAM//skin_images").
base_dir = os.path.join(DRIVE_ROOT, 'skin_images')
# Create the image root up front; exist_ok makes the cell safe to re-run.
os.makedirs(base_dir, exist_ok=True)

**Accessing Images.**

The .csv file provides the URL of the public location of each image. Therefore, to make use of it, in this section we download, resize and save the images under their correct labels in a `skin_images` folder in our Google Drive. This section does that and counts the total number of images that have been successfully downloaded.

In [None]:
#@title Downloading and Resizing Images
import hashlib
from pathlib import Path

# Target folder under My Drive
DRIVE_ROOT = '/content/drive/MyDrive/Colab Notebooks/AI_ML_EXAM/'
# DRIVE_ROOT already ends with '/', so join instead of interpolating a second '/'.
base_dir = os.path.join(DRIVE_ROOT, 'skin_images')
os.makedirs(base_dir, exist_ok=True)

# Remove duplicates from the dataset
filtered_data = filtered_data.drop_duplicates(subset=['url'])

# Headers to mimic a browser request
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"
}

# Initialize counters
total_images_downloaded = 0   # images fetched and written during this run
skipped_existing = 0          # images already on disk from an earlier run
failed_urls = []              # (url, error message) pairs for later review

# Download, resize, and save images
for idx, row in filtered_data.iterrows():
    lbl, url = row['label'], row['url']
    try:
        # A missing label (NaN) is not a string and cannot name a folder; record
        # it as a failure instead of letting os.path.join abort the whole loop.
        if not isinstance(lbl, str) or not lbl.strip():
            raise ValueError(f"missing or non-string label: {lbl!r}")
        outdir = os.path.join(base_dir, lbl)

        # Built-in hash() of a str is salted per interpreter process, so it gave
        # a different file name after every kernel restart and re-runs piled up
        # duplicate images. A content digest of the URL is stable across runs.
        url_digest = hashlib.sha256(url.encode('utf-8')).hexdigest()[:16]
        out_path = os.path.join(outdir, f"{idx}_{url_digest}.jpg")

        # Stable names make the cell resumable: skip what is already stored.
        if os.path.exists(out_path):
            skipped_existing += 1
            continue

        # Use headers in the request
        resp = requests.get(url, headers=headers, timeout=10)
        resp.raise_for_status()

        # Process and save the image
        img = Image.open(BytesIO(resp.content)).convert('RGB')
        img = img.resize((224, 224))

        # Create the class folder only once there is an image to put in it, so
        # labels whose downloads all fail do not leave empty class folders.
        os.makedirs(outdir, exist_ok=True)
        img.save(out_path)
        total_images_downloaded += 1
    except Exception as e:
        # Best-effort download: log the failure and carry on with the next row.
        failed_urls.append((url, str(e)))

# Verify and count total files
file_count = sum(1 for _ in Path(base_dir).rglob('*.jpg'))

# Display results
print(f"Total images downloaded and stored: {total_images_downloaded}")
print(f"Images skipped because they already exist: {skipped_existing}")
print(f"Total image files found in folder and subfolders: {file_count}")
print(f"Failed URLs: {len(failed_urls)}")

# Optional: Save failed URLs for review
failed_urls_path = os.path.join(DRIVE_ROOT, 'failed_urls.txt')
# Explicit encoding: error messages may contain non-ASCII characters.
with open(failed_urls_path, 'w', encoding='utf-8') as f:
    for url, error in failed_urls:
        f.write(f"{url}\t{error}\n")
print(f"Failed URLs logged to: {failed_urls_path}")



In [None]:
import random
import shutil

# Fixed seed: together with the sorted file listing below it makes the 80/20
# split reproducible, so re-running the cell reproduces the same assignment
# instead of drawing a fresh random split on top of the old copies.
SPLIT_SEED = 42
images_root = f'{DRIVE_ROOT}/skin_images'


def _copy_into_split(filenames, src_dir, dst_dir, other_split_dir):
    """Copy `filenames` from `src_dir` into `dst_dir`.

    Any same-named copy found in `other_split_dir` is removed, so a file never
    ends up in both the training and the validation folder.
    """
    for fname in filenames:
        shutil.copy(os.path.join(src_dir, fname), os.path.join(dst_dir, fname))
        stale_copy = os.path.join(other_split_dir, fname)
        if os.path.isfile(stale_copy):
            os.remove(stale_copy)


for class_name in sorted(os.listdir(images_root)):
    class_path = os.path.join(images_root, class_name)
    if not os.path.isdir(class_path):
        continue

    # List all images (sorted: os.listdir order is arbitrary)
    images = sorted(
        f for f in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, f))
    )

    # With fewer than two images one of the two split folders would be empty;
    # skip the class entirely so train and val keep identical class lists.
    if len(images) < 2:
        print(f"Skipping class '{class_name}': {len(images)} image(s), need at least 2 to split")
        continue

    # Per-class generator seeded from the class name: adding images to one
    # class does not reshuffle the others.
    random.Random(f"{SPLIT_SEED}:{class_name}").shuffle(images)

    # Split 80/20
    split_idx = int(0.8 * len(images))
    train_files = images[:split_idx]
    val_files = images[split_idx:]

    train_class_path = f"{DRIVE_ROOT}/train_set/{class_name}"
    val_class_path = f"{DRIVE_ROOT}/val_set/{class_name}"
    os.makedirs(train_class_path, exist_ok=True)
    os.makedirs(val_class_path, exist_ok=True)

    # Copy into train/ and val/, keeping the two folders disjoint
    _copy_into_split(train_files, class_path, train_class_path, val_class_path)
    _copy_into_split(val_files, class_path, val_class_path, train_class_path)

In [None]:
from torchvision import transforms

# NOTE(review): the original note says 224x224 for ViT-Small/Base and 384x384
# for ViT-Large — confirm the right size for the backbone loaded in this notebook.
img_size = 224

# Per-channel mean / std used to standardise the tensor after ToTensor().
channel_mean = (0.485, 0.456, 0.406)
channel_std = (0.229, 0.224, 0.225)

# Preprocessing applied to every image: resize -> tensor -> normalise.
preprocessing_steps = [
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocessing_steps)


In [None]:
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Root folders of the two splits written by the split cell (one sub-folder per class).
train_dir, val_dir = f'{DRIVE_ROOT}/train_set', f'{DRIVE_ROOT}/val_set'

# Both datasets share the same preprocessing pipeline (`transform`).
train_dataset, val_dataset = (
    ImageFolder(root=split_dir, transform=transform)
    for split_dir in (train_dir, val_dir)
)

# Same batch size for both loaders; only the training loader is shuffled.
loader_kwargs = dict(batch_size=32)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

# Class names
print(train_dataset.classes)


In [None]:
#@title Using DINOv3

!pip install -U transformers

Local Inference on GPU
Model page: https://huggingface.co/facebook/dinov3-vith16plus-pretrain-lvd1689m

⚠️ If the generated code snippets do not work, please open an issue on either the model repo and/or on huggingface.js 🙏

The model you are trying to use is gated. Please make sure you have access to it by visiting the model page. To run inference, either set HF_TOKEN in your environment variables / Secrets or run the following cell to log in. 🤗

In [None]:
from huggingface_hub import login
# Authenticate with the Hugging Face Hub; the notebook text above states that
# the model used below is gated and needs a token.
# NOTE(review): new_session=False presumably reuses an already-saved token
# instead of prompting again — confirm against the huggingface_hub docs.
login(new_session=False)

In [None]:
# @title Load model directly
from transformers import AutoImageProcessor, AutoModel

# Single source of truth for the checkpoint name, so the image processor and
# the backbone can never be loaded from different checkpoints by accident.
MODEL_ID = "facebook/dinov3-vith16plus-pretrain-lvd1689m"

processor = AutoImageProcessor.from_pretrained(MODEL_ID)
backbone = AutoModel.from_pretrained(MODEL_ID)

In [None]:
#@title Freeze backbone → use DINOv3 as a fixed feature extractor.
# Freeze backbone: switch off gradient tracking for every backbone parameter in
# one call. The trailing ';' keeps the notebook from echoing the returned module.
backbone.requires_grad_(False);


In [None]:
#@title Custom Classification head
import torch.nn as nn

class DinoClassifier(nn.Module):
    """Wrap a backbone with a single linear classification layer.

    Args:
        model: backbone module; must expose ``config.hidden_size`` and return an
            object with a ``last_hidden_state`` attribute when called.
        num_classes: number of output logits produced by the head.
    """

    def __init__(self, model, num_classes):
        super().__init__()
        self.model = model
        # The head's input width is taken from the backbone's configuration.
        embed_dim = model.config.hidden_size
        self.head = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        """Return the head's logits for the first token of the backbone output."""
        hidden_states = self.model(x).last_hidden_state
        # Index 0 along dim 1 is the token the classifier is built on
        # (referred to as the CLS token in this notebook).
        cls_embedding = hidden_states[:, 0]
        logits = self.head(cls_embedding)
        return logits


In [None]:
!pip install torchmetrics

In [None]:
import torch
import torch.nn as nn

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Backbone + linear head, with one output per class folder of the training set.
model = DinoClassifier(backbone, num_classes=len(train_dataset.classes)).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(5):  # Example: 5 epochs
    model.train()
    running_loss = 0.0   # sum of per-sample losses over the epoch
    samples_seen = 0
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)

        outputs = model(imgs)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so a
        # smaller final batch does not skew the epoch average.
        running_loss += loss.item() * imgs.size(0)
        samples_seen += imgs.size(0)

    # Report the epoch mean rather than the loss of whichever batch came last
    # (which is noisy, and undefined when the loader yields no batches).
    epoch_loss = running_loss / max(samples_seen, 1)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")


In [None]:
import torchmetrics

device = "cuda" if torch.cuda.is_available() else "cpu"

# One accuracy tracker per split, both sized to the number of training classes.
n_classes = len(train_dataset.classes)
train_acc_metric = torchmetrics.Accuracy(task="multiclass", num_classes=n_classes).to(device)
val_acc_metric = torchmetrics.Accuracy(task="multiclass", num_classes=n_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


def _train_one_epoch(loader, metric):
    """Do one optimisation pass over `loader`; return (mean loss per sample, accuracy)."""
    model.train()
    weighted_loss_sum = 0
    metric.reset()
    for batch_imgs, batch_labels in loader:
        batch_imgs, batch_labels = batch_imgs.to(device), batch_labels.to(device)

        logits = model(batch_imgs)
        batch_loss = criterion(logits, batch_labels)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by batch size before averaging over the dataset.
        weighted_loss_sum += batch_loss.item() * batch_imgs.size(0)
        metric.update(logits, batch_labels)
    return weighted_loss_sum / len(loader.dataset), metric.compute()


@torch.no_grad()
def _evaluate(loader, metric):
    """Score the model on `loader` without gradients; return (mean loss per sample, accuracy)."""
    model.eval()
    weighted_loss_sum = 0
    metric.reset()
    for batch_imgs, batch_labels in loader:
        batch_imgs, batch_labels = batch_imgs.to(device), batch_labels.to(device)
        logits = model(batch_imgs)
        batch_loss = criterion(logits, batch_labels)

        weighted_loss_sum += batch_loss.item() * batch_imgs.size(0)
        metric.update(logits, batch_labels)
    return weighted_loss_sum / len(loader.dataset), metric.compute()


num_epochs = 10
for epoch in range(num_epochs):
    # ---- Training ----
    train_loss, train_acc = _train_one_epoch(train_loader, train_acc_metric)

    # ---- Validation ----
    val_loss, val_acc = _evaluate(val_loader, val_acc_metric)

    print(f"Epoch {epoch+1}/{num_epochs} "
          f"| Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f} "
          f"| Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")
