In [3]:
# imports
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display, HTML

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms
from torchvision.utils import make_grid
from torchvision.datasets import ImageFolder
import torchvision.models as models
from torchvision.models import MobileNet_V3_Small_Weights

from torchinfo import summary

from introdl.utils import get_device, load_results, load_model
from introdl.idlmam import train_network
from introdl.visul import plot_training_metrics

# Notebook-wide plotting defaults: seaborn's white-grid theme first, then
# override the default figure size (width, height in inches).
sns.set_theme(style='whitegrid')
plt.rc('figure', figsize=(8, 6))

In [51]:
# The Rock-Paper-Scissors folder holds test/, train/ and validation/ sub-folders,
# each with one sub-folder per class (ImageFolder layout).
data_dir = 'Rock-Paper-Scissors'

# Input resolution and mini-batch size
image_size, batch_size = 224, 32

# Per-channel statistics used for ImageNet-style normalization
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]


def _resize_tensor_normalize():
    """Build the preprocessing pipeline: resize to image_size x image_size,
    convert to a tensor, then normalize with `mean` / `std`."""
    steps = [
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
    return transforms.Compose(steps)


# Training and validation get separate (currently identical) pipeline objects
train_transform = _resize_tensor_normalize()
val_transform = _resize_tensor_normalize()

# Create the ImageFolder datasets for training, validation and testing.
#
# The train/ folder is wrapped twice, once per transform, so the validation split
# is always preprocessed with val_transform. Splitting a single train_transform
# dataset would silently apply any augmentation later added to train_transform
# to the validation images as well.
train_all_dataset = ImageFolder(root=f'{data_dir}/train', transform=train_transform)
val_all_dataset = ImageFolder(root=f'{data_dir}/train', transform=val_transform)

# 80/20 train/validation split; the seed is fixed so the split is reproducible
train_size = int(0.8 * len(train_all_dataset))
val_size = len(train_all_dataset) - train_size
torch.manual_seed(42)
train_dataset, val_split = random_split(train_all_dataset, [train_size, val_size])

# Reuse the held-out indices on the val_transform view of the same folder
# (both views read the same root, so index i refers to the same image file).
val_dataset = torch.utils.data.Subset(val_all_dataset, val_split.indices)

test_dataset = ImageFolder(root=f'{data_dir}/test', transform=val_transform)

# DataLoaders: only the training split is shuffled
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=False)
    for split in (val_dataset, test_dataset)
)

# Class names as discovered by ImageFolder from the train/ sub-folders
class_names = train_all_dataset.classes
print(class_names)


['paper', 'rock', 'scissors']


In [47]:

# Start from the pre-trained MobileNetV3-Small (weights-enum API)
model = models.mobilenet_v3_small(weights=MobileNet_V3_Small_Weights.DEFAULT)

# Swap the last classifier layer for a fresh 3-way linear head
head_in_features = model.classifier[-1].in_features
model.classifier[-1] = nn.Linear(head_in_features, 3)

# Freeze the feature extractor so only the classifier receives gradient updates
for param in model.features.parameters():
    param.requires_grad_(False)

In [48]:
# Layer-by-layer summary for one batch of 3-channel inputs at the training resolution
input_shape = (batch_size, 3, image_size, image_size)
summary(model, input_size=input_shape)

Layer (type:depth-idx)                             Output Shape              Param #
MobileNetV3                                        [32, 3]                   --
├─Sequential: 1-1                                  [32, 576, 7, 7]           --
│    └─Conv2dNormActivation: 2-1                   [32, 16, 112, 112]        --
│    │    └─Conv2d: 3-1                            [32, 16, 112, 112]        (432)
│    │    └─BatchNorm2d: 3-2                       [32, 16, 112, 112]        (32)
│    │    └─Hardswish: 3-3                         [32, 16, 112, 112]        --
│    └─InvertedResidual: 2-2                       [32, 16, 56, 56]          --
│    │    └─Sequential: 3-4                        [32, 16, 56, 56]          (744)
│    └─InvertedResidual: 2-3                       [32, 24, 28, 28]          --
│    │    └─Sequential: 3-5                        [32, 24, 28, 28]          (3,864)
│    └─InvertedResidual: 2-4                       [32, 24, 28, 28]          --
│    │    └─Sequential

In [49]:
# Compute device chosen by the introdl helper; printed so the run records it
device = get_device()
print(device)

# Objective and optimizer
loss_func = nn.CrossEntropyLoss()            # cross-entropy loss for classification
optimizer = optim.AdamW(model.parameters())  # AdamW with its default hyperparameters

# Run settings
epochs = 5
ckpt_file = 'models/model_mnv3.pt'

# Extra metric tracked alongside the loss (appears as the "ACC" columns in results)
score_funcs = dict(ACC=accuracy_score)

mps


In [50]:
# Train the network and collect per-epoch metrics in `results`.
#
# NOTE: `model` is trained in place. To repeat training from the pre-trained
# weights, re-run the model-construction cell before this one.
# resume_checkpoint=False presumably starts a new run instead of continuing from
# checkpoint_file — confirm against introdl's train_network documentation.
results = train_network(model,
                        loss_func,
                        train_loader,
                        device=device,
                        val_loader=val_loader,
                        epochs=epochs,
                        optimizer=optimizer,
                        score_funcs=score_funcs,
                        checkpoint_file=ckpt_file,
                        resume_checkpoint=False)

Epoch: 100%|██████████| 5/5 [00:48<00:00,  9.73s/it]


In [52]:
# Display the per-epoch metrics collected by train_network
results

Unnamed: 0,epoch,total time,train loss,val loss,train ACC,val ACC
0,0,7.833302,0.164662,0.615922,0.951389,0.777778
1,1,15.619367,0.0214,0.602077,0.994048,0.775794
2,2,23.246278,0.011062,0.274159,0.99752,0.878968
3,3,30.803239,0.007612,0.280689,0.998016,0.89881
4,4,38.46575,0.004637,0.014747,0.999504,0.998016


# Use DINOv2

In [6]:
from transformers import AutoImageProcessor, AutoModel
from PIL import Image
import requests

# Smoke test: run one sample image through the DINOv2 backbone.
url = 'http://images.cocodataset.org/val2017/000000039769.jpg'

# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() fails loudly instead of handing PIL an error page.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)

# NOTE: this rebinds `model`, which also names the MobileNetV3 classifier in the
# cells above — running the notebook top to bottom replaces that object here.
processor = AutoImageProcessor.from_pretrained('facebook/dinov2-small')
model = AutoModel.from_pretrained('facebook/dinov2-small')

inputs = processor(images=image, return_tensors="pt")

# Inference only: skip building the autograd graph
with torch.no_grad():
    outputs = model(**inputs)
last_hidden_states = outputs.last_hidden_state


In [None]:
import torch
import torch.nn as nn
from transformers import AutoModel, AutoImageProcessor
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm

# Compute device: CUDA when available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# DINOv2 backbone and its matching image processor
model_name = "facebook/dinov2-small"
processor = AutoImageProcessor.from_pretrained(model_name)
dinov2 = AutoModel.from_pretrained(model_name)

# Wrap the backbone in a classifier.
# NOTE(review): DinoClassifier is not defined or imported in the visible part of
# this notebook — confirm it is available before this cell runs.
num_classes = 10  # Replace with the number of classes in your dataset
model = DinoClassifier(dinov2, num_classes)

# Move the classifier to the selected device
model.to(device)

# Example Dataset and Dataloader (replace with your own dataset)
class CustomImageDataset(torch.utils.data.Dataset):
    """Adapter that runs every image of an (image, label) dataset through a processor.

    Args:
        dataset: indexable, sized collection whose items are (image, label) pairs.
        processor: callable invoked as processor(images=..., return_tensors="pt");
            its result must expose a 'pixel_values' entry with a leading batch axis.
    """

    def __init__(self, dataset, processor):
        self.processor = processor
        self.dataset = dataset

    def __len__(self):
        """Number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return (pixel_values, label) for sample `idx`, batch axis removed."""
        sample = self.dataset[idx]
        image, label = sample
        processed = self.processor(images=image, return_tensors="pt")
        # The processor returns a batch of one; squeeze(0) drops that axis
        return processed['pixel_values'].squeeze(0), label

# Use CIFAR-10 as an example (replace with your custom dataset).
#
# No torchvision transform is applied, so the dataset yields PIL images and the
# DINOv2 processor does all preprocessing. Passing it ToTensor() output (floats
# already scaled to [0, 1]) would make the processor's default 1/255 rescale run
# a second time and squash the inputs towards zero.
cifar_train = datasets.CIFAR10(root='./data', train=True, download=True)
train_dataset = CustomImageDataset(cifar_train, processor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

def run_training_phase(model, loader, criterion, optimizer, device, num_epochs, phase_name):
    """Train `model` for `num_epochs` epochs and print per-epoch loss and accuracy.

    Args:
        model: module called as model(pixel_values); must return class logits.
        loader: iterable of (pixel_values, labels) batches that supports len().
        criterion: loss called as criterion(logits, labels).
        optimizer: optimizer over the parameters to update in this phase.
        device: device the batches are moved to before the forward pass.
        num_epochs: number of passes over `loader`.
        phase_name: label used as the prefix of each printed line (e.g. 'Phase 1').

    Returns:
        A list with one (epoch_loss, epoch_accuracy) tuple per epoch, where
        epoch_loss is the mean of the per-batch losses and epoch_accuracy is
        the fraction of correctly classified samples.
    """
    history = []
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_predictions = 0
        total_predictions = 0

        for pixel_values, labels in tqdm(loader):
            pixel_values, labels = pixel_values.to(device), labels.to(device)

            # Standard step: clear gradients, forward, backward, update
            optimizer.zero_grad()
            logits = model(pixel_values)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            # Track statistics
            running_loss += loss.item()
            predicted = logits.argmax(dim=1)
            correct_predictions += (predicted == labels).sum().item()
            total_predictions += labels.size(0)

        epoch_loss = running_loss / len(loader)
        epoch_accuracy = correct_predictions / total_predictions
        print(f'{phase_name} Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')
        history.append((epoch_loss, epoch_accuracy))
    return history


# Loss shared by both phases
criterion = nn.CrossEntropyLoss()

# Phase 1: freeze the backbone and train only the classification head
optimizer = torch.optim.AdamW(model.fc.parameters(), lr=1e-3)
model.freeze_backbone()

num_epochs_phase_1 = 3  # Train for a few epochs with the backbone frozen
phase_1_history = run_training_phase(
    model, train_loader, criterion, optimizer, device, num_epochs_phase_1, 'Phase 1'
)

# Phase 2: unfreeze the backbone and fine-tune the entire model
model.unfreeze_backbone()

# New optimizer over all parameters, with a lower learning rate for fine-tuning
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

num_epochs_phase_2 = 5  # Fine-tune for a few more epochs
phase_2_history = run_training_phase(
    model, train_loader, criterion, optimizer, device, num_epochs_phase_2, 'Phase 2'
)
