<a href="https://colab.research.google.com/github/mohammad-rahbari/federated-learning_visual-classification/blob/main/notebooks/Centralized_model_visual_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing DINO and installing its dependencies

In [1]:
# @title Clon the DINO ripo
!git clone https://github.com/facebookresearch/dino.git

Cloning into 'dino'...
remote: Enumerating objects: 175, done.[K
remote: Total 175 (delta 0), reused 0 (delta 0), pack-reused 175 (from 1)[K
Receiving objects: 100% (175/175), 24.47 MiB | 9.58 MiB/s, done.
Resolving deltas: 100% (100/100), done.


In [2]:
# @title Installing required dependencies regarding DINO
%cd dino
!pip install -r requirements.txt
!pip install timm

/content/dino
[31mERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements.txt'[0m[31m
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->timm)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->timm)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->timm)
  Downloading nvidia_cufft


# preprocessing the CIFAR-100 dataset

feature size in CIFAR is 32x32 but DINO requires 224x224 in the input layer.

In first step we upscale the dataset and then we add randomization to it

In last step of transformation we normalize data usind mean value and standard division of ImageNet



In [3]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [4]:
transform_train = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225))
])

transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225))
])

In [5]:
train_dataset = torchvision.datasets.CIFAR100(root='./data', train=True,
                                        download=True, transform=transform_train)
test_daataset = torchvision.datasets.CIFAR100(root='./data', train=False,
                                       download=True, transform=transform_test)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_daataset, batch_size=64, shuffle=False, num_workers=2)

100%|██████████| 169M/169M [00:14<00:00, 12.0MB/s]


# Loading and preparing the pretrained DINO model *(DINO-DeiT_Small)*

In [6]:
# @title loadig the model
import torch.hub

dino_model = torch.hub.load('facebookresearch/dino:main', 'dino_vits16')
# dino_model = torch.hub.load('facebookresearch/dino:main', 'dino_xcit_small_12_p16')

Downloading: "https://github.com/facebookresearch/dino/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/dino/dino_deitsmall16_pretrain/dino_deitsmall16_pretrain.pth" to /root/.cache/torch/hub/checkpoints/dino_deitsmall16_pretrain.pth
100%|██████████| 82.7M/82.7M [00:00<00:00, 317MB/s]


In [7]:
# @title Model Configuration

import torch
import torch.nn as nn

class DinoClassifire(nn.Module):
  def __init__(self, dino_model, num_classes:int=100, device=None):
    super(DinoClassifire, self).__init__()
    self.backbone = dino_model

    #We need to freaze thhe parameters of bakbone first so we can train only on the head layer(output layer)
    for param in self.backbone.parameters():
      param.requiers_grad = False

    #determine the Device
    if device is None:
      device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    self.backbone.to(device)

    #To detect the output feature dimontion of backbone we run  Dummy forward pass
    with torch.no_grad():
      dummy_input = torch.randn(1,3,224,224).to(device)
      dummy_out = self.backbone(dummy_input)


      #If the output is 3D (B, T, D), we assume first token is the [CLS] token.
      if dummy_out.dim() == 3:
        dummy_feature = dummy_out[:,0]
      else:
        dummy_feature = dummy_out
      feature_dim = dummy_feature.shape[1]
      print("Detected feature dimontion:", feature_dim)


      #Difineing the classification Head
      self.head = nn.Linear(feature_dim, num_classes)

      #Ensure the head is trainable.
      for param in self.head.parameters():
        param.requires_grad = True

  def forward(self,x):

    #pass the input through the backbone
    features = self.backbone(x)

    # If featers are retuened as (B, T, D), use the first token
    if features.dim() == 3:
      cls_token = features[:,0]
    else:
      cls_token = features
    logits = self.head(cls_token)

    return logits

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DinoClassifire(dino_model=dino_model, num_classes=100, device=device)
model.to(device)

Detected feature dimontion: 384


DinoClassifire(
  (backbone): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 384, kernel_size=(16, 16), stride=(16, 16))
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (blocks): ModuleList(
      (0-11): 12 x Block(
        (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=384, out_features=1152, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=384, out_features=384, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=384, out_features=1536, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=1536, out_features=384, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (norm): L

# Config the loss, optimizer and training loop

In [None]:
import torch.optim as optim
from IPython.display import clear_output

criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.head.parameters(),lr=1e-3)
optimizer = optim.SGD(model.head.parameters(),lr=1e-3, momentum=0.9 )

num_epochs = 10

for epoch in range(num_epochs):
  model.train()
  running_loss = 0.0
  message = ""
  for index, (images, lables) in enumerate(train_loader):
    images = images.to(device).requires_grad_(True)
    lables =  lables.to(device)

    if index % 10 == 0:
      clear_output(wait=True)
      print(f"Epoch:{epoch+1}/{num_epochs}, Batch:{index+1}/{len(train_loader)}, {message}")

    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs,lables)


    loss.backward()
    optimizer.step()
    running_loss += loss.item() +images.size(0)

  epoch_loss = running_loss / len(train_loader.dataset)
  message = f"Epoch:{epoch+1},Loss {epoch_loss:.4f}"
  print(message)

Epoch:1/10, Batch:51/782, 


# Evaluation

In [9]:
def eveluation(model, data_loader):
  model.eval()
  correct = 0
  total = 0

  with torch.no_grad():
    for images, lables in  data_loader:
      images, lables = images.to(device), lables.to(device)
      outputs = model(images)

      _, prediction = torch.max(outputs.data,1)

      total += lables.size(0)
      correct += (prediction == lables).sum().item()
    for i in range(len(prediction)):
      print(prediction[i], lables[i])
    return correct/ total

test_accracy = eveluation(model, test_loader)
print(f"evaluation accurace:{test_accracy:.2f}")


tensor(59, device='cuda:0') tensor(96, device='cuda:0')
tensor(90, device='cuda:0') tensor(90, device='cuda:0')
tensor(77, device='cuda:0') tensor(14, device='cuda:0')
tensor(77, device='cuda:0') tensor(77, device='cuda:0')
tensor(4, device='cuda:0') tensor(65, device='cuda:0')
tensor(7, device='cuda:0') tensor(7, device='cuda:0')
tensor(75, device='cuda:0') tensor(75, device='cuda:0')
tensor(27, device='cuda:0') tensor(27, device='cuda:0')
tensor(16, device='cuda:0') tensor(16, device='cuda:0')
tensor(30, device='cuda:0') tensor(30, device='cuda:0')
tensor(74, device='cuda:0') tensor(50, device='cuda:0')
tensor(83, device='cuda:0') tensor(83, device='cuda:0')
tensor(63, device='cuda:0') tensor(14, device='cuda:0')
tensor(51, device='cuda:0') tensor(51, device='cuda:0')
tensor(42, device='cuda:0') tensor(42, device='cuda:0')
tensor(70, device='cuda:0') tensor(70, device='cuda:0')
evaluation accurace:0.74


In [10]:
torch.save(model.state_dict(), 'model.pth')