In [None]:
!git clone https://github.com/facebookresearch/dino.git
%cd dino

!pip install -r requirements.txt
!pip install torchvision torch

Cloning into 'dino'...
remote: Enumerating objects: 175, done.[K
remote: Total 175 (delta 0), reused 0 (delta 0), pack-reused 175 (from 1)[K
Receiving objects: 100% (175/175), 24.47 MiB | 6.22 MiB/s, done.
Resolving deltas: 100% (100/100), done.
/content/dino
[31mERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements.txt'[0m[31m
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nv

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

# Preprocessing applied to every image: resize to 224, convert to a tensor,
# then normalise each channel with the fixed mean/std below.
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

train_dataset = torchvision.datasets.CIFAR100(root='./data', train=True,
                                              download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR100(root='./data', train=False,
                                             download=True, transform=transform)

# Seed the split so the same 40000/10000 train/validation partition is produced
# on every run; an unseeded split makes validation metrics incomparable
# between runs (and between restarts of the kernel).
split_generator = torch.Generator().manual_seed(42)
train_set, val_set = torch.utils.data.random_split(
    train_dataset, [40000, 10000], generator=split_generator
)

# Only the training loader is shuffled; validation/test keep a fixed order.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=64, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

100%|██████████| 169M/169M [00:03<00:00, 48.8MB/s]


In [None]:
import vision_transformer as vits

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the `vit_small` architecture (patch size 16) from the cloned module.
model = vits.vit_small(patch_size=16)

# Download the published checkpoint and load it with strict key matching,
# so any mismatch between checkpoint and architecture raises immediately.
state_dict = torch.hub.load_state_dict_from_url(
    url="https://dl.fbaipublicfiles.com/dino/dino_deitsmall16_pretrain/dino_deitsmall16_pretrain.pth",
    map_location=device,
)
model.load_state_dict(state_dict, strict=True)

model = model.to(device)
# Left as the cell's last expression so the notebook displays the module summary.
model.eval()

Downloading: "https://dl.fbaipublicfiles.com/dino/dino_deitsmall16_pretrain/dino_deitsmall16_pretrain.pth" to /root/.cache/torch/hub/checkpoints/dino_deitsmall16_pretrain.pth
100%|██████████| 82.7M/82.7M [00:00<00:00, 235MB/s]


VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(16, 16), stride=(16, 16))
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): ModuleList(
    (0-11): 12 x Block(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=1536, out_features=384, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
  )
  (norm): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
  (head): Identity()
)

In [None]:
def evaluate(model, classifier, dataloader):
    """Return the top-1 accuracy (in percent) of ``classifier(model(x))`` over ``dataloader``.

    Both modules are switched to eval mode (and left in eval mode), and all
    forward passes run under ``torch.no_grad()``.

    Batches are moved to the device holding the first parameter found on
    ``model`` (or on ``classifier`` when ``model`` has no parameters), so the
    function does not depend on a notebook-level ``device`` variable. If
    neither module has parameters, batches are used where they already are.

    Args:
        model: feature extractor called on each image batch.
        classifier: module mapping the extracted features to per-class scores.
        dataloader: iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy as a float in ``[0, 100]``. Returns ``0.0`` when
        ``dataloader`` yields no samples instead of raising
        ``ZeroDivisionError``.
    """
    model.eval()
    classifier.eval()

    # Derive the target device from the modules themselves.
    anchor = next(model.parameters(), None)
    if anchor is None:
        anchor = next(classifier.parameters(), None)
    target_device = anchor.device if anchor is not None else None

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            if target_device is not None:
                images, labels = images.to(target_device), labels.to(target_device)
            features = model(images)
            outputs = classifier(features)
            # torch.max over dim 1 returns (values, indices); the indices are the predictions.
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    # Nothing was evaluated: report 0.0 rather than dividing by zero.
    if total == 0:
        return 0.0
    return 100 * correct / total

def generate_random_mask(model, classifier, sparsity=0.7):
    """Draw an independent random 0/1 mask for every trainable parameter.

    Each mask element is sampled from a Bernoulli distribution with
    probability ``1 - sparsity`` of being 1. Masks have the same shape, dtype
    and device as the parameter they belong to.

    Args:
        model: module whose trainable parameters are keyed by their own name.
        classifier: module whose trainable parameters are keyed as
            ``"classifier.<name>"``.
        sparsity: probability that a mask element is 0.

    Returns:
        Dict mapping parameter name to its mask tensor. Parameters with
        ``requires_grad=False`` are skipped.
    """
    keep_prob = 1 - sparsity
    mask = {}
    # Backbone first, then classifier, so random draws happen in a fixed order.
    for prefix, module in (("", model), ("classifier.", classifier)):
        for name, param in module.named_parameters():
            if not param.requires_grad:
                continue
            # full_like already allocates on the parameter's device.
            keep_probs = torch.full_like(param, keep_prob)
            mask[prefix + name] = torch.bernoulli(keep_probs)
    return mask

In [None]:
import torch.nn as nn

# Linear head on top of the backbone features: embed_dim inputs, 100 outputs.
# Kept inside nn.Sequential so the parameter names stay "0.weight" / "0.bias".
classifier = nn.Sequential(nn.Linear(model.embed_dim, 100))
classifier = classifier.to(device)

criterion = nn.CrossEntropyLoss()

# A single SGD optimizer updates the backbone and the head together.
optimizer = torch.optim.SGD(
    [*model.parameters(), *classifier.parameters()],
    lr=0.001,
    momentum=0.9,
)

In [None]:
import time

num_epochs = 100
val_accuracies = []  # validation accuracy (%) after each epoch
val_losses = []      # sample-weighted mean validation loss after each epoch

print("🚀 Starting centralized training with RANDOM MASK...")
start_time = time.time()

for epoch in range(num_epochs):
    model.train()
    classifier.train()

    # A fresh random gradient mask is drawn once per epoch and reused for
    # every batch of that epoch.
    grad_mask = generate_random_mask(model, classifier, sparsity=0.7)

    # Pair each parameter with its mask once per epoch instead of repeating
    # the name lookups on every training step.
    masked_params = [
        (param, grad_mask[name])
        for name, param in model.named_parameters()
        if name in grad_mask
    ] + [
        (param, grad_mask[f"classifier.{name}"])
        for name, param in classifier.named_parameters()
        if f"classifier.{name}" in grad_mask
    ]

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        features = model(images)
        outputs = classifier(features)
        loss = criterion(outputs, labels)
        loss.backward()

        # Zero the gradient entries that this epoch's mask does not select.
        with torch.no_grad():
            for param, param_mask in masked_params:
                if param.grad is not None:
                    param.grad *= param_mask

        # NOTE(review): the optimizer is SGD with momentum, so entries masked
        # in this epoch can still move through momentum accumulated under
        # earlier masks — confirm that this is intended.
        optimizer.step()

    # ---- validation pass for this epoch ----
    model.eval()
    classifier.eval()
    total = 0
    correct = 0
    val_loss_total = 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            features = model(images)
            outputs = classifier(features)
            loss = criterion(outputs, labels)
            # Weight the batch-mean loss by batch size so the epoch average is
            # per-sample even when the last batch is smaller.
            val_loss_total += loss.item() * labels.size(0)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    acc = 100 * correct / total
    val_loss = val_loss_total / total
    val_accuracies.append(acc)
    val_losses.append(val_loss)

    print(f"Epoch [{epoch+1}/{num_epochs}] - Val Accuracy: {acc:.2f}% | Val Loss: {val_loss:.4f}")

print("✅ Centralized training with mask complete!")
print(f"⏱️ Total time: {(time.time() - start_time)/60:.2f} minutes")

🚀 Starting centralized training with RANDOM MASK...
Epoch [1/100] - Val Accuracy: 27.92% | Val Loss: 2.8469
Epoch [2/100] - Val Accuracy: 61.80% | Val Loss: 1.3242
Epoch [3/100] - Val Accuracy: 68.95% | Val Loss: 1.0629
Epoch [4/100] - Val Accuracy: 73.68% | Val Loss: 0.9154
Epoch [5/100] - Val Accuracy: 75.89% | Val Loss: 0.8489
Epoch [6/100] - Val Accuracy: 75.89% | Val Loss: 0.9315
Epoch [7/100] - Val Accuracy: 77.38% | Val Loss: 0.8894
Epoch [8/100] - Val Accuracy: 78.22% | Val Loss: 0.8966
Epoch [9/100] - Val Accuracy: 80.66% | Val Loss: 0.8262
Epoch [10/100] - Val Accuracy: 81.16% | Val Loss: 0.8067
Epoch [11/100] - Val Accuracy: 82.08% | Val Loss: 0.7716
Epoch [12/100] - Val Accuracy: 82.40% | Val Loss: 0.7624
Epoch [13/100] - Val Accuracy: 82.44% | Val Loss: 0.7780
Epoch [14/100] - Val Accuracy: 82.29% | Val Loss: 0.7767
Epoch [15/100] - Val Accuracy: 82.60% | Val Loss: 0.7688
Epoch [16/100] - Val Accuracy: 82.67% | Val Loss: 0.7794
Epoch [17/100] - Val Accuracy: 82.45% | Val L

In [None]:
import json
import os

# Imported here because the notebook's other matplotlib import lives in a later
# cell; without it this cell fails on a fresh kernel run from top to bottom.
import matplotlib.pyplot as plt

EXPERIMENT_NAME = "centr_with__mask"
SAVE_DIR = "results_centr_with__mask"
os.makedirs(SAVE_DIR, exist_ok=True)

# Final test-set metrics. No earlier cell defines test_acc / test_loss, so
# they are computed here before being written to disk.
model.eval()
classifier.eval()
test_correct, test_total, test_loss_sum = 0, 0, 0.0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = classifier(model(images))
        # Weight each batch-mean loss by its size to get a per-sample average.
        test_loss_sum += criterion(outputs, labels).item() * labels.size(0)
        _, predicted = torch.max(outputs, 1)
        test_correct += (predicted == labels).sum().item()
        test_total += labels.size(0)
test_acc = 100 * test_correct / test_total
test_loss = test_loss_sum / test_total

# Per-epoch validation curves as JSON lists.
with open(f"{SAVE_DIR}/val_accuracy_{EXPERIMENT_NAME}.json", "w") as f:
    json.dump(val_accuracies, f)

with open(f"{SAVE_DIR}/val_loss_{EXPERIMENT_NAME}.json", "w") as f:
    json.dump(val_losses, f)

# Final test metrics as plain text.
with open(f"{SAVE_DIR}/final_test_accuracy_{EXPERIMENT_NAME}.txt", "w") as f:
    f.write(str(test_acc))

with open(f"{SAVE_DIR}/final_test_loss_{EXPERIMENT_NAME}.txt", "w") as f:
    f.write(str(test_loss))

# Side-by-side validation accuracy and loss curves, saved next to the metrics.
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.plot(val_accuracies, label="Val Accuracy")
plt.xlabel("Round")
plt.ylabel("Accuracy (%)")
plt.title("Validation Accuracy")

plt.subplot(1, 2, 2)
plt.plot(val_losses, label="Val Loss", color="orange")
plt.xlabel("Round")
plt.ylabel("Loss")
plt.title("Validation Loss")

plt.tight_layout()
plt.savefig(f"{SAVE_DIR}/val_accuracy_loss_plot_{EXPERIMENT_NAME}.png")
plt.show()

In [None]:
from google.colab import files

# Shell command: recursively zip the results directory into results_centr_with__mask.zip.
!zip -r results_centr_with__mask.zip results_centr_with__mask/

# Pass the archive to google.colab's files.download helper
# (presumably triggers a browser download; only available inside Colab).
files.download('results_centr_with__mask.zip')

In [None]:
# Re-check validation accuracy with the shared `evaluate` helper rather than a
# hand-copied loop; the helper puts both modules in eval mode and runs the
# forward passes under torch.no_grad().
accuracy = evaluate(model, classifier, val_loader)
print(f"Validation Accuracy: {accuracy:.2f}%")

In [None]:
import matplotlib.pyplot as plt

# Validation accuracy per epoch, drawn on an explicit figure/axes pair,
# written to disk and then shown inline.
fig, ax = plt.subplots()
ax.plot(val_accuracies, marker='o')
ax.set_title("Centralized Training Accuracy")
ax.set_xlabel("Epoch")
ax.set_ylabel("Validation Accuracy (%)")
ax.grid(True)
fig.savefig("centralized_accuracy_curve.png")
plt.show()

In [None]:
import csv
# Export one row per epoch: 1-based epoch number and its validation accuracy.
# newline="" hands line-ending control to the csv module; without it, platforms
# that translate "\n" on write end up with a blank line between rows.
with open("centralized_results.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["Epoch", "Accuracy"])
    writer.writerows(enumerate(val_accuracies, start=1))

In [None]:
import matplotlib.pyplot as plt
import numpy as np

classes = train_dataset.classes

model.eval()
classifier.eval()

# Take the first batch of the test loader.
dataiter = iter(test_loader)
images, labels = next(dataiter)

# Inference only: no_grad avoids building an autograd graph for this batch.
with torch.no_grad():
    images, labels = images.to(device), labels.to(device)
    features = model(images)
    outputs = classifier(features)
    _, predicted = torch.max(outputs, 1)

# Undo the Normalize step of the input transform (same mean/std values) so
# imshow receives RGB values in [0, 1]; normalised tensors fall outside that
# range and would be clipped, giving wrong colours.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
display_images = (images.cpu() * norm_std + norm_mean).clamp(0, 1)

# Show up to 10 samples in a 2x5 grid (fewer if the batch is smaller).
num_shown = min(10, len(display_images))
fig = plt.figure(figsize=(10, 5))
for idx in range(num_shown):
    ax = fig.add_subplot(2, 5, idx+1, xticks=[], yticks=[])
    # imshow expects height x width x channels, tensors are channels-first.
    ax.imshow(display_images[idx].permute(1, 2, 0).numpy())
    pred_idx, true_idx = int(predicted[idx]), int(labels[idx])
    # Green title for a correct prediction, red for a wrong one.
    ax.set_title(f"{classes[pred_idx]}\n(True: {classes[true_idx]})",
                 color=("green" if pred_idx == true_idx else "red"))
plt.show()