# The Pursuit of Human Labeling: A New Perspective on Unsupervised Learning

A new approach to unsupervised classification based on existing vision models

Implementation of the research paper published by Artyom Gadetsky and Maria Brbic from the EPFL AI Research Center. The paper can be found [here](https://openreview.net/pdf?id=3GpIeVYw8X).

**Approach**: Human-labeled classes are linearly separable in a sufficiently strong
representation space, and this property is invariant to the underlying model and its resulting representation space.

In [None]:
# Download the data archive from the Brbic lab site and unpack it into the working directory.
!wget https://brbiclab.epfl.ch/wp-content/uploads/2023/11/data.zip
!unzip data.zip

In [47]:
import torch
import torchvision.models as models
import numpy as np

# Second representation: fetch the `dinov2_vits14` checkpoint through torch.hub
# and switch it to inference mode right away (`eval()` returns the module itself).
phi2model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14').eval()

# Keep a handle on the pretrained weights so they can be exported below.
statedict = phi2model.state_dict()


Downloading: "https://github.com/facebookresearch/dinov2/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth" to /root/.cache/torch/hub/checkpoints/dinov2_vits14_pretrain.pth
100%|██████████| 84.2M/84.2M [00:00<00:00, 228MB/s]


In [48]:
# Turn every tensor of the DINOv2 state dict into a NumPy array, keyed by parameter name.
numpy_params = dict(
    (param_name, tensor.numpy()) for param_name, tensor in statedict.items()
)

# Persist the whole mapping in a single .npy file. np.save stores a dict as a
# pickled object, so it is expected to be read back with
# np.load(..., allow_pickle=True).item().
np.save('dinov2basic.npy', numpy_params)

## Getting the First representation

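The first representation comes from an ImageNet-pretrained VGG16 whose final classifier layer is fine-tuned on CIFAR-10.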

In [None]:
!pip3 install timm

In [5]:
import torch

In [1]:
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision import models


# Preprocessing applied to every CIFAR-10 image: convert it to a tensor, then
# normalize each of the three channels.
# NOTE(review): ImageNet-pretrained backbones are usually fed ImageNet mean/std
# rather than 0.5/0.5 — confirm this choice is intentional.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # per channel (x - 0.5) / 0.5: maps ToTensor's [0, 1] range to [-1, 1] (this is not a unit-norm projection)
])

In [2]:
# Build the CIFAR-10 training split first and the test split second, both with
# the shared preprocessing pipeline; the archive is downloaded into ./data if missing.
train_dataset, test_dataset = (
    CIFAR10(root='./data', train=is_train_split, transform=transform, download=True)
    for is_train_split in (True, False)
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:13<00:00, 13064179.36it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [3]:
# Mini-batch iterators (64 images, 2 worker processes) over both splits.
# Only the training data is reshuffled each epoch; the test order stays fixed.
train_loader, test_loader = (
    DataLoader(split, batch_size=64, shuffle=reshuffle, num_workers=2)
    for split, reshuffle in ((train_dataset, True), (test_dataset, False))
)

In [6]:
# Run on the GPU when one is available, otherwise fall back to the CPU so the
# notebook still executes (slowly) on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [7]:
# Show which compute device the models and batches will be moved to.
print(device)

cuda


We need to inspect the architecture of the chosen model and replace the last layer of its head with a new linear layer that keeps the original layer's input size but outputs one score per CIFAR-10 class.

In [27]:
from torchvision import models

# Load the ImageNet-pretrained backbones. The weights are requested explicitly
# through the `weights=` enums instead of the deprecated positional
# `pretrained=True` flag; IMAGENET1K_V1 is the checkpoint that flag selected.
resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
vgg16 = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

# Number of output classes for the new classification heads.
num_classes = 10

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:07<00:00, 75.5MB/s]


In [28]:
# Freeze every ResNet weight. The classification head is replaced afterwards,
# so the freshly created layer is the only part that stays trainable.
for weight in resnet.parameters():
  weight.requires_grad_(False)

In [29]:
# Freeze every VGG16 weight. The last classifier layer is replaced afterwards,
# so the freshly created layer is the only part that stays trainable.
for weight in vgg16.parameters():
  weight.requires_grad_(False)

In [31]:
# Swap the ResNet head for a new linear layer with the same input width and
# one output per target class.
resnet.fc = nn.Linear(in_features=resnet.fc.in_features, out_features=num_classes)

In [32]:
# Replace the last layer of the VGG16 classifier with a new linear layer that
# keeps the input width and outputs one score per class. Uses `num_classes`
# (instead of a literal 10) so both heads are sized from the same setting.
vgg16.classifier[6] = nn.Linear(vgg16.classifier[6].in_features, num_classes)

In [None]:
# Sanity check: list each VGG16 parameter together with its trainable flag.
for layer_name, weight in vgg16.named_parameters():
  print(layer_name, weight.requires_grad)

In [23]:
# Move the ResNet weights onto the selected compute device.
resnet=resnet.to(device)

In [35]:
# Move the VGG16 weights onto the selected compute device before training.
vgg16=vgg16.to(device)

In [37]:
# Number of passes over the training set.
num_epochs = 10

# Standard multi-class classification loss.
criterion = nn.CrossEntropyLoss()

# Only hand the optimizer the parameters that are still trainable; the frozen
# ones never receive gradients, so tracking them is pointless.
trainable_params = [p for p in vgg16.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable_params, lr=0.001, momentum=0.9, weight_decay=0.0001)

# The scheduler is stepped once per epoch, so the cosine half-period must match
# the number of epochs. With the previous T_max=200 the learning rate dropped
# by less than 1% over the 10 epochs, making the schedule effectively a no-op.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

In [None]:
# Fine-tune VGG16 for `num_epochs` epochs, evaluating on the test split after each one.
for epoch in range(num_epochs):

  # ---- training pass ----
  vgg16.train()
  train_loss_sum = 0.0
  train_seen = 0
  for images, labels in train_loader:
    images, labels = images.to(device), labels.to(device)

    outputs = vgg16(images)
    loss = criterion(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Weight each batch loss by its size so the epoch figure is a true
    # per-sample average (the last batch may be smaller than the others).
    train_loss_sum += loss.item() * labels.size(0)
    train_seen += labels.size(0)

  # Report the average over the whole epoch rather than the loss of whichever
  # mini-batch happened to come last, which is a very noisy estimate.
  print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {train_loss_sum / train_seen:.4f}')

  # One scheduler step per epoch.
  scheduler.step()

  # ---- evaluation pass on the test split ----
  vgg16.eval()
  total_loss = 0
  correct = 0
  total = 0

  # No gradients are needed for evaluation.
  with torch.no_grad():
    for images, labels in test_loader:
      images, labels = images.to(device), labels.to(device)
      outputs = vgg16(images)
      batch_loss = criterion(outputs, labels)
      total_loss += batch_loss.item() * labels.size(0)
      total += labels.size(0)

      # Predicted class = index of the largest logit.
      predicted = outputs.argmax(dim=1)
      correct += (predicted == labels).sum().item()

  average = total_loss / total
  print(f'Epoch [{epoch+1}/{num_epochs}], Validation Loss: {average:.4f}')
  accuracy = correct / total
  print(f'Epoch [{epoch+1}/{num_epochs}], Test Accuracy: {100 * accuracy:.2f}%')

In [50]:
# Bring the fine-tuned network back to the CPU and put it in inference mode
# (both calls return the module itself, so they can be chained).
vgg16.to("cpu").eval()

# Save the model state: grab the weights of the first representation ...
state_dictphi1 = vgg16.state_dict()

# ... and convert each tensor to a NumPy array, keyed by parameter name.
tonumpystate = dict(
    (param_name, tensor.numpy()) for param_name, tensor in state_dictphi1.items()
)

# Write the NumPy parameters to a single .npy file.
np.save('vgg16cifar10.npy', tonumpystate)