In [None]:
# Attach Google Drive to the Colab runtime at a single, named mount point.
from google.colab import drive

MOUNT_POINT = "/content/drive/"
drive.mount(MOUNT_POINT)

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import random

In [None]:
def set_seed(no):
    """Seed the RNGs used in this notebook and request deterministic cuDNN.

    Args:
        no: Integer seed applied to PyTorch, Python's ``random`` and NumPy,
            and exported as ``PYTHONHASHSEED``.
    """
    torch.manual_seed(no)
    random.seed(no)
    np.random.seed(no)
    # Export the seed itself (an empty string would carry no seed at all).
    # Hash randomization of the already-running interpreter is not changed by
    # this; only processes that inherit the environment afterwards see it.
    os.environ['PYTHONHASHSEED'] = str(no)
    # Disable the cuDNN autotuner and force deterministic kernels so repeated
    # runs select the same algorithms.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


set_seed(100)

In [None]:
print(torch.__version__)

2.0.0+cu118


In [None]:
# Use the explicit magic with the absolute path below the Drive mount point so
# that re-running this cell is idempotent (a relative `cd` fails the second time).
%cd /content/drive/MyDrive/DinoV2/

/content/drive/MyDrive/DinoV2


In [None]:
ls

0.DinoV2_Demo.ipynb  [0m[01;34mcat_dog_dataset[0m/  DinoV2_Classification.ipynb


In [None]:
# Data augmentation and normalization for training;
# only deterministic resize/crop and normalization for evaluation.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]
BATCH_SIZE = 6
SPLITS = ['train', 'test']

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD)
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD)
    ]),
}

# Each split lives in its own sub-folder of `data_dir`, laid out for ImageFolder.
data_dir = 'cat_dog_dataset/'
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in SPLITS
}

# Only the training split is shuffled; evaluation order does not affect the
# accuracy, so the test loader iterates in a fixed order.
dataloaders = {
    split: torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=BATCH_SIZE,
        shuffle=(split == 'train'),
        num_workers=4,
    )
    for split in SPLITS
}
dataset_sizes = {split: len(image_datasets[split]) for split in SPLITS}
class_names = image_datasets['train'].classes

# Prefer the first CUDA device when one is available, otherwise run on CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")



In [None]:
dataloaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x7fb35f852410>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x7fb35f852380>}

Model


In [None]:
# Fetch the DINOv2 ViT-S/14 model through torch.hub (served from the local
# hub cache when it has been downloaded before).
DINOV2_REPO = 'facebookresearch/dinov2'
DINOV2_VARIANT = 'dinov2_vits14'
dinov2_vits14 = torch.hub.load(DINOV2_REPO, DINOV2_VARIANT)

Using cache found in /root/.cache/torch/hub/facebookresearch_dinov2_main


In [None]:
class DinoVisionTransformerClassifier(nn.Module):
    """Backbone feature extractor followed by a two-layer MLP classifier.

    Args:
        backbone: Module used as the feature extractor. It must expose a
            ``norm`` sub-module, which ``forward`` applies to the backbone
            output. When omitted, the notebook-level ``dinov2_vits14`` is used,
            which keeps ``DinoVisionTransformerClassifier()`` working as before.
        embed_dim: Width of the feature vector fed to the classifier head.
        hidden_dim: Width of the hidden layer of the classifier head.
        num_classes: Number of output logits.
    """

    def __init__(self, backbone=None, embed_dim=384, hidden_dim=256, num_classes=2):
        super().__init__()
        # Fall back to the globally loaded model only when nothing is injected,
        # so the class no longer depends on hidden notebook state by necessity.
        self.transformer = dinov2_vits14 if backbone is None else backbone
        self.classifier = nn.Sequential(
            nn.Linear(embed_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x):
        """Return the class logits for a batch of inputs ``x``."""
        x = self.transformer(x)
        # NOTE(review): the DINOv2 hub model presumably already returns a
        # normalized embedding, which would make this second norm redundant.
        # It is kept so the numerics match the recorded run -- confirm.
        x = self.transformer.norm(x)
        x = self.classifier(x)
        return x


In [None]:
# DINOv2-based classifier. It is placed on the target device before the
# optimizer is built so the optimizer is created over the on-device parameters.
model = DinoVisionTransformerClassifier()
model = model.to(device)

# ResNet-18 baseline with pretrained weights; its final fully connected layer
# is replaced by a 2-way classifier.
model1 = models.resnet18(weights='IMAGENET1K_V1')
num_ftrs = model1.fc.in_features
model1.fc = nn.Linear(num_ftrs, 2)
model1 = model1.to(device)

criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
# This optimizer is built from `model.parameters()` only: stepping it never
# updates `model1`, which needs an optimizer of its own.
optimizer = optim.Adam(model.parameters(), lr=0.000001)

In [None]:
len(dataloaders["train"])

100

In [None]:
model = model.to(device)

Train

In [None]:
NUM_EPOCHS = 6
LOG_EVERY = 50  # report the mean loss once per this many mini-batches

# Make sure training-mode behaviour is active before optimizing.
model.train()

for epoch in range(NUM_EPOCHS):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(dataloaders["train"], 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics: mean loss over the last LOG_EVERY mini-batches
        running_loss += loss.item()
        if i % LOG_EVERY == LOG_EVERY - 1:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / LOG_EVERY:.3f}')
            running_loss = 0.0

print('Finished Training')

[1,    50] loss: 0.389
[1,   100] loss: 0.162
[2,    50] loss: 0.118
[2,   100] loss: 0.092
[3,    50] loss: 0.071
[3,   100] loss: 0.057
[4,    50] loss: 0.052
[4,   100] loss: 0.051
[5,    50] loss: 0.055
[5,   100] loss: 0.047
[6,    50] loss: 0.033
[6,   100] loss: 0.049
Finished Training


Testing

In [None]:
correct = 0
total = 0
# Switch to inference behaviour for evaluation.
model.eval()
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in dataloaders["test"]:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images.to(device))
        # the class with the highest logit is what we choose as prediction
        predicted = outputs.argmax(dim=1).cpu()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the number of images actually evaluated: the last batch may be
# smaller than the batch size, so len(loader) * batch_size can overcount.
accuracy = 100 * correct / total if total else 0.0
print(f'Accuracy of the network on the {total} test images: {accuracy:.1f} %')

Accuracy of the network on the 42 test images: 100 %


Grad-CAM is not possible here, as the conv layer is not extracting many features (or) shall we try it on the first conv layer in the architecture?

ResNet classification

In [None]:
model1 = model1.to(device)

# The shared `optimizer` was built from `model.parameters()`, so stepping it
# never touches the ResNet weights. Give `model1` an optimizer of its own.
# The learning rate mirrors the one used for `optimizer`; tune it if needed.
optimizer_resnet = optim.Adam(model1.parameters(), lr=0.000001)

RESNET_EPOCHS = 6
RESNET_LOG_EVERY = 50  # report the mean loss once per this many mini-batches

# BatchNorm layers must be in training mode while the weights are optimized.
model1.train()

for epoch in range(RESNET_EPOCHS):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(dataloaders["train"], 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients of the ResNet
        optimizer_resnet.zero_grad()

        # forward + backward + optimize
        outputs = model1(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_resnet.step()

        # print statistics: mean loss over the last RESNET_LOG_EVERY mini-batches
        running_loss += loss.item()
        if i % RESNET_LOG_EVERY == RESNET_LOG_EVERY - 1:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / RESNET_LOG_EVERY:.3f}')
            running_loss = 0.0

print('Finished Training')

[1,    50] loss: 0.771
[1,   100] loss: 0.783
[2,    50] loss: 0.775
[2,   100] loss: 0.787
[3,    50] loss: 0.781
[3,   100] loss: 0.773
[4,    50] loss: 0.790
[4,   100] loss: 0.764
[5,    50] loss: 0.767
[5,   100] loss: 0.783
[6,    50] loss: 0.752
[6,   100] loss: 0.785
Finished Training


In [None]:
correct = 0
total = 0
# Put BatchNorm into inference mode so it uses its running statistics instead
# of the statistics of each small evaluation batch.
model1.eval()
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in dataloaders["test"]:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model1(images.to(device))
        # the class with the highest logit is what we choose as prediction
        predicted = outputs.argmax(dim=1).cpu()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the number of images actually evaluated: the last batch may be
# smaller than the batch size, so len(loader) * batch_size can overcount.
accuracy = 100 * correct / total if total else 0.0
print(f'Accuracy of the network on the {total} test images: {accuracy:.1f} %')

Accuracy of the network on the 42 test images: 42 %


In [None]:
correct

17

In [None]:
total

40