In [1]:
import torch
dinov2_vits14 = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')

Using cache found in /home/krishna/.cache/torch/hub/facebookresearch_dinov2_main


In [2]:
model  = dinov2_vits14

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import random

In [4]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
       
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = '/media/krishna/New Volume/4-2_project/Covid19-dataset'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'test']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=6,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
def check_one_dim(arr):
    non_singleton_dims = [dim for dim in arr.shape if dim != 1]
    return len(non_singleton_dims) == 1


print(check_one_dim(np.array([[[1,2,3,4,5]]])))
check_one_dim(np.array([[1, 2, 3, 4, 5],[2,3,4,5,6]]))

True


False

In [6]:
model.named_modules

<bound method Module.named_modules of DinoVisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (blocks): ModuleList(
    (0-11): 12 x NestedTensorBlock(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): MemEffAttention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=1536, out_features=384, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
      (ls2): LayerScale()
      (

In [7]:
captured_output = None


def hook_fn(module, input, output):
    global captured_output
    captured_output = output

In [8]:

hook = model.head.register_forward_hook(hook_fn)

dummy_input = torch.randn(1, 3, 224, 224) 

with torch.no_grad():   
    model(dummy_input)
captured_output

tensor([[ 2.0760,  0.5094,  2.6018, -0.6900, -0.6815,  2.7839,  3.4812, -0.3391,
         -1.7705, -1.4766,  0.1756, -1.0031,  0.0779, -0.4190, -0.7863,  0.9571,
         -1.2700, -1.2374, -1.7150, -0.1645, -1.5155,  5.8442, -2.0942, -0.5449,
         -0.0348, -0.1112,  2.9165, -1.8092,  2.1167, -2.2907, -1.3612,  1.5094,
         -2.3725,  0.6060, -3.3090, -2.5324, -0.7915, -3.6165, -1.3682,  2.0614,
          3.5618, -1.8876, -1.3837,  4.9894, -1.2586,  2.8061, -0.3885,  0.5692,
         -0.5395,  2.8630,  2.2034, -0.0611, -0.8806,  0.8421,  2.7172, -0.2125,
         -3.6988, -2.2154, -3.4312, -2.0203,  1.6934, -3.2019, -2.9934,  0.4525,
         -1.9814,  0.9606,  2.3718,  0.8299,  5.4598,  0.9661, -1.1369,  3.3753,
         -0.9526, -2.5842, -5.5036,  1.1879,  2.2826, -0.2027, -0.7028, -2.7589,
          0.5832, -0.9343, -4.0178, -0.6276,  5.5068,  0.1052,  1.2478, -0.2787,
          2.7802, -3.0194, -0.6918,  0.8641,  4.2817, -1.9419, -2.0761,  3.5279,
          3.1512,  4.8162, -

In [9]:
captured_output.shape

torch.Size([1, 384])

In [43]:
hook.remove()

In [11]:
transform1 = transforms.Compose([           
                                transforms.Resize(520),
                                transforms.CenterCrop(518), #should be multiple of model patch_size                 
                                transforms.ToTensor(),                    
                                transforms.Normalize(mean=0.5, std=0.2)
                                ])

In [44]:

def get_features(name):
    def hook(model, input, output):
        features[name] = output.detach()
    return hook
features = {}
handle=model.head.register_forward_hook(get_features('feats'))

In [45]:
from tqdm import tqdm
folder_path = "/media/krishna/New Volume/4-2_project/Covid19-dataset/train"

data_dict = []

with torch.no_grad():
    for class_folder in (os.listdir(folder_path)):
        class_path = os.path.join(folder_path, class_folder)
        if os.path.isdir(class_path):
            for img_name in tqdm(os.listdir(class_path)):
                captured_output = None
                img_path = os.path.join(class_path, img_name)
                img = Image.open(img_path).convert('RGB')
                img_t = transform1(img)

             
                with torch.no_grad():
                    model(img_t.unsqueeze(0))

              
                data_dict.append({
                    'file_path': img_path,
                    'class_label': class_folder,
                    'embeddings': features['feats']
                })


  0%|          | 0/111 [00:00<?, ?it/s]

100%|██████████| 111/111 [05:47<00:00,  3.13s/it]
100%|██████████| 70/70 [03:34<00:00,  3.06s/it]
100%|██████████| 70/70 [04:02<00:00,  3.46s/it]


In [57]:
for i in range(len(data_dict)):
    # data_dict[i]['embeddings'] = data_dict[i]['embeddings'].detach().numpy()
    data_dict[i]['embeddings'] = data_dict[i]['embeddings'].flatten()

In [64]:
type(data_dict)

list

In [66]:
import json
for item in data_dict:
    item["embeddings"] = item["embeddings"].tolist()


json_data = json.dumps(data_dict, indent=4)


print(json_data)

with open('data.json', 'w') as json_file:
    json_file.write(json_data)

[
    {
        "file_path": "/media/krishna/New Volume/4-2_project/Covid19-dataset/train/Covid/01.jpeg",
        "class_label": "Covid",
        "embeddings": [
            1.3095072507858276,
            2.8001673221588135,
            -1.0062352418899536,
            4.663592338562012,
            -2.955740213394165,
            -1.5341284275054932,
            -1.3919763565063477,
            1.56229829788208,
            2.477426528930664,
            1.0419279336929321,
            -0.4005652368068695,
            1.7372806072235107,
            0.3449461758136749,
            3.332782506942749,
            3.7832987308502197,
            1.1264911890029907,
            -3.5011558532714844,
            -3.6269233226776123,
            -0.069302499294281,
            -0.8945092558860779,
            1.3181043863296509,
            -2.925920009613037,
            4.151780605316162,
            -0.8602784872055054,
            1.578072190284729,
            2.4068448543548584,
     

In [68]:
with open('data.json', 'r') as json_file:
    data = json.load(json_file)


for item in data:
    item["embeddings"] = np.array(item["embeddings"])