In [1]:
import json
import os
import sys

In [2]:
# os.chdir('../')

In [3]:
from icecream import ic
import numpy as np
import torch
import torch.multiprocessing as mp
import torch.nn as nn
from torchsummary import summary
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.models import densenet
from tqdm import tqdm

In [6]:
# Put the sibling dataset/ directory on the import path so its helper module
# can be imported from this notebook.
sys.path.append('../dataset/')
# NOTE: importing this module has side effects — the cell output shows
# annotation files being loaded and indexed at import time.
import coco_data_prep

loading annotations into memory...
Done (t=12.45s)
creating index...
index created!
loading annotations into memory...
Done (t=6.76s)
creating index...
index created!
loading annotations into memory...
Done (t=0.12s)
creating index...
index created!


In [7]:
# Enable IPython's autoreload extension so edited modules can be re-imported
# without restarting the kernel.
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Global Variables

In [8]:
# Training-split locations, relative to the notebook's working directory.
train_np_data_dir = '../data/numpy_imgs/train_subset/'
train_jpg_data_dir = '../data/raw/train/train2014/'
train_annot_filepath = '../data/raw/train/annotations/instances_train2014.json'

# Parse the JSON file into `desired_categories`.
# Presumably this groups image ids by COCO supercategory — TODO confirm schema.
with open('../dataset/imgs_by_supercategory.json', 'r') as f:
    desired_categories = json.load(f)

In [9]:
# Validation-split locations, relative to the notebook's working directory.
val_np_dir = '../data/numpy_imgs/valid_subset/'
val_jpg_data_dir = '../data/raw/validation/val2014/'
# NOTE(review): this annotation file is read from under raw/train/ although the
# validation images live under raw/validation/ — confirm the path is intended.
val_annot_filepath = '../data/raw/train/annotations/instances_val2014.json'

In [10]:
# Disabled alternative: pick the first GPU when CUDA is available.
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# The notebook-wide `device` is currently pinned to the CPU.
device = torch.device('cpu')

### Load Data

In [12]:
# Reload already-imported modules now, so recent edits to them are picked up
# before the dataset is built.
%autoreload

In [13]:
# Build the dataset for the 'val' split from the validation array directory and
# its annotation file, passing the CPU as the dataset's device.
val_ds = coco_data_prep.COCODataset(
    'val',
    val_np_dir,
    val_annot_filepath,
    device=torch.device('cpu'),
)

loading annotations into memory...
Done (t=6.88s)
creating index...
index created!



  0%|                                                                      | 0/40504 [00:00<?, ?it/s][A
  0%|▏                                                         | 133/40504 [00:00<00:30, 1324.40it/s][A
  1%|▍                                                         | 303/40504 [00:00<00:26, 1543.48it/s][A
  1%|▊                                                         | 561/40504 [00:00<00:19, 2014.14it/s][A
  2%|█                                                         | 763/40504 [00:00<00:21, 1841.66it/s][A
  2%|█▎                                                        | 950/40504 [00:00<00:23, 1709.32it/s][A
  3%|█▌                                                       | 1124/40504 [00:00<00:24, 1580.92it/s][A
  3%|█▊                                                       | 1285/40504 [00:00<00:25, 1548.94it/s][A
  4%|██                                                       | 1495/40504 [00:00<00:22, 1706.97it/s][A
  4%|██▍                                              

 34%|███████████████████▎                                    | 13944/40504 [00:08<00:14, 1830.75it/s][A
 35%|███████████████████▌                                    | 14130/40504 [00:08<00:14, 1765.62it/s][A
 35%|███████████████████▊                                    | 14309/40504 [00:08<00:16, 1625.50it/s][A
 36%|████████████████████                                    | 14475/40504 [00:08<00:16, 1556.03it/s][A
 36%|████████████████████▎                                   | 14675/40504 [00:09<00:15, 1673.37it/s][A
 37%|████████████████████▌                                   | 14876/40504 [00:09<00:14, 1765.33it/s][A
 37%|████████████████████▊                                   | 15056/40504 [00:09<00:14, 1758.90it/s][A
 38%|█████████████████████                                   | 15234/40504 [00:09<00:15, 1655.64it/s][A
 38%|█████████████████████▎                                  | 15402/40504 [00:09<00:16, 1517.70it/s][A
 38%|█████████████████████▌                            

 69%|██████████████████████████████████████▌                 | 27884/40504 [00:17<00:08, 1497.29it/s][A
 69%|██████████████████████████████████████▊                 | 28073/40504 [00:17<00:07, 1602.14it/s][A
 70%|███████████████████████████████████████                 | 28296/40504 [00:17<00:06, 1776.74it/s][A
 70%|███████████████████████████████████████▎                | 28477/40504 [00:17<00:07, 1702.41it/s][A
 71%|███████████████████████████████████████▌                | 28650/40504 [00:17<00:07, 1585.93it/s][A
 71%|███████████████████████████████████████▊                | 28812/40504 [00:17<00:07, 1500.91it/s][A
 72%|████████████████████████████████████████                | 28972/40504 [00:17<00:07, 1525.79it/s][A
 72%|████████████████████████████████████████▎               | 29145/40504 [00:18<00:07, 1579.07it/s][A
 72%|████████████████████████████████████████▌               | 29349/40504 [00:18<00:06, 1706.68it/s][A
 73%|████████████████████████████████████████▊         

In [14]:
# Wrap the validation dataset in a data loader, requesting batch_size=250 and
# the CPU as device.
val_dl = coco_data_prep.get_dataloader(
    val_ds,
    batch_size=250,
    device=torch.device('cpu'),
)

### Load Model

#### Densenet121

In [15]:
# Load DenseNet-121 with pretrained weights from the torchvision hub repo,
# pinned to the v0.10.0 tag.
model = torch.hub.load(
    'pytorch/vision:v0.10.0',
    'densenet121',
    pretrained=True,
)

Using cache found in /home/ec2-user/.cache/torch/hub/pytorch_vision_v0.10.0


In [16]:
def slice_model(original_model, from_layer=None, to_layer=None):
    """Return an ``nn.Sequential`` built from a slice of a model's direct children.

    Args:
        original_model: Module whose immediate child modules are sliced.
        from_layer: Start index of the slice; ``None`` starts at the first child.
        to_layer: End index of the slice (exclusive); ``None`` runs through the
            last child. Negative indices follow normal list-slicing rules.

    Returns:
        An ``nn.Sequential`` wrapping the selected children in their original
        order. The children are the same module objects, not copies.
    """
    direct_children = [child for child in original_model.children()]
    selected = direct_children[from_layer:to_layer]
    return nn.Sequential(*selected)

In [17]:
# Keep every top-level child of the loaded model except the last one, then
# place the truncated network on the configured device.
# (The dropped child is presumably the classification head — TODO confirm.)
model_conv_features = slice_model(model, to_layer=-1)
model_conv_features = model_conv_features.to(device)
# model_2 = torch.nn.DataParallel(model_conv_features, device_ids=[0,1]).cuda()

In [23]:
# Display the truncated network's layer structure for inspection.
model_conv_features

Sequential(
  (0): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): R

In [25]:
# Run every validation batch through the truncated network and collect one
# flattened feature tensor per batch, plus the matching labels.
features_list = []
labels_list = []

# Inference runs on whatever `device` is configured for the notebook; pointing
# it at a GPU speeds this loop up considerably.
model_conv_features.to(device)
# Switch to evaluation mode before extracting features. A freshly loaded model
# is in training mode, where BatchNorm layers normalise with per-batch
# statistics and update their running averages (torch.no_grad() does not
# prevent that), so embeddings would depend on batch composition.
model_conv_features.eval()

for batch in tqdm(val_dl):
    # Each item of a batch is indexed as (image, label).
    image_batch, label_batch = [x[0] for x in batch], [x[1] for x in batch]
    image_batch = torch.stack(image_batch).to(device)

    # No gradients are needed for pure feature extraction.
    with torch.no_grad():
        features_batch = model_conv_features(image_batch).flatten(start_dim=1)

    # Accumulate in host memory so a GPU run does not pile up device memory
    # across batches; this is a no-op when the features are already on the CPU.
    features_list.append(features_batch.cpu())
    labels_list.extend(label_batch)


  0%|                                                                         | 0/85 [00:00<?, ?it/s][A
  1%|▊                                                                | 1/85 [00:30<42:37, 30.44s/it][A
  0%|                                                                      | 0/25449 [04:47<?, ?it/s][A

  4%|██▎                                                              | 3/85 [00:59<25:38, 18.77s/it][A
  5%|███                                                              | 4/85 [01:08<20:02, 14.84s/it][A
  6%|███▊                                                             | 5/85 [01:17<16:55, 12.69s/it][A
  7%|████▌                                                            | 6/85 [01:26<15:10, 11.52s/it][A
  8%|█████▎                                                           | 7/85 [01:35<13:58, 10.75s/it][A
  9%|██████                                                           | 8/85 [01:44<13:08, 10.24s/it][A
 11%|██████▉                                         

 92%|██████████████████████████████████████████████████████████▋     | 78/85 [12:17<01:03,  9.03s/it][A
 93%|███████████████████████████████████████████████████████████▍    | 79/85 [12:26<00:54,  9.01s/it][A
 94%|████████████████████████████████████████████████████████████▏   | 80/85 [12:35<00:45,  9.01s/it][A
 95%|████████████████████████████████████████████████████████████▉   | 81/85 [12:44<00:35,  8.99s/it][A
 96%|█████████████████████████████████████████████████████████████▋  | 82/85 [12:53<00:26,  8.99s/it][A
 98%|██████████████████████████████████████████████████████████████▍ | 83/85 [13:02<00:17,  8.99s/it][A
 99%|███████████████████████████████████████████████████████████████▏| 84/85 [13:11<00:09,  9.01s/it][A
100%|████████████████████████████████████████████████████████████████| 85/85 [13:11<00:00,  9.32s/it][A


In [26]:
# Persist each batch of embeddings to its own file, numbered in batch order.
# NOTE(review): labels_list is not written out in this cell — confirm it is
# saved elsewhere if the labels are needed alongside the embeddings.
embeddings_dir = '../data/torch_embeddings/densenet_pretrained_val'
# torch.save does not create missing parent directories, so make sure the
# target directory exists before writing.
os.makedirs(embeddings_dir, exist_ok=True)
for i, vecs in enumerate(features_list):
    torch.save(vecs, os.path.join(embeddings_dir, f'dense-embed-val-batch-{i}.pt'))