In [1]:
import json
import os
import sys

In [2]:
# os.chdir('../')

In [26]:
from icecream import ic
import numpy as np
import torch
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.models import densenet
from tqdm import tqdm

In [4]:
sys.path.append('../dataset/')
import coco_data_prep

loading annotations into memory...
Done (t=12.21s)
creating index...
index created!
loading annotations into memory...
Done (t=6.65s)
creating index...
index created!
loading annotations into memory...
Done (t=0.12s)
creating index...
index created!


In [5]:
%load_ext autoreload

### Helper Functions

In [72]:
def calc_new_dim(orig_h_or_w: int, padding, kernel, stride) -> int:
    """
    :orig_h_or_w: original width or original height of the input image
    given the original width or height, kernel size, pad width, stride size
    calculate the new width or height
    """
    new_dim = (orig_h_or_w + (2 * padding) - kernel) // stride + 1
    return new_dim

### Global Variables

In [6]:
train_np_data_dir = '../data/numpy_imgs/train_subset/'
train_jpg_data_dir = '../data/raw/train/train2014/'
train_annot_filepath = '../data/raw/train/annotations/instances_train2014.json'

with open('../dataset/imgs_by_supercategory.json', 'r') as f:
    desired_categories = json.load(f)

In [61]:
val_np_data_dir = '../data/numpy_imgs/val_subset/'
val_jpg_data_dir = '../data/raw/val/val2014/'
val_annot_filepath = '../data/raw/train/annotations/instances_val2014.json'

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = torch.device('cpu')

### Load Data

In [65]:
%autoreload

In [66]:
train_ds = coco_data_prep.COCODataset('train',
                                      train_np_data_dir, 
                                      train_annot_filepath,
                                      sample_ratio=0.05,
                                      device='cpu')

loading annotations into memory...
Done (t=15.53s)
creating index...
index created!


100%|████████████████████████████████████████████████████████| 82783/82783 [00:54<00:00, 1505.97it/s]


In [67]:
train_dl = coco_data_prep.get_dataloader(train_ds, 
                                         batch_size=128, 
                                         device='cpu')

In [68]:
val_ds = coco_data_prep.COCODataset('val',
                                    val_np_data_dir, 
                                    val_annot_filepath,
                                    sample_ratio = 0.05,
                                    device='cpu')

loading annotations into memory...
Done (t=9.87s)
creating index...
index created!


100%|████████████████████████████████████████████████████████| 40504/40504 [00:28<00:00, 1424.43it/s]


In [69]:
val_dl = coco_data_prep.get_dataloader(val_ds, 
                                     batch_size=128, 
                                     device='cpu')

### Load Model

#### Densenet121

In [70]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'densenet121', pretrained=True)

Using cache found in /home/ec2-user/.cache/torch/hub/pytorch_vision_v0.10.0


In [71]:
model

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

In [112]:
mine = nn.Sequential(
    model.features.conv0,
    model.features.norm0,
    model.features.relu0,
    model.features.pool0,
    nn.BatchNorm2d(64, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True),
    nn.ReLU(inplace=True),
    nn.Conv2d(64, 32, kernel_size=(7, 7), stride=(1, 1), padding=(0, 0), bias=False),
    nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Linear(in_features=np.prod((128, 32, 25)), out_features=1024, bias=True)
)
print(mine)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): ReLU(inplace=True)
  (6): Conv2d(64, 32, kernel_size=(7, 7), stride=(1, 1), bias=False)
  (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Linear(in_features=102400, out_features=1024, bias=True)
)


In [113]:
features_list = []
labels_list = []

# Use GPUs to speed up the inference, this should take around 10 minutes

mine.to('cpu')
for batch in tqdm(train_dl):
    image_batch, label_batch = [x[0] for x in batch], [x[1] for x in batch]
    image_batch = torch.stack(image_batch).to('cpu') 

    with torch.no_grad():
        features_batch = mine(image_batch) # .flatten(start_dim=1)
    features_list.append(features_batch)
    labels_list.extend(label_batch)

  0%|                                                                         | 0/33 [00:11<?, ?it/s]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (102400x25 and 102400x1024)

In [107]:
features_list[0].shape

torch.Size([128, 32, 25, 25])

In [None]:
for epoch in range(100):
    running_loss = 0.0
    count = 0
    for x,y in data_loader:
        try:
            optimizer.zero_grad()
            outputs = model(x.float())
            loss = criterion(outputs, y.double())
            loss.backward()
            optimizer.step()
            count += 1
            running_loss += loss.item()
            #scheduler.step()
            if count % 100 == 99:    # print every 10 mini-batches
                print('[%d, %5d] loss: %.3f' %
                        (epoch + 1, count + 1, running_loss / 100))
                running_loss = 0.0
        except:
            print('not enough in batch_size')
    print('Epoch-{0} lr: {1}'.format(epoch, optimizer.param_groups[0]['lr']))

In [22]:
torch.cuda.empty_cache()

In [57]:
features_list[0].shape

torch.Size([64, 802816])

In [None]:
for i, vecs in enumerate(features_list):
    torch.save(vecs, f'../data/torch_embeddings/densenet_pretrained_embs_len_50176-{i}.pt')

In [None]:
(0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)

In [12]:
class DensenetInspired(nn.Module):
    def __init__(self):
        super(DensenetInspired).__init__()
        
        self.conv0 = model.features.conv0
        self.bn0 = model.features.norm0
        self.relu0 = model.features.relu0
        self.pool0 = model.features.pool0
        self.db0 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(64, 32, kernel_size(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        self.linear = nn.Linear(in_features=1024, out_features=1000, bias=True)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x