<a href="https://colab.research.google.com/github/rage-against-the-machine-learning/cs7643-DL-Group-Project/blob/plot-utilities/Homegrown_Resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Reference: https://towardsdatascience.com/residual-network-implementing-resnet-a7da63c7b278

In [None]:
# Mount Google Drive so that the files under /content/drive (dataset, helper
# modules, checkpoints) are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Root folder on the mounted Drive; later cells create and read its data/ and
# annotations/ sub-folders.
numpy_data_path = "/content/drive/MyDrive/numpy_images"

Training Data

In [None]:
!wget https://cs7643-raml.s3.us-west-2.amazonaws.com/train_subset_np_imgs.zip
!wget https://cs7643-raml.s3.us-west-2.amazonaws.com/instances_train_subset.json

--2021-07-29 15:33:54--  https://cs7643-raml.s3.us-west-2.amazonaws.com/train_subset_np_imgs.zip
Resolving cs7643-raml.s3.us-west-2.amazonaws.com (cs7643-raml.s3.us-west-2.amazonaws.com)... 52.218.152.161
Connecting to cs7643-raml.s3.us-west-2.amazonaws.com (cs7643-raml.s3.us-west-2.amazonaws.com)|52.218.152.161|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5972049681 (5.6G) [application/zip]
Saving to: ‘train_subset_np_imgs.zip’


2021-07-29 15:38:35 (20.3 MB/s) - ‘train_subset_np_imgs.zip’ saved [5972049681/5972049681]

--2021-07-29 15:38:35--  https://cs7643-raml.s3.us-west-2.amazonaws.com/instances_train_subset.json
Resolving cs7643-raml.s3.us-west-2.amazonaws.com (cs7643-raml.s3.us-west-2.amazonaws.com)... 52.218.176.81
Connecting to cs7643-raml.s3.us-west-2.amazonaws.com (cs7643-raml.s3.us-west-2.amazonaws.com)|52.218.176.81|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 580614542 (554M) [application/json]
Saving to: ‘instanc

In [None]:

# Unpack the downloaded archive into <numpy_data_path>/data and move the
# annotation file into <numpy_data_path>/annotations.
# `{numpy_data_path}` is substituted by IPython with the Python variable's value.
# NOTE(review): `mv` removes instances_train_subset.json from the working
# directory, so this cell cannot be re-run without downloading that file again.
! mkdir -p {numpy_data_path}/data
! unzip -q train_subset_np_imgs.zip -d {numpy_data_path}/data
! mkdir -p {numpy_data_path}/annotations
! mv instances_train_subset.json  {numpy_data_path}/annotations

Dataset and DataLoader

In [None]:
import os
import sys
import multiprocessing as mp
import random

In [None]:
# Make the helper modules stored in the Drive "dataset" folder importable.
sys.path.append("/content/drive/MyDrive/dataset")
#import coco_api_helper
#import coco_data_prep
# NOTE(review): no visible cell calls into config_dataset (only a docstring
# mentions it) -- confirm this import is still needed.
import config_dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
import torch 
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [None]:
import os
import json

# Category records (each one carries an "id") that restrict which images are used.
with open(
    os.path.join(numpy_data_path, "annotations", "categories.json"), 'r'
) as j:
    desired_categories = json.load(j)

# COCO index over the training annotations.
train_annot = COCO("/content/drive/MyDrive/numpy_images/annotations/instances_train2014.json")

category_ids = [category["id"] for category in desired_categories]
# One list of image ids per category, in the same order as category_ids.
img_ids = [train_annot.getImgIds(catIds=[category_id]) for category_id in category_ids]

# Collapse the per-category lists into a single list of unique image ids.
desired_img_ids = list(
    {image_id for ids_of_category in img_ids for image_id in ids_of_category}
)

loading annotations into memory...
Done (t=16.84s)
creating index...


KeyboardInterrupt: ignored

In [None]:
# Show the contents of the training image folder; the whole listing is echoed
# as the cell output.
os.listdir("/content/drive/MyDrive/numpy_images/data/train_subset")

In [None]:
import os
# File names currently present in the training image folder.
subset_list = os.listdir("/content/drive/MyDrive/numpy_images/data/train_subset")

In [None]:
# Restrict desired_img_ids to the images whose .np file is present on disk.
test = train_annot.loadImgs(desired_img_ids)
subset_list = os.listdir("/content/drive/MyDrive/numpy_images/data/train_subset")
# Membership tests against a set are O(1); testing against the raw list
# rescanned the whole directory listing once per image.
subset_files = set(subset_list)
# image id -> expected file name (the COCO "*.jpg" name with ".jpg" swapped for ".np").
test_dict = {
    img_id: item["file_name"].replace(".jpg", ".np")
    for img_id, item in zip(desired_img_ids, test)
}
final = {k: v for (k, v) in test_dict.items() if v in subset_files}
desired_img_ids = list(final.keys())

In [None]:
def get_label_map(label_file):
    """Build an integer lookup table from a comma-separated label file.

    Every non-blank line is split on ','. The integer in the SECOND field
    becomes the key and the integer in the FIRST field becomes the value; any
    further fields on the line are ignored.

    Args:
        label_file (str): path of the text file to read.
    Returns:
        dict[int, int]: ``{int(fields[1]): int(fields[0])}`` for each non-blank line.
    Raises:
        IndexError: a non-blank line has fewer than two fields.
        ValueError: one of the first two fields is not an integer.
    """
    # NOTE(review): COCOAnnotationTransform looks entries up by COCO
    # category_id, so the second column is presumably the category id --
    # confirm against the layout of coco_labels.txt.
    label_map = {}
    # `with` guarantees the handle is closed (the file used to be left open).
    with open(label_file, 'r') as labels:
        for line in labels:
            if not line.strip():
                # Tolerate blank lines, e.g. a trailing newline at end of file.
                continue
            ids = line.split(',')
            label_map[int(ids[1])] = int(ids[0])
    return label_map

In [None]:
class COCOAnnotationTransform(object):
    """Transforms COCO annotations into a scaled bounding box plus label index.

    Initialized with the lookup table that get_label_map builds from
    coco_labels.txt.
    """
    def __init__(self):
        self.label_map = get_label_map('/content/drive/MyDrive/dataset/coco_labels.txt')

    def __call__(self, target, width = 224, height = 224):
        """
        Args:
            target (list[dict]): COCO annotation dicts. Entries with a 'bbox'
                key are read as ``[x, y, box_width, box_height]`` and must
                also carry a 'category_id' that is a key of ``self.label_map``.
            height (int): value the y coordinates are divided by
            width (int): value the x coordinates are divided by
        Returns:
            ``[xmin, ymin, xmax, ymax, label_idx]`` for the FIRST annotation in
            ``target`` that has a 'bbox' (coordinates divided by width/height),
            or ``[]`` when no annotation has one.
        Raises:
            KeyError: a 'category_id' is missing from ``self.label_map``.
        """
        scale = np.array([width, height, width, height])
        res = []
        for obj in target:

            if 'bbox' in obj:
                # Work on a copy: converting width/height to corner coordinates
                # in place corrupted the caller's annotation, so a second call
                # on the same annotation produced wrong boxes.
                bbox = list(obj['bbox'])
                bbox[2] += bbox[0]
                bbox[3] += bbox[1]
                label_idx = self.label_map[obj['category_id']] # -1
                final_box = list(np.array(bbox)/scale)
                final_box.append(label_idx)
                res.append(final_box)  # [xmin, ymin, xmax, ymax, label_idx]
            else:
                print("no bbox problem!")

        # Only the first box is returned (existing behaviour); an empty result
        # is reported as [] instead of raising IndexError.
        return res[0] if res else []

In [None]:
class COCODataset(Dataset):
    """Dataset of pre-processed COCO images stored as numpy files.

    Each item is ``(image tensor, most frequent category id, raw annotations)``.
    Relies on the notebook-level ``desired_img_ids`` list being defined before
    an instance is created.
    """
    def __init__(self,
                 np_img_data_dir,
                 annot_filepath,
                 target_transform = COCOAnnotationTransform(),
                 sample_ratio: float = None,
                 device="cpu"):
        """
        :np_img_data_dir: local directory where you have unzipped np files saved 
            np files are the transformed images from s3 that were normalized/ resized/ padded
        :annot_filepath: filepath of the original coco dataset corresponding to the datasplit of your choosing
        :target_transform: callable stored on the instance; __getitem__ does not apply it
            at present. NOTE: the default instance is created once, when this class
            definition is executed.
        :sample_ratio: specified float between 0 and 1 for the % of images from the data split you want to use
        :device: cpu or gpu
        """
        self.device = device
        self.np_img_data_dir = np_img_data_dir

        self.sample_ratio = sample_ratio
        self.coco = COCO(annot_filepath)
        self.target_transform = target_transform

        # All possible image ids
        all_train_img_ids = list(self.coco.imgs.keys())
        # Filter down to the image ids applicable to our supercategories.
        # A set gives O(1) membership tests; scanning the desired_img_ids list
        # once per image made this loop take minutes.
        wanted_img_ids = set(desired_img_ids)
        self.ids = [ii for ii in tqdm(all_train_img_ids) if ii in wanted_img_ids]

        if self.sample_ratio is not None:
            # Random subset without replacement. No seed is set here, so the
            # subset depends on the state of numpy's global RNG.
            self.ids = list(np.random.choice(self.ids, int(self.sample_ratio * len(self.ids)), replace=False))

    def __getitem__(self, index):
        """Return ``(img, target, labels)`` for the image at position ``index``.

        img:    float tensor built from the stored array with its axes
                reordered by ``transpose(2, 0, 1)``, moved to ``self.device``.
        target: one-element float tensor holding the category_id that occurs
                most often among the image's annotations.
        labels: the annotation dicts returned by ``coco.loadAnns``.
        """
        coco = self.coco

        img_id = self.ids[index]
        ann_ids = coco.getAnnIds(imgIds=[img_id])
        labels = coco.loadAnns(ann_ids)
        cat_list = [item["category_id"] for item in labels]
        # Most frequent category id among this image's annotations.
        target = max(set(cat_list), key=cat_list.count)
        target = torch.Tensor([target])

        # The array on disk shares the image's base file name, with a ".np" extension.
        path = coco.loadImgs([img_id])[0]['file_name']
        np_path = path.split('.')[0] + '.np'
        img = np.load(os.path.join(self.np_img_data_dir, np_path))
        
        # Convert the image to tensor so it's compatible w/ pytorch data loader
        img = torch.Tensor(img.transpose(2,0,1)).to(device=self.device).float()

        # Target transformation needs to happen to extract bounding boxes from annots
        #if self.target_transform is not None:
        #    target = torch.Tensor(self.target_transform(target))


        return img, target, labels

    def __len__(self):
        """Number of image ids kept after filtering and optional sampling."""
        return len(self.ids)
    
def get_dataloader(dataset_obj,
                   batch_size: int=100,
                   device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
                   loader_params:dict = {"shuffle" : False,
                                         "num_workers": mp.cpu_count(),
                                         "collate_fn": lambda x: x}):
    """Returns data loader for custom dataset.
    Args:
        :dataset_obj: dataset object returned from COCODataset class
        :batch_size: specified batch size, if you have memory errors while running, make this smaller;
            always overrides a 'batch_size' entry in loader_params
        :device: currently unused; kept so existing callers keep working
        :loader_params: extra keyword arguments forwarded to DataLoader. The default is
            no shuffling, one worker per CPU and an identity collate_fn, i.e. each
            batch is the plain list of samples. The dict is copied, never modified.
            specify:
                shuffle: True / False
                num_workers: number of parallel processes to run
                collate_fn: lambda x: x
    Returns:
        data_loader: data loader for COCODataset.
    """
    # Copy before adding batch_size: updating loader_params in place leaked the
    # value into the shared default dict and into the caller's own dict.
    params = dict(loader_params)
    params['batch_size'] = batch_size

    data_loader = DataLoader(
        dataset=dataset_obj,
        **params,
    )
    return data_loader

In [None]:
# Folder holding the training images as numpy files, and the COCO annotation
# file that COCODataset indexes.
np_data_dir = "/content/drive/MyDrive/numpy_images/data/train_subset"
np_annot_dir = "/content/drive/MyDrive/numpy_images/annotations/instances_train2014.json"

In [None]:
# Training dataset built from a random 25% of the filtered image ids.
# NOTE(review): no random seed is set in the visible cells, so the sampled
# subset presumably differs between runs -- confirm whether that is intended.
train_dataset = COCODataset(np_data_dir, np_annot_dir, sample_ratio=0.25)

loading annotations into memory...
Done (t=15.62s)
creating index...


  0%|          | 23/82783 [00:00<06:05, 226.23it/s]

index created!


100%|██████████| 82783/82783 [02:40<00:00, 515.78it/s]


In [None]:
from torch.utils.data import DataLoader
# NOTE(review): Pool and Process are imported but not used in this cell.
from torch.multiprocessing import Pool, Process, set_start_method
import torch.multiprocessing
# One DataLoader worker per CPU reported by the runtime.
cpu_count = torch.multiprocessing.cpu_count()
print('cpus', cpu_count)

# Best effort: a RuntimeError from set_start_method (presumably raised when a
# start method has already been chosen, e.g. on re-running this cell) is ignored.
try: 
  set_start_method("spawn")
except RuntimeError:
  pass
batch_size = 64

# The identity collate_fn keeps every batch as a plain list of the per-sample
# (img, target, labels) tuples; the loops below unpack that list themselves.
# NOTE(review): this builds the loader directly instead of calling get_dataloader.
train_dl = torch.utils.data.DataLoader(dataset=train_dataset, 
                                          batch_size=batch_size,
                                          shuffle=False,
                                          num_workers=cpu_count,
                                          collate_fn=lambda x: x)

cpus 4


In [None]:
image_batches = []
label_list = []
annot_list = []

# One full pass over the loader; only the per-sample label tensors are kept.
for batch in tqdm(train_dl):
  # Each batch is a list of (img, target, labels) tuples.
  image_batch = [sample[0] for sample in batch]
  label_batch = [sample[1] for sample in batch]
  annot_batch = [sample[2] for sample in batch]
  label_list += label_batch



100%|██████████| 7/7 [00:03<00:00,  2.23it/s]


ResNet

In [None]:
import sys
# Make the ResNet building blocks stored in the Drive "modules" folder importable.
sys.path.append("/content/drive/MyDrive/modules")
import resnet_modules as rn

In [None]:
def resnet18(in_channels, n_classes, block=rn.ResNetBasicBlock, *args, **kwargs):
  """Build an rn.ResNet from `block` with the depth list [2, 2, 2, 2].

  Any extra positional or keyword arguments are forwarded to rn.ResNet unchanged.
  """
  # Presumably the number of blocks per stage; the keyword is spelled `deepths`
  # to match what this call has always passed to rn.ResNet.
  stage_depths = [2, 2, 2, 2]
  return rn.ResNet(in_channels, n_classes, *args, block=block, deepths=stage_depths, **kwargs)

In [None]:
from torchsummary import summary
# NOTE(review): 99 output classes is a magic number. The training cell feeds
# raw category_id values as class targets, so it presumably has to exceed the
# largest category id -- confirm.
model = resnet18(in_channels=3,n_classes=99)
# Layer-by-layer summary for a single 3x224x224 input.
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
        Conv2dAuto-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
        Conv2dAuto-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
 ResNetBasicBlock-10           [-1, 64, 56, 56]               0
       Conv2dAuto-11           [-1, 64, 56, 56]          36,864
      BatchNorm2d-12           [-1, 64, 56, 56]             128
             ReLU-13           [-1, 64, 56, 56]               0
       Conv2dAuto-14           [-1, 64,

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Set Device

In [None]:
import torch

# Use the first CUDA device when one is available, otherwise the CPU.
has_cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if has_cuda else torch.device("cpu")
if has_cuda:
    # Current device index, number of devices, name of device 0 (one per line).
    print(
        torch.cuda.current_device(),
        torch.cuda.device_count(),
        torch.cuda.get_device_name(0),
        sep="\n",
    )

device

device(type='cpu')

In [None]:
import torch.nn as nn

Initialize weights

In [None]:
def initialize_custom_block(m):
  """Xavier-uniform initialise the weight of `m` when it is an rn.Conv2dAuto.

  Written as a callback for ``Module.apply``; every other module type is left
  untouched.
  """
  if isinstance(m, rn.Conv2dAuto):
    # xavier_uniform_ is the in-place initialiser; the un-suffixed
    # xavier_uniform is deprecated and emits a warning on every call.
    torch.nn.init.xavier_uniform_(m.weight)

In [None]:
def initialize_network(m):
  """Weight-initialisation callback for ``model.apply``.

  * nn.Conv2d / rn.Conv2dAuto: Xavier-uniform weights.
  * nn.BatchNorm2d: weight set to 1, bias set to 0.
  * rn.CustomResNetBlock / rn.ResNetLayer: initialize_custom_block is applied
    to the module and all of its sub-modules.
  Any other module type is left untouched.
  """
  if isinstance(m, nn.Conv2d):
    # xavier_uniform_ replaces the deprecated xavier_uniform, which warns on every call.
    torch.nn.init.xavier_uniform_(m.weight)
  elif isinstance(m, nn.BatchNorm2d):
    torch.nn.init.constant_(m.weight.data, 1)
    torch.nn.init.constant_(m.bias.data, 0)
  elif isinstance(m, rn.Conv2dAuto):
    # NOTE(review): if rn.Conv2dAuto subclasses nn.Conv2d, the first branch
    # already handles it and this one is never reached -- confirm.
    torch.nn.init.xavier_uniform_(m.weight)
  elif isinstance(m, rn.CustomResNetBlock):
    m.apply(initialize_custom_block)
  elif isinstance(m, rn.ResNetLayer):
    m.apply(initialize_custom_block)



In [None]:
# Run initialize_network on the model and each of its sub-modules; the returned
# model is echoed as the cell output.
model.apply(initialize_network)

  This is separate from the ipykernel package so we can avoid doing imports until


ResNet(
  (encoder): ResNetEncoder(
    (gate): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (blocks): ModuleList(
      (0): ResNetLayer(
        (blocks): Sequential(
          (0): ResNetBasicBlock(
            (blocks): Sequential(
              (0): Sequential(
                (conv): Conv2dAuto(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (1): ReLU()
              (2): Sequential(
                (conv): Conv2dAuto(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_runn

In [None]:
# Debug aid: prints every parameter tensor of the model.
# NOTE(review): this dumps a very large amount of output; consider clearing the
# output or removing the cell before sharing the notebook.
for param in model.parameters():
  print(param)

Parameter containing:
tensor([[[[ 3.6744e-02, -2.8817e-02, -8.9449e-03,  ...,  1.4965e-03,
            1.4402e-02,  7.0007e-03],
          [ 1.5664e-02, -1.5393e-02,  1.0652e-02,  ..., -1.0610e-02,
            2.3777e-02,  5.1124e-03],
          [ 4.4250e-03,  1.8421e-02, -1.8576e-02,  ...,  2.3980e-02,
           -1.6193e-02,  1.6713e-02],
          ...,
          [ 3.3926e-03, -1.4656e-02,  1.7499e-02,  ..., -3.4492e-03,
            4.0343e-02,  1.5561e-02],
          [ 1.7671e-02, -1.6672e-03,  2.0160e-02,  ...,  3.3077e-02,
            9.4231e-03, -1.5566e-02],
          [ 1.3024e-02,  1.2205e-02,  1.1552e-02,  ...,  1.9591e-02,
           -1.9248e-03,  2.6710e-02]],

         [[ 3.5788e-02,  7.4135e-03, -2.5529e-03,  ...,  2.1407e-02,
            2.0256e-02,  6.6988e-03],
          [ 1.7754e-02, -1.7363e-02, -5.2848e-03,  ..., -6.6432e-04,
            1.5352e-02, -9.3723e-03],
          [ 3.1912e-02,  2.7844e-02,  2.3081e-02,  ...,  2.9375e-02,
            2.7260e-03,  2.4742e-02]

In [None]:
import torch.optim as optim
import torch.nn as nn

# Adam hyper-parameters, collected in one dict so they are easy to adjust.
optim_params = {
    "lr": 0.001,
    "betas": (0.9, 0.999),
    "eps": 1e-05,
    "weight_decay": 0.01,
    "amsgrad": False,
}
optimizer = optim.Adam(model.parameters(), **optim_params)
# Cross-entropy classification loss, moved to the selected device.
criterion = nn.CrossEntropyLoss().to(device)

In [None]:
# Number of batches between progress reports / intermediate checkpoints.
log_every = 10
checkpoint_path = "/content/drive/MyDrive/model/mini-resnet.pth"

model.to(device)
# Make sure the model is in training mode before optimising.
model.train()
# Kept for converting logits to probabilities when predicting. It must NOT be
# applied before the loss: nn.CrossEntropyLoss expects raw logits and applies
# log-softmax itself, so feeding it softmax outputs squashed the loss into a
# narrow band (about 4.2 in the recorded run) and the model barely learned.
softmax = nn.Softmax(dim=1)
# NOTE(review): despite its name, this list receives the per-batch category
# targets (see below), not image ids -- confirm how later cells use it.
img_id_list = []
for epoch in range(1):
  counter = 0
  running_loss = 0.0
  for batch in tqdm(train_dl):

    # extract images and labels; each batch is a list of (img, target, labels) tuples
    image_batch, label_batch, annot_batch = [x[0] for x in batch], [x[1] for x in batch], [x[2] for x in batch]
    # Each label is a one-element tensor holding a category id.
    img_ids_to_append = [int(item[0]) for item in label_batch]
    img_id_list.append(img_ids_to_append)
    targets = torch.LongTensor(img_ids_to_append).to(device)
    image_batch = torch.stack(image_batch).to(device)

    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize (raw logits go straight into the criterion)
    outputs = model(image_batch)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()

    # print stats
    running_loss += loss.item()
    counter += 1
    if counter % log_every == 0:
      # running_loss covers exactly the last `log_every` batches, so that is the
      # divisor; dividing by the global batch count made the reported average
      # shrink artificially after every reset.
      print('[Epoch: %d, batch: %5d] average loss: %.3f' %
                  (epoch + 1, counter, running_loss / log_every))
      running_loss = 0.0
      # Intermediate checkpoint; writing the weights after every single batch
      # was unnecessary I/O.
      torch.save(model.state_dict(), checkpoint_path)

  # Always keep the weights reached at the end of the epoch.
  torch.save(model.state_dict(), checkpoint_path)







tensor(4.3199, grad_fn=<NllLossBackward>)



  1%|          | 1/169 [00:10<29:29, 10.53s/it][A

tensor(4.1622, grad_fn=<NllLossBackward>)



  1%|          | 2/169 [00:17<26:05,  9.37s/it][A

tensor(4.3670, grad_fn=<NllLossBackward>)



  2%|▏         | 3/169 [00:23<23:19,  8.43s/it][A

tensor(4.3274, grad_fn=<NllLossBackward>)



  2%|▏         | 4/169 [00:29<21:29,  7.82s/it][A

tensor(4.2336, grad_fn=<NllLossBackward>)



  3%|▎         | 5/169 [00:36<20:11,  7.39s/it][A

tensor(4.1897, grad_fn=<NllLossBackward>)



  4%|▎         | 6/169 [00:42<19:18,  7.11s/it][A

tensor(4.3144, grad_fn=<NllLossBackward>)



  4%|▍         | 7/169 [00:48<18:24,  6.82s/it][A

tensor(4.2355, grad_fn=<NllLossBackward>)



  5%|▍         | 8/169 [00:55<17:50,  6.65s/it][A

tensor(4.2859, grad_fn=<NllLossBackward>)



  5%|▌         | 9/169 [01:01<17:30,  6.56s/it][A

tensor(4.2621, grad_fn=<NllLossBackward>)



  6%|▌         | 10/169 [01:07<17:18,  6.53s/it][A

[Epoch: 1, batch:    10] average loss: 4.270
tensor(4.3338, grad_fn=<NllLossBackward>)



  7%|▋         | 11/169 [01:14<16:53,  6.42s/it][A

tensor(4.2809, grad_fn=<NllLossBackward>)



  7%|▋         | 12/169 [01:20<16:39,  6.36s/it][A

tensor(4.3200, grad_fn=<NllLossBackward>)



  8%|▊         | 13/169 [01:26<16:28,  6.34s/it][A

tensor(4.3395, grad_fn=<NllLossBackward>)



  8%|▊         | 14/169 [01:32<16:25,  6.36s/it][A

tensor(4.1843, grad_fn=<NllLossBackward>)



  9%|▉         | 15/169 [01:39<16:06,  6.28s/it][A

tensor(4.4243, grad_fn=<NllLossBackward>)



  9%|▉         | 16/169 [01:45<15:59,  6.27s/it][A

tensor(4.3528, grad_fn=<NllLossBackward>)



 10%|█         | 17/169 [01:51<15:51,  6.26s/it][A

tensor(4.3596, grad_fn=<NllLossBackward>)



 11%|█         | 18/169 [01:57<15:47,  6.28s/it][A

tensor(4.2348, grad_fn=<NllLossBackward>)



 11%|█         | 19/169 [02:03<15:31,  6.21s/it][A

tensor(4.2819, grad_fn=<NllLossBackward>)



 12%|█▏        | 20/169 [02:10<15:26,  6.22s/it][A

[Epoch: 1, batch:    20] average loss: 2.156
tensor(4.2021, grad_fn=<NllLossBackward>)



 12%|█▏        | 21/169 [02:16<15:20,  6.22s/it][A

tensor(4.3965, grad_fn=<NllLossBackward>)



 13%|█▎        | 22/169 [02:22<15:28,  6.32s/it][A

tensor(4.3476, grad_fn=<NllLossBackward>)



 14%|█▎        | 23/169 [02:29<15:13,  6.26s/it][A

tensor(4.2705, grad_fn=<NllLossBackward>)



 14%|█▍        | 24/169 [02:35<15:10,  6.28s/it][A

tensor(4.2579, grad_fn=<NllLossBackward>)



 15%|█▍        | 25/169 [02:41<15:04,  6.28s/it][A

tensor(4.2625, grad_fn=<NllLossBackward>)



 15%|█▌        | 26/169 [02:48<15:08,  6.35s/it][A

tensor(4.3437, grad_fn=<NllLossBackward>)



 16%|█▌        | 27/169 [02:54<14:53,  6.29s/it][A

tensor(4.2215, grad_fn=<NllLossBackward>)



 17%|█▋        | 28/169 [03:00<14:49,  6.31s/it][A

tensor(4.2764, grad_fn=<NllLossBackward>)



 17%|█▋        | 29/169 [03:06<14:41,  6.30s/it][A

tensor(4.1423, grad_fn=<NllLossBackward>)



 18%|█▊        | 30/169 [03:13<14:40,  6.33s/it][A

[Epoch: 1, batch:    30] average loss: 1.424
tensor(4.2359, grad_fn=<NllLossBackward>)



 18%|█▊        | 31/169 [03:19<14:24,  6.27s/it][A

tensor(4.1879, grad_fn=<NllLossBackward>)



 19%|█▉        | 32/169 [03:25<14:17,  6.26s/it][A

tensor(4.2369, grad_fn=<NllLossBackward>)



 20%|█▉        | 33/169 [03:31<14:12,  6.27s/it][A

tensor(4.2027, grad_fn=<NllLossBackward>)



 20%|██        | 34/169 [03:38<14:16,  6.34s/it][A

tensor(4.2226, grad_fn=<NllLossBackward>)



 21%|██        | 35/169 [03:44<14:03,  6.29s/it][A

tensor(4.1449, grad_fn=<NllLossBackward>)



 21%|██▏       | 36/169 [03:50<13:54,  6.27s/it][A

tensor(4.2176, grad_fn=<NllLossBackward>)



 22%|██▏       | 37/169 [03:57<13:47,  6.27s/it][A

tensor(4.1572, grad_fn=<NllLossBackward>)



 22%|██▏       | 38/169 [04:03<13:48,  6.33s/it][A

tensor(4.3835, grad_fn=<NllLossBackward>)



 23%|██▎       | 39/169 [04:09<13:34,  6.27s/it][A

tensor(4.2067, grad_fn=<NllLossBackward>)



 24%|██▎       | 40/169 [04:16<13:28,  6.27s/it][A

[Epoch: 1, batch:    40] average loss: 1.055
tensor(4.2235, grad_fn=<NllLossBackward>)



 24%|██▍       | 41/169 [04:22<13:21,  6.26s/it][A

tensor(4.2686, grad_fn=<NllLossBackward>)



 25%|██▍       | 42/169 [04:28<13:24,  6.33s/it][A

tensor(4.2112, grad_fn=<NllLossBackward>)



 25%|██▌       | 43/169 [04:35<13:34,  6.47s/it][A

tensor(4.1579, grad_fn=<NllLossBackward>)



 26%|██▌       | 44/169 [04:44<15:18,  7.35s/it][A

tensor(4.3049, grad_fn=<NllLossBackward>)



 27%|██▋       | 45/169 [04:54<16:31,  7.99s/it][A

tensor(4.2115, grad_fn=<NllLossBackward>)



 27%|██▋       | 46/169 [05:02<16:11,  7.90s/it][A

tensor(4.2103, grad_fn=<NllLossBackward>)



 28%|██▊       | 47/169 [05:08<14:59,  7.37s/it][A

tensor(4.2498, grad_fn=<NllLossBackward>)



 28%|██▊       | 48/169 [05:14<14:13,  7.06s/it][A

tensor(4.2570, grad_fn=<NllLossBackward>)



 29%|██▉       | 49/169 [05:20<13:38,  6.82s/it][A

tensor(4.2334, grad_fn=<NllLossBackward>)



 30%|██▉       | 50/169 [05:27<13:22,  6.75s/it][A

[Epoch: 1, batch:    50] average loss: 0.847
tensor(4.2417, grad_fn=<NllLossBackward>)



 30%|███       | 51/169 [05:33<12:53,  6.56s/it][A

tensor(4.3372, grad_fn=<NllLossBackward>)



 31%|███       | 52/169 [05:39<12:37,  6.47s/it][A

tensor(4.1324, grad_fn=<NllLossBackward>)



 31%|███▏      | 53/169 [05:46<12:25,  6.42s/it][A

tensor(4.1405, grad_fn=<NllLossBackward>)



 32%|███▏      | 54/169 [05:52<12:19,  6.43s/it][A

tensor(4.2687, grad_fn=<NllLossBackward>)



 33%|███▎      | 55/169 [05:58<12:01,  6.33s/it][A

tensor(4.2452, grad_fn=<NllLossBackward>)



 33%|███▎      | 56/169 [06:04<11:51,  6.29s/it][A

tensor(4.3021, grad_fn=<NllLossBackward>)



 34%|███▎      | 57/169 [06:11<11:46,  6.31s/it][A

tensor(4.2010, grad_fn=<NllLossBackward>)



 34%|███▍      | 58/169 [06:17<11:48,  6.38s/it][A

tensor(4.1774, grad_fn=<NllLossBackward>)



 35%|███▍      | 59/169 [06:23<11:32,  6.30s/it][A

tensor(4.3140, grad_fn=<NllLossBackward>)



 36%|███▌      | 60/169 [06:30<11:26,  6.30s/it][A

[Epoch: 1, batch:    60] average loss: 0.706
tensor(4.2211, grad_fn=<NllLossBackward>)



 36%|███▌      | 61/169 [06:36<11:19,  6.29s/it][A

tensor(4.3040, grad_fn=<NllLossBackward>)



 37%|███▋      | 62/169 [06:42<11:20,  6.36s/it][A

tensor(4.2091, grad_fn=<NllLossBackward>)



 37%|███▋      | 63/169 [06:49<11:05,  6.28s/it][A

tensor(4.2650, grad_fn=<NllLossBackward>)



 38%|███▊      | 64/169 [06:55<10:56,  6.26s/it][A

tensor(4.2280, grad_fn=<NllLossBackward>)



 38%|███▊      | 65/169 [07:01<10:53,  6.29s/it][A

tensor(4.1917, grad_fn=<NllLossBackward>)



 39%|███▉      | 66/169 [07:08<10:53,  6.34s/it][A

tensor(4.2379, grad_fn=<NllLossBackward>)



 40%|███▉      | 67/169 [07:14<10:40,  6.28s/it][A

tensor(4.2623, grad_fn=<NllLossBackward>)



 40%|████      | 68/169 [07:20<10:33,  6.28s/it][A

tensor(4.2056, grad_fn=<NllLossBackward>)



 41%|████      | 69/169 [07:26<10:26,  6.27s/it][A

tensor(4.3074, grad_fn=<NllLossBackward>)



 41%|████▏     | 70/169 [07:33<10:26,  6.33s/it][A

[Epoch: 1, batch:    70] average loss: 0.606
tensor(4.2366, grad_fn=<NllLossBackward>)



 42%|████▏     | 71/169 [07:39<10:15,  6.28s/it][A

tensor(4.1220, grad_fn=<NllLossBackward>)



 43%|████▎     | 72/169 [07:45<10:07,  6.27s/it][A

tensor(4.2245, grad_fn=<NllLossBackward>)



 43%|████▎     | 73/169 [07:51<10:02,  6.27s/it][A

tensor(4.1049, grad_fn=<NllLossBackward>)



 44%|████▍     | 74/169 [07:58<10:00,  6.32s/it][A

tensor(4.2047, grad_fn=<NllLossBackward>)



 44%|████▍     | 75/169 [08:04<09:49,  6.27s/it][A

tensor(4.3138, grad_fn=<NllLossBackward>)



 45%|████▍     | 76/169 [08:10<09:43,  6.27s/it][A

tensor(4.1587, grad_fn=<NllLossBackward>)



 46%|████▌     | 77/169 [08:17<09:36,  6.26s/it][A

tensor(4.2333, grad_fn=<NllLossBackward>)



 46%|████▌     | 78/169 [08:23<09:35,  6.32s/it][A

tensor(4.1470, grad_fn=<NllLossBackward>)



 47%|████▋     | 79/169 [08:29<09:24,  6.28s/it][A

tensor(4.3134, grad_fn=<NllLossBackward>)



 47%|████▋     | 80/169 [08:36<09:20,  6.30s/it][A

[Epoch: 1, batch:    80] average loss: 0.526
tensor(4.2665, grad_fn=<NllLossBackward>)



 48%|████▊     | 81/169 [08:42<09:14,  6.30s/it][A

tensor(4.1617, grad_fn=<NllLossBackward>)



 49%|████▊     | 82/169 [08:48<09:13,  6.36s/it][A

tensor(4.2434, grad_fn=<NllLossBackward>)



 49%|████▉     | 83/169 [08:55<09:03,  6.32s/it][A

tensor(4.2516, grad_fn=<NllLossBackward>)



 50%|████▉     | 84/169 [09:01<08:57,  6.32s/it][A

tensor(4.1809, grad_fn=<NllLossBackward>)



 50%|█████     | 85/169 [09:07<08:48,  6.30s/it][A

tensor(4.2123, grad_fn=<NllLossBackward>)



 51%|█████     | 86/169 [09:13<08:43,  6.31s/it][A

tensor(4.3082, grad_fn=<NllLossBackward>)



 51%|█████▏    | 87/169 [09:20<08:32,  6.25s/it][A

tensor(4.2263, grad_fn=<NllLossBackward>)



 52%|█████▏    | 88/169 [09:26<08:26,  6.25s/it][A

tensor(4.2523, grad_fn=<NllLossBackward>)



 53%|█████▎    | 89/169 [09:32<08:21,  6.27s/it][A

tensor(4.3577, grad_fn=<NllLossBackward>)



 53%|█████▎    | 90/169 [09:39<08:20,  6.33s/it][A

[Epoch: 1, batch:    90] average loss: 0.472
tensor(4.2260, grad_fn=<NllLossBackward>)



 54%|█████▍    | 91/169 [09:45<08:11,  6.30s/it][A

tensor(4.1853, grad_fn=<NllLossBackward>)



 54%|█████▍    | 92/169 [09:51<08:03,  6.28s/it][A

tensor(4.1898, grad_fn=<NllLossBackward>)



 55%|█████▌    | 93/169 [09:57<07:59,  6.31s/it][A

tensor(4.2513, grad_fn=<NllLossBackward>)



 56%|█████▌    | 94/169 [10:04<07:56,  6.35s/it][A

tensor(4.1273, grad_fn=<NllLossBackward>)



 56%|█████▌    | 95/169 [10:10<07:43,  6.27s/it][A

tensor(4.1424, grad_fn=<NllLossBackward>)



 57%|█████▋    | 96/169 [10:16<07:35,  6.24s/it][A

tensor(4.2370, grad_fn=<NllLossBackward>)



 57%|█████▋    | 97/169 [10:22<07:28,  6.23s/it][A

tensor(4.2048, grad_fn=<NllLossBackward>)



 58%|█████▊    | 98/169 [10:29<07:26,  6.29s/it][A

tensor(4.2815, grad_fn=<NllLossBackward>)



 59%|█████▊    | 99/169 [10:35<07:17,  6.24s/it][A

tensor(4.1553, grad_fn=<NllLossBackward>)



 59%|█████▉    | 100/169 [10:41<07:09,  6.23s/it][A

[Epoch: 1, batch:   100] average loss: 0.420
tensor(4.2360, grad_fn=<NllLossBackward>)



 60%|█████▉    | 101/169 [10:47<07:02,  6.22s/it][A

tensor(4.2365, grad_fn=<NllLossBackward>)



 60%|██████    | 102/169 [10:54<06:58,  6.24s/it][A

tensor(4.3150, grad_fn=<NllLossBackward>)



 61%|██████    | 103/169 [11:00<06:48,  6.19s/it][A

tensor(4.2371, grad_fn=<NllLossBackward>)



 62%|██████▏   | 104/169 [11:06<06:41,  6.18s/it][A

tensor(4.2036, grad_fn=<NllLossBackward>)



 62%|██████▏   | 105/169 [11:12<06:35,  6.18s/it][A

tensor(4.3280, grad_fn=<NllLossBackward>)



 63%|██████▎   | 106/169 [11:18<06:31,  6.22s/it][A

tensor(4.2475, grad_fn=<NllLossBackward>)



 63%|██████▎   | 107/169 [11:24<06:23,  6.19s/it][A

tensor(4.2364, grad_fn=<NllLossBackward>)



 64%|██████▍   | 108/169 [11:31<06:16,  6.18s/it][A

tensor(4.0931, grad_fn=<NllLossBackward>)



 64%|██████▍   | 109/169 [11:37<06:11,  6.19s/it][A

tensor(4.1271, grad_fn=<NllLossBackward>)



 65%|██████▌   | 110/169 [11:43<06:06,  6.22s/it][A

[Epoch: 1, batch:   110] average loss: 0.384
tensor(4.3019, grad_fn=<NllLossBackward>)



 66%|██████▌   | 111/169 [11:49<05:57,  6.17s/it][A

tensor(4.1522, grad_fn=<NllLossBackward>)



 66%|██████▋   | 112/169 [11:55<05:51,  6.17s/it][A

tensor(4.2060, grad_fn=<NllLossBackward>)



 67%|██████▋   | 113/169 [12:01<05:45,  6.16s/it][A

tensor(4.2630, grad_fn=<NllLossBackward>)



 67%|██████▋   | 114/169 [12:08<05:41,  6.22s/it][A

tensor(4.1769, grad_fn=<NllLossBackward>)



 68%|██████▊   | 115/169 [12:14<05:33,  6.18s/it][A

tensor(4.3177, grad_fn=<NllLossBackward>)



 69%|██████▊   | 116/169 [12:20<05:28,  6.20s/it][A

tensor(4.2042, grad_fn=<NllLossBackward>)



 69%|██████▉   | 117/169 [12:26<05:22,  6.20s/it][A

tensor(4.2367, grad_fn=<NllLossBackward>)



 70%|██████▉   | 118/169 [12:33<05:18,  6.24s/it][A

tensor(4.1631, grad_fn=<NllLossBackward>)



 70%|███████   | 119/169 [12:39<05:10,  6.20s/it][A

tensor(4.2989, grad_fn=<NllLossBackward>)



 71%|███████   | 120/169 [12:45<05:03,  6.18s/it][A

[Epoch: 1, batch:   120] average loss: 0.353
tensor(4.2201, grad_fn=<NllLossBackward>)



 72%|███████▏  | 121/169 [12:51<04:56,  6.19s/it][A

tensor(4.2239, grad_fn=<NllLossBackward>)



 72%|███████▏  | 122/169 [12:57<04:53,  6.24s/it][A

tensor(4.1916, grad_fn=<NllLossBackward>)



 73%|███████▎  | 123/169 [13:04<04:44,  6.18s/it][A

tensor(4.1593, grad_fn=<NllLossBackward>)



 73%|███████▎  | 124/169 [13:10<04:37,  6.17s/it][A

tensor(4.3024, grad_fn=<NllLossBackward>)



 74%|███████▍  | 125/169 [13:16<04:30,  6.16s/it][A

tensor(4.1911, grad_fn=<NllLossBackward>)



 75%|███████▍  | 126/169 [13:22<04:26,  6.20s/it][A

tensor(4.1470, grad_fn=<NllLossBackward>)



 75%|███████▌  | 127/169 [13:28<04:18,  6.16s/it][A

tensor(4.1841, grad_fn=<NllLossBackward>)



 76%|███████▌  | 128/169 [13:34<04:13,  6.18s/it][A

tensor(4.2632, grad_fn=<NllLossBackward>)



 76%|███████▋  | 129/169 [13:41<04:08,  6.21s/it][A

tensor(4.1821, grad_fn=<NllLossBackward>)



 77%|███████▋  | 130/169 [13:47<04:02,  6.23s/it][A

[Epoch: 1, batch:   130] average loss: 0.324
tensor(4.2404, grad_fn=<NllLossBackward>)



 78%|███████▊  | 131/169 [13:53<03:55,  6.19s/it][A

tensor(4.2663, grad_fn=<NllLossBackward>)



 78%|███████▊  | 132/169 [13:59<03:49,  6.20s/it][A

tensor(4.2300, grad_fn=<NllLossBackward>)



 79%|███████▊  | 133/169 [14:06<03:43,  6.22s/it][A

tensor(4.2645, grad_fn=<NllLossBackward>)



 79%|███████▉  | 134/169 [14:12<03:38,  6.24s/it][A

tensor(4.2074, grad_fn=<NllLossBackward>)



 80%|███████▉  | 135/169 [14:18<03:31,  6.21s/it][A

tensor(4.2219, grad_fn=<NllLossBackward>)



 80%|████████  | 136/169 [14:24<03:24,  6.20s/it][A

tensor(4.2692, grad_fn=<NllLossBackward>)



 81%|████████  | 137/169 [14:30<03:19,  6.22s/it][A

tensor(4.1582, grad_fn=<NllLossBackward>)



 82%|████████▏ | 138/169 [14:37<03:14,  6.26s/it][A

tensor(4.2995, grad_fn=<NllLossBackward>)



 82%|████████▏ | 139/169 [14:43<03:06,  6.21s/it][A

tensor(4.1824, grad_fn=<NllLossBackward>)



 83%|████████▎ | 140/169 [14:49<03:00,  6.22s/it][A

[Epoch: 1, batch:   140] average loss: 0.302
tensor(4.2530, grad_fn=<NllLossBackward>)



 83%|████████▎ | 141/169 [14:55<02:53,  6.21s/it][A

tensor(4.2121, grad_fn=<NllLossBackward>)



 84%|████████▍ | 142/169 [15:04<03:08,  6.99s/it][A

tensor(4.1732, grad_fn=<NllLossBackward>)



 85%|████████▍ | 143/169 [15:15<03:32,  8.17s/it][A

tensor(4.1771, grad_fn=<NllLossBackward>)



 85%|████████▌ | 144/169 [15:22<03:14,  7.76s/it][A

tensor(4.2459, grad_fn=<NllLossBackward>)



 86%|████████▌ | 145/169 [15:28<02:56,  7.35s/it][A

tensor(4.2851, grad_fn=<NllLossBackward>)



 86%|████████▋ | 146/169 [15:35<02:43,  7.11s/it][A

tensor(4.1448, grad_fn=<NllLossBackward>)



 87%|████████▋ | 147/169 [15:41<02:30,  6.86s/it][A

tensor(4.2044, grad_fn=<NllLossBackward>)



 88%|████████▊ | 148/169 [15:47<02:20,  6.70s/it][A

tensor(4.3040, grad_fn=<NllLossBackward>)



 88%|████████▊ | 149/169 [15:54<02:11,  6.60s/it][A

tensor(4.2504, grad_fn=<NllLossBackward>)



 89%|████████▉ | 150/169 [16:00<02:04,  6.57s/it][A

[Epoch: 1, batch:   150] average loss: 0.282
tensor(4.1434, grad_fn=<NllLossBackward>)



 89%|████████▉ | 151/169 [16:06<01:56,  6.46s/it][A

tensor(4.1570, grad_fn=<NllLossBackward>)



 90%|████████▉ | 152/169 [16:13<01:49,  6.42s/it][A

tensor(4.2679, grad_fn=<NllLossBackward>)



 91%|█████████ | 153/169 [16:19<01:42,  6.38s/it][A

tensor(4.1580, grad_fn=<NllLossBackward>)



 91%|█████████ | 154/169 [16:26<01:36,  6.42s/it][A

tensor(4.1267, grad_fn=<NllLossBackward>)



 92%|█████████▏| 155/169 [16:32<01:29,  6.37s/it][A

tensor(4.1803, grad_fn=<NllLossBackward>)



 92%|█████████▏| 156/169 [16:38<01:22,  6.38s/it][A

tensor(4.1712, grad_fn=<NllLossBackward>)



 93%|█████████▎| 157/169 [16:45<01:16,  6.38s/it][A

tensor(4.2673, grad_fn=<NllLossBackward>)



 93%|█████████▎| 158/169 [16:51<01:10,  6.40s/it][A

tensor(4.1529, grad_fn=<NllLossBackward>)



 94%|█████████▍| 159/169 [16:57<01:03,  6.33s/it][A

tensor(4.2212, grad_fn=<NllLossBackward>)



 95%|█████████▍| 160/169 [17:04<00:57,  6.34s/it][A

[Epoch: 1, batch:   160] average loss: 0.262
tensor(4.1577, grad_fn=<NllLossBackward>)



 95%|█████████▌| 161/169 [17:10<00:50,  6.33s/it][A

tensor(4.2099, grad_fn=<NllLossBackward>)



 96%|█████████▌| 162/169 [17:16<00:44,  6.37s/it][A

tensor(4.2514, grad_fn=<NllLossBackward>)



 96%|█████████▋| 163/169 [17:23<00:37,  6.31s/it][A

tensor(4.1898, grad_fn=<NllLossBackward>)



 97%|█████████▋| 164/169 [17:29<00:31,  6.31s/it][A

tensor(4.1587, grad_fn=<NllLossBackward>)



 98%|█████████▊| 165/169 [17:35<00:25,  6.31s/it][A

tensor(4.2827, grad_fn=<NllLossBackward>)



 98%|█████████▊| 166/169 [17:41<00:18,  6.30s/it][A

tensor(4.2182, grad_fn=<NllLossBackward>)



 99%|█████████▉| 167/169 [17:48<00:12,  6.26s/it][A

tensor(4.2057, grad_fn=<NllLossBackward>)



 99%|█████████▉| 168/169 [17:54<00:06,  6.25s/it][A

tensor(4.2106, grad_fn=<NllLossBackward>)



100%|██████████| 169/169 [18:00<00:00,  6.39s/it]


In [None]:
# Number of image ids held by the training dataset.
len(train_dataset.ids)

10816

Slice model

In [None]:
from torch import nn
from torchsummary import summary

# Dropping output layer (the ResNet classifier)

def slice_model(original_model, from_layer=None, to_layer=None):
    """Build a sub-network from a contiguous run of a model's direct children.

    Args:
        original_model: module whose immediate children are sliced.
        from_layer: index of the first child to keep (None = start).
        to_layer: index one past the last child to keep (None = end);
            negative values count from the end, as in list slicing.

    Returns:
        An ``nn.Sequential`` wrapping the selected child modules. The children
        are the same module objects as in ``original_model`` (not copies).
    """
    child_modules = list(original_model.children())
    selected = child_modules[from_layer:to_layer]
    return nn.Sequential(*selected)
# Keep every child of the model except the last one, then move the result
# to the compute device.
model_conv_features = slice_model(model, to_layer=-1)
model_conv_features = model_conv_features.to(device)

# Print a per-layer summary for a single 3x224x224 input.
summary(model_conv_features, input_size=(3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
        Conv2dAuto-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
        Conv2dAuto-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
 ResNetBasicBlock-10           [-1, 64, 56, 56]               0
       Conv2dAuto-11           [-1, 64, 56, 56]          36,864
      BatchNorm2d-12           [-1, 64, 56, 56]             128
             ReLU-13           [-1, 64, 56, 56]               0
       Conv2dAuto-14           [-1, 64,

Validation dataset and dataloader

In [None]:
import os
import json
# Categories of interest, stored next to the annotation files.
with open(os.path.join(numpy_data_path, "annotations", "categories.json"), 'r') as j:
    desired_categories = json.load(j)

# Build the annotation path from numpy_data_path (same base directory as the
# categories file above) instead of repeating the absolute Drive path.
valid_annot = COCO(os.path.join(numpy_data_path, "annotations", "instances_val2014.json"))
category_ids = [cat["id"] for cat in desired_categories]

# One list of image ids per category; `cat_id` avoids shadowing the builtin `id`.
img_ids = [valid_annot.getImgIds(catIds=[cat_id]) for cat_id in category_ids]

# Flatten to a de-duplicated list of image ids (an image can appear under
# several categories).
desired_img_ids = list({img_id for ids_for_cat in img_ids for img_id in ids_for_cat})

loading annotations into memory...
Done (t=8.19s)
creating index...
index created!


In [None]:
# Restrict desired_img_ids to images whose converted ".np" file is present in
# the validation subset directory.
test = valid_annot.loadImgs(desired_img_ids)
subset_list = os.listdir("/content/drive/MyDrive/numpy_images/data/valid_subset")

# Membership tests against a set are O(1); scanning the directory listing
# once per image would be quadratic in the number of files.
subset_files = set(subset_list)

# image id -> expected file name of the converted image.
test_dict = {
    img_id: item["file_name"].replace(".jpg", ".np")
    for img_id, item in zip(desired_img_ids, test)
}
final = {k: v for (k, v) in test_dict.items() if v in subset_files}
desired_img_ids = list(final.keys())

In [None]:
# Directory listed earlier for the validation subset's ".np" image files.
np_data_dir_val = "/content/drive/MyDrive/numpy_images/data/valid_subset"
# COCO 2014 validation annotation file.
np_annot_dir_val = "/content/drive/MyDrive/numpy_images/annotations/instances_val2014.json"

In [None]:
# COCO index over the validation annotations.
val_coco = COCO(np_annot_dir_val)
# Dataset over the validation images and their annotations.
# NOTE(review): the recorded output shows the annotation file being loaded a
# second time here, so COCODataset presumably builds its own COCO index from
# the path — confirm, and consider sharing one index.
val_dataset = COCODataset(np_data_dir_val, np_annot_dir_val)

loading annotations into memory...
Done (t=9.71s)
creating index...
index created!
loading annotations into memory...
Done (t=5.54s)
creating index...


  0%|          | 117/40504 [00:00<00:34, 1166.52it/s]

index created!


100%|██████████| 40504/40504 [00:30<00:00, 1326.47it/s]


In [None]:
# Number of entries in the validation dataset's `ids` attribute
# (presumably one per usable image — confirm against COCODataset).
len(val_dataset.ids)

20874

In [None]:
from torch.utils.data import DataLoader
from torch.multiprocessing import Pool, Process, set_start_method
import torch.multiprocessing
# Use one DataLoader worker per CPU core.
cpu_count = torch.multiprocessing.cpu_count()
print('cpus', cpu_count)

# set_start_method raises RuntimeError when the start method has already been
# fixed for this process; in that case keep whatever is already configured.
try:
    set_start_method("spawn")
except RuntimeError:
    pass

batch_size = 64

val_dl = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=cpu_count,
    # Identity collate: a batch stays a plain list of per-sample items.
    collate_fn=lambda x: x,
)

cpus 4


Load Model (If required)

In [None]:
# Rebuild the network on the compute device.
model = resnet18(in_channels=3, n_classes=99).to(device)

# Re-create the feature extractor from THIS model object. The previous
# `model_conv_features` wraps the children of whichever `model` existed when
# the "Slice model" cell ran, so without this the weights loaded below would
# never be used for the embeddings. The Sequential shares its child modules
# with `model`, and load_state_dict copies into the existing parameters, so
# loading after slicing still updates the extractor.
model_conv_features = slice_model(model, to_layer=-1)

# map_location lets a checkpoint saved on one device load on another
# (e.g. saved on GPU, loaded on a CPU-only runtime).
model.load_state_dict(
    torch.load("/content/drive/MyDrive/model/mini-resnet.pth", map_location=device)
)



<All keys matched successfully>

Evaluate Model

In [None]:
# Switch the feature extractor to evaluation mode before inference, so the
# BatchNorm2d layers use their tracked running statistics rather than
# per-batch statistics. eval() returns the module, hence the printed repr.
model_conv_features.eval()

Sequential(
  (0): ResNetEncoder(
    (gate): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (blocks): ModuleList(
      (0): ResNetLayer(
        (blocks): Sequential(
          (0): ResNetBasicBlock(
            (blocks): Sequential(
              (0): Sequential(
                (conv): Conv2dAuto(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (1): ReLU()
              (2): Sequential(
                (conv): Conv2dAuto(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_runnin

In [None]:
features_list = []   # one (batch, feature_dim) tensor per batch
labels_list = []     # per-image labels, in loader order
annot_list = []      # per-image annotation entries, in loader order
image_batches = []   # per-image input tensors, in loader order

# Run on a GPU runtime if possible: the recorded run below took about
# 44 minutes for 327 batches.

model_conv_features.to(device)
# Make this cell self-contained: inference must run in eval mode regardless of
# whether the earlier eval() cell was executed (the call is idempotent).
model_conv_features.eval()
for batch in tqdm(val_dl):
    # The loader uses an identity collate_fn, so `batch` is a list of samples;
    # positions 0/1/2 of each sample are the image, label and annotations.
    image_batch, label_batch, annot_batch = [x[0] for x in batch], [x[1] for x in batch], [x[2] for x in batch]
    image_batch = torch.stack(image_batch).to(device)

    with torch.no_grad():
        # Flatten everything after the batch dimension into one feature vector.
        features_batch = model_conv_features(image_batch).flatten(start_dim=1)

    # Accumulate results on the CPU so device memory is only needed for the
    # current batch, and so the saved embeddings load on any machine.
    features_list.append(features_batch.cpu())
    labels_list.extend(label_batch)
    annot_list.extend(annot_batch)
    # NOTE(review): this retains every input image for the whole dataset,
    # which is very memory-hungry; drop it if nothing downstream reads
    # `image_batches`.
    image_batches.extend(image_batch.cpu())

100%|██████████| 327/327 [43:54<00:00,  8.06s/it]


In [None]:
# Concatenate the per-batch feature matrices along dim 0 (torch.cat's default)
# into a single tensor with one row per image.
features = torch.cat(features_list)

In [None]:
# Sanity check: True only if the first two embedding rows are identical;
# False (as recorded) means the first two images got different embeddings.
torch.equal(features[0], features[1])

False

In [None]:
# Shape of the embedding matrix: (number of images, flattened feature length).
features.size()

torch.Size([20874, 25088])

Save embeddings

In [None]:
# Image id for each embedding row, taken from the first annotation entry of
# each image; annot_list was filled in the same loop (and order) as the features.
image_map = [annots[0]["image_id"] for annots in annot_list]
# Row position in `features` -> COCO image id.
embed_pos_to_image_id = dict(enumerate(image_map))

In [None]:
# Display a previously saved embeddings tensor.
# NOTE(review): this appears to be the file that the torch.save cell further
# down writes (assuming the working directory is /content), so on a fresh
# runtime this cell fails unless the file is left over from an earlier run —
# consider moving it after the save.
torch.load("/content/mini-resnet18_embeddings-v2.pt")

tensor([[0.6256, 0.5984, 0.5980,  ..., 0.1143, 0.1134, 0.1183],
        [0.6259, 0.5989, 0.5987,  ..., 0.1157, 0.1151, 0.1201],
        [0.6367, 0.7119, 0.7162,  ..., 0.0233, 0.1248, 0.0822],
        ...,
        [0.6255, 0.5981, 0.5977,  ..., 0.0514, 0.0568, 0.1260],
        [0.6260, 0.5989, 0.5987,  ..., 0.1107, 0.1105, 0.1157],
        [0.6262, 0.5924, 0.5920,  ..., 0.2199, 0.1700, 0.0825]])

In [None]:
# Write the embedding matrix to "<model_name>_embeddings-v2.pt" in the
# current working directory.
model_name = "mini-resnet18"
embeddings_path = model_name + "_embeddings-v2.pt"
torch.save(features, embeddings_path)

In [None]:
import json

# Persist the row-position -> image-id mapping next to the embeddings.
# JSON object keys are always strings, so the integer positions are written
# as "0", "1", ... and must be converted back to int when read.
with open('mini-resnet18_embeddings-v2.json', 'w') as fp:
    fp.write(json.dumps(embed_pos_to_image_id))