In [1]:
import torch
from torchvision import models
import torch.utils.model_zoo as model_zoo
from torchsummary import summary

from catr.models.backbone import BackboneV2

# import torch
from torch.utils.data import DataLoader
import torch.backends.cudnn as cudnn

import numpy as np
import time
import sys
import os
import math
import tqdm
import timeit
from datetime import datetime
import dateutil.tz

from catr.models import utils, caption
from catr.datasets import coco
from catr.cfg_damsm_vocab import Config
# from catr.engine import train_one_epoch, evaluate

from torch.utils.tensorboard import SummaryWriter
from copy import deepcopy

In [2]:
# Instantiate the project configuration (Config is imported from catr.cfg_damsm_vocab
# in the import cell) and build the captioning model; build_model_v2 returns a
# (model, criterion) pair.
config = Config()
model, criterion = caption.build_model_v2(config)



In [3]:
# Print the module tree to inspect the architecture that build_model_v2 produced.
print(model)

CaptionV2(
  (backbone): JoinerV2(
    (0): BackboneV2(
      (cnn_enc): CNN_ENCODER(
        (Conv2d_1a_3x3): BasicConv2d(
          (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        )
        (Conv2d_2a_3x3): BasicConv2d(
          (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        )
        (Conv2d_2b_3x3): BasicConv2d(
          (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        )
        (Conv2d_3b_1x1): BasicConv2d(
          (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        )
   

## Initialize model from checkpoint v3 (with different vocab_size)

In [8]:
# Load the pretrained checkpoint onto the CPU and list the names of the tensors stored
# under its 'model' entry.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
checkv3 = torch.load('catr/checkpoint_v3.pth', map_location='cpu')
print(checkv3['model'].keys())

odict_keys(['backbone.0.body.conv1.weight', 'backbone.0.body.bn1.weight', 'backbone.0.body.bn1.bias', 'backbone.0.body.bn1.running_mean', 'backbone.0.body.bn1.running_var', 'backbone.0.body.layer1.0.conv1.weight', 'backbone.0.body.layer1.0.bn1.weight', 'backbone.0.body.layer1.0.bn1.bias', 'backbone.0.body.layer1.0.bn1.running_mean', 'backbone.0.body.layer1.0.bn1.running_var', 'backbone.0.body.layer1.0.conv2.weight', 'backbone.0.body.layer1.0.bn2.weight', 'backbone.0.body.layer1.0.bn2.bias', 'backbone.0.body.layer1.0.bn2.running_mean', 'backbone.0.body.layer1.0.bn2.running_var', 'backbone.0.body.layer1.0.conv3.weight', 'backbone.0.body.layer1.0.bn3.weight', 'backbone.0.body.layer1.0.bn3.bias', 'backbone.0.body.layer1.0.bn3.running_mean', 'backbone.0.body.layer1.0.bn3.running_var', 'backbone.0.body.layer1.0.downsample.0.weight', 'backbone.0.body.layer1.0.downsample.1.weight', 'backbone.0.body.layer1.0.downsample.1.bias', 'backbone.0.body.layer1.0.downsample.1.running_mean', 'backbone.0.b

In [9]:
# The checkpoint and the freshly built model were created with different vocabulary
# sizes, so print the shapes of the vocabulary-sized tensors on both sides to see
# which dimensions have to be adjusted before loading.
ckpt_state = checkv3['model']
out_layer = model.mlp.layers[2]
print(ckpt_state['mlp.layers.2.weight'].shape, ckpt_state['mlp.layers.2.bias'].shape)
print(
    out_layer.weight.shape,
    out_layer.bias.shape,
    model.transformer.embeddings.word_embeddings.weight.shape,
)

torch.Size([30522, 512]) torch.Size([30522])
torch.Size([27300, 512]) torch.Size([27300]) torch.Size([27300, 256])


In [10]:
# checkpoint_v3 was trained with the BERT tokenizer (30522 output classes), so its
# vocabulary-sized tensors cannot be loaded as-is into a model built with a smaller
# config.vocab_size, i.e. this does not work directly:
# model.load_state_dict(checkv3['model'], strict=False)
vocab_len = config.vocab_size # 27300
old_len = checkv3['model']['mlp.layers.2.bias'].shape[0]
print(vocab_len, old_len)
# Untouched copy of the whole checkpoint so the original rows can still be inspected
# after the in-place re-indexing below.
# NOTE: re-running this cell after the adjustment overwrites the backup with the
# already-adjusted checkpoint; reload checkv3 first if a clean backup is needed.
checkv3_bk = deepcopy(checkv3)

# Tensors whose first dimension is the vocabulary size; they must be re-indexed together.
vocab_sized_keys = (
    'mlp.layers.2.weight',
    'mlp.layers.2.bias',
    'transformer.embeddings.word_embeddings.weight',
)

if vocab_len < old_len:
    print('adjust size ...')
    # New row layout: old row 0 first, then a contiguous run of old rows starting at
    # 1000, then old rows 101, 102 and 100 as the last three entries.
    # (Presumably 0/101/102/100 are BERT's [PAD]/[CLS]/[SEP]/[UNK] ids and 1000 is
    # where the ordinary tokens start; verify against the tokenizer vocabulary.)
    first_kept = 1000
    n_special = 4  # rows 0, 101, 102, 100
    last_kept = first_kept + vocab_len - n_special  # exclusive end, == vocab_len + 996
    if vocab_len < n_special or last_kept > old_len:
        # Fail early with a readable message instead of an opaque out-of-range error
        # (or a silently wrong row count) from the tensor indexing below.
        raise IndexError(
            f'cannot select {vocab_len} vocabulary rows from a checkpoint with {old_len} rows: '
            f'the selection needs old rows {first_kept}..{last_kept - 1} plus {n_special} special rows')
    idx = np.array([0] + list(range(first_kept, last_kept)) + [101, 102, 100], dtype=int)
    print(len(idx))
    # Indexing along the first dimension keeps all remaining dimensions intact, so the
    # same expression works for the 2-D weights and the 1-D bias.
    for key in vocab_sized_keys:
        checkv3['model'][key] = checkv3['model'][key][idx]
print(checkv3['model']['mlp.layers.2.weight'].shape, 
      checkv3['model']['mlp.layers.2.bias'].shape, 
      checkv3['model']['transformer.embeddings.word_embeddings.weight'].shape)

27300 30522
adjust size ...
27300
torch.Size([27300, 512]) torch.Size([27300]) torch.Size([27300, 256])


In [12]:
# Load the re-indexed checkpoint into the model, then print matching rows one after
# the other: row `i` of the live model should show the same values as row `k` of the
# untouched backup checkpoint.
i, k = 27299, 100 # id in vocab, id in bert
model.load_state_dict(checkv3['model'], strict=False)

original_state = checkv3_bk['model']
first_100 = slice(None, 100)
row_pairs = (
    (model.mlp.layers[2].weight[i, first_100],
     original_state['mlp.layers.2.weight'][k, first_100]),
    (model.mlp.layers[2].bias[i],
     original_state['mlp.layers.2.bias'][k]),
    (model.transformer.embeddings.word_embeddings.weight[i, first_100],
     original_state['transformer.embeddings.word_embeddings.weight'][k, first_100]),
)
for loaded_row, original_row in row_pairs:
    print(loaded_row)
    print(original_row)

tensor([-0.1392,  0.0033, -0.1229, -0.0137, -0.0461, -0.1053, -0.0669, -0.0748,
        -0.1033, -0.0646, -0.0256, -0.0220, -0.0272, -0.0683, -0.0227, -0.0830,
        -0.1196, -0.1208, -0.1496, -0.1115, -0.1036, -0.0703, -0.1421, -0.0801,
         0.0147, -0.6438, -0.1358, -0.1388, -0.0398, -0.0976, -0.1333, -0.0378,
        -0.1344, -0.1401, -0.0896, -0.1210, -0.0559, -0.1757, -0.1460, -0.0610,
        -0.0617, -0.1013, -0.1392, -0.1495, -0.0939, -0.0596, -0.1333, -0.1642,
        -0.0784, -0.0443, -0.0980, -0.2780, -0.2274, -0.0142, -0.0548, -0.0736,
        -0.1413, -0.0512, -0.0100, -0.0432, -0.4243, -0.0268, -0.0347, -0.0798,
        -0.2291, -0.1996, -0.0357, -0.4141, -0.1359, -0.1373, -0.0805, -0.0853,
        -0.0215, -0.0566, -0.1196, -0.0611, -0.0482, -0.0303, -0.0700, -0.0796,
        -0.0731, -0.0498, -0.0683, -0.0882, -0.1050, -0.0363, -0.1059, -0.0866,
        -0.1250, -0.0960, -0.0870, -0.0777, -0.1483, -0.0577, -0.1220, -0.0675,
        -0.0121, -0.1851, -0.1125, -0.26

## Others

In [4]:
# Rebind the notebook-global `model` to a standalone BackboneV2 (imported from
# catr.models.backbone); whatever `model` referred to before is no longer reachable
# through that name.
model = BackboneV2()



KeyError: 'model'

In [16]:
# Per-layer output-shape / parameter-count table from torchsummary for a 3x299x299 input.
# `.cuda()` moves the model to the GPU, so this cell requires a CUDA device.
summary(model.cuda(), (3,299,299))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 149, 149]             864
       BatchNorm2d-2         [-1, 32, 149, 149]              64
       BasicConv2d-3         [-1, 32, 149, 149]               0
            Conv2d-4         [-1, 32, 147, 147]           9,216
       BatchNorm2d-5         [-1, 32, 147, 147]              64
       BasicConv2d-6         [-1, 32, 147, 147]               0
            Conv2d-7         [-1, 64, 147, 147]          18,432
       BatchNorm2d-8         [-1, 64, 147, 147]             128
       BasicConv2d-9         [-1, 64, 147, 147]               0
        MaxPool2d-10           [-1, 64, 73, 73]               0
           Conv2d-11           [-1, 80, 73, 73]           5,120
      BatchNorm2d-12           [-1, 80, 73, 73]             160
      BasicConv2d-13           [-1, 80, 73, 73]               0
           Conv2d-14          [-1, 192,

In [17]:
# Print the module tree of whatever `model` currently refers to
# (the recorded output shows an Inception3 instance).
print(model)

Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

In [38]:
# Rebind `model` to torchvision's Inception v3 initialised with pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; confirm against the installed version before changing it.
model = models.inception_v3(pretrained=True)
# Manual alternative kept for reference: download the weights by URL and load them explicitly.
# url = 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth'
# model.load_state_dict(model_zoo.load_url(url))
# Bare last expression so the notebook displays the module tree.
model

Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

In [25]:
# List the parameter and buffer names in the current model's state dict.
model.state_dict().keys()

odict_keys(['Conv2d_1a_3x3.conv.weight', 'Conv2d_1a_3x3.bn.weight', 'Conv2d_1a_3x3.bn.bias', 'Conv2d_1a_3x3.bn.running_mean', 'Conv2d_1a_3x3.bn.running_var', 'Conv2d_1a_3x3.bn.num_batches_tracked', 'Conv2d_2a_3x3.conv.weight', 'Conv2d_2a_3x3.bn.weight', 'Conv2d_2a_3x3.bn.bias', 'Conv2d_2a_3x3.bn.running_mean', 'Conv2d_2a_3x3.bn.running_var', 'Conv2d_2a_3x3.bn.num_batches_tracked', 'Conv2d_2b_3x3.conv.weight', 'Conv2d_2b_3x3.bn.weight', 'Conv2d_2b_3x3.bn.bias', 'Conv2d_2b_3x3.bn.running_mean', 'Conv2d_2b_3x3.bn.running_var', 'Conv2d_2b_3x3.bn.num_batches_tracked', 'Conv2d_3b_1x1.conv.weight', 'Conv2d_3b_1x1.bn.weight', 'Conv2d_3b_1x1.bn.bias', 'Conv2d_3b_1x1.bn.running_mean', 'Conv2d_3b_1x1.bn.running_var', 'Conv2d_3b_1x1.bn.num_batches_tracked', 'Conv2d_4a_3x3.conv.weight', 'Conv2d_4a_3x3.bn.weight', 'Conv2d_4a_3x3.bn.bias', 'Conv2d_4a_3x3.bn.running_mean', 'Conv2d_4a_3x3.bn.running_var', 'Conv2d_4a_3x3.bn.num_batches_tracked', 'Mixed_5b.branch1x1.conv.weight', 'Mixed_5b.branch1x1.bn.w

In [35]:
# Display the bias vector of one BatchNorm layer inside the Mixed_5c block as a spot check
# of the loaded weights.
model.Mixed_5c.branch3x3dbl_3.bn.bias

Parameter containing:
tensor([-0.1332, -1.4165, -2.1220, -0.5219, -0.3263, -0.4121, -0.5234, -0.6709,
        -0.7239, -0.4627,  0.5927, -0.4952, -0.8557, -0.6593, -0.1589, -0.3235,
        -0.4278, -0.5790,  0.6059, -0.2793, -0.4762,  0.7706, -0.9461, -1.3227,
        -0.6810, -0.2005, -0.6125, -0.4821, -0.4358, -0.4265, -0.3891, -1.5254,
         0.8636, -0.3304, -0.2961, -0.4170, -0.5371, -0.5512, -0.1721, -0.6469,
         1.2289, -0.4431, -0.4586, -0.2651, -0.6495,  0.4325,  1.5490,  0.0655,
        -0.5075, -1.1048,  0.5074, -0.3161, -1.0042, -0.7891, -0.5905, -0.3905,
        -0.0081, -0.7208, -0.2287,  0.4923, -0.4943, -0.6574, -0.3799, -0.3668,
        -0.0624, -0.4536,  0.1261, -0.7905, -0.4366, -0.5918, -0.9163, -0.5323,
        -0.4310, -0.0776, -0.2986, -0.7487,  0.0170,  0.4351, -0.3087, -0.8056,
        -0.5941, -0.8872, -0.1320, -0.1090, -0.7753,  2.0760, -0.3034, -0.1400,
         0.2682, -0.5642, -0.6089, -0.3111, -0.6588, -0.5696,  0.3600, -0.5412],
       requires_g

In [16]:
# Load an image-encoder checkpoint onto the CPU (presumably the DAMSM encoder at
# epoch 100, judging by the path; confirm).
# NOTE: absolute, machine-specific path; torch.load unpickles, so only load trusted files.
checkpoint = torch.load('/media/MyDataStor1/mmrl/MMRL/DAMSMencoders/coco/image_encoder100.pth', map_location='cpu')

In [9]:
# List the tensor names stored in the checkpoint; the recorded output shows the tensors
# sit at the top level of the mapping rather than under a 'model' entry.
checkpoint.keys()

odict_keys(['Conv2d_1a_3x3.conv.weight', 'Conv2d_1a_3x3.bn.weight', 'Conv2d_1a_3x3.bn.bias', 'Conv2d_1a_3x3.bn.running_mean', 'Conv2d_1a_3x3.bn.running_var', 'Conv2d_1a_3x3.bn.num_batches_tracked', 'Conv2d_2a_3x3.conv.weight', 'Conv2d_2a_3x3.bn.weight', 'Conv2d_2a_3x3.bn.bias', 'Conv2d_2a_3x3.bn.running_mean', 'Conv2d_2a_3x3.bn.running_var', 'Conv2d_2a_3x3.bn.num_batches_tracked', 'Conv2d_2b_3x3.conv.weight', 'Conv2d_2b_3x3.bn.weight', 'Conv2d_2b_3x3.bn.bias', 'Conv2d_2b_3x3.bn.running_mean', 'Conv2d_2b_3x3.bn.running_var', 'Conv2d_2b_3x3.bn.num_batches_tracked', 'Conv2d_3b_1x1.conv.weight', 'Conv2d_3b_1x1.bn.weight', 'Conv2d_3b_1x1.bn.bias', 'Conv2d_3b_1x1.bn.running_mean', 'Conv2d_3b_1x1.bn.running_var', 'Conv2d_3b_1x1.bn.num_batches_tracked', 'Conv2d_4a_3x3.conv.weight', 'Conv2d_4a_3x3.bn.weight', 'Conv2d_4a_3x3.bn.bias', 'Conv2d_4a_3x3.bn.running_mean', 'Conv2d_4a_3x3.bn.running_var', 'Conv2d_4a_3x3.bn.num_batches_tracked', 'Mixed_5b.branch1x1.conv.weight', 'Mixed_5b.branch1x1.bn.w

In [18]:
# Display one BatchNorm bias tensor from the loaded checkpoint as a spot check of its contents.
checkpoint['Mixed_6d.branch7x7_1.bn.bias']

tensor([-0.3038, -0.3935, -0.8347, -0.6168, -0.7528, -0.5061, -0.5273, -0.4462,
        -0.3684, -0.3500, -0.6514, -0.4254, -0.1149, -0.5038, -0.5992,  0.0271,
         0.1165, -0.5057, -0.9933, -1.1629,  0.2665, -0.3352, -0.5806, -0.2742,
        -0.9306, -0.4823, -0.6305, -0.7498, -0.2391, -0.5330, -0.7853, -0.6176,
        -0.7910, -0.8601, -0.9077, -1.2353, -0.4963, -0.5646, -0.2665, -0.3931,
        -0.5994, -0.1924, -0.4170, -0.4284, -0.5277, -0.4446, -0.6691, -1.0753,
        -0.8602, -0.3220, -0.9775, -0.5960, -0.4193, -0.6001, -1.1988, -0.5442,
        -0.1368, -0.2941, -0.2039, -0.0497, -0.4912,  0.0500, -0.3545, -1.1346,
        -0.4377, -1.0108, -1.4610, -0.3534, -0.4471, -0.2868, -0.2570, -0.5407,
        -0.6931, -0.5422, -0.7062, -0.6923, -0.4727, -0.6145, -0.4487, -0.9601,
        -0.9830, -0.6097, -0.3383, -0.5688, -0.5567, -0.6994, -0.5604, -0.1396,
        -0.3552, -0.7040, -0.4015, -0.6677, -1.1397, -0.7619, -0.5937, -0.9238,
        -0.6770, -0.4494, -0.5734, -0.19

In [40]:
# Load checkpoint_v3 onto the CPU under the name `check` (the same file another cell
# loads as `checkv3`).
check = torch.load('catr/checkpoint_v3.pth', map_location='cpu')

In [41]:
# List the tensor names stored under the checkpoint's 'model' entry.
check['model'].keys()

odict_keys(['backbone.0.body.conv1.weight', 'backbone.0.body.bn1.weight', 'backbone.0.body.bn1.bias', 'backbone.0.body.bn1.running_mean', 'backbone.0.body.bn1.running_var', 'backbone.0.body.layer1.0.conv1.weight', 'backbone.0.body.layer1.0.bn1.weight', 'backbone.0.body.layer1.0.bn1.bias', 'backbone.0.body.layer1.0.bn1.running_mean', 'backbone.0.body.layer1.0.bn1.running_var', 'backbone.0.body.layer1.0.conv2.weight', 'backbone.0.body.layer1.0.bn2.weight', 'backbone.0.body.layer1.0.bn2.bias', 'backbone.0.body.layer1.0.bn2.running_mean', 'backbone.0.body.layer1.0.bn2.running_var', 'backbone.0.body.layer1.0.conv3.weight', 'backbone.0.body.layer1.0.bn3.weight', 'backbone.0.body.layer1.0.bn3.bias', 'backbone.0.body.layer1.0.bn3.running_mean', 'backbone.0.body.layer1.0.bn3.running_var', 'backbone.0.body.layer1.0.downsample.0.weight', 'backbone.0.body.layer1.0.downsample.1.weight', 'backbone.0.body.layer1.0.downsample.1.bias', 'backbone.0.body.layer1.0.downsample.1.running_mean', 'backbone.0.b

## Check saved checkpoints and which parameters were updated

In [1]:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms

import numpy as np
import time
import sys
import os
import math
import tqdm

from nltk.tokenize import RegexpTokenizer
from transformers import BertTokenizer, AutoTokenizer
from PIL import Image
import argparse

from catr.models import caption
from catr.models import utils as mtils
from catr.datasets import coco, utils
from catr.cfg_damsm_bert import Config

import json, pickle
from pycocotools.coco import COCO as CC
import matplotlib.pyplot as plt

In [3]:
# In this section Config comes from catr.cfg_damsm_bert (see the section's import cell).
config = Config()

# build_model_v2 returns a (model, criterion) pair.
model, criterion = caption.build_model_v2(config)
print("Initializing from Checkpoint V3...")
checkv3 = torch.load('catr/checkpoint_v3.pth', map_location='cpu')
# strict=False lets load_state_dict tolerate keys that exist on only one side.
# The (truncated) key listings in this notebook show the checkpoint backbone under
# 'backbone.0.body.*' and this model's backbone under 'backbone.0.cnn_enc.*', so the
# backbone weights are presumably NOT taken from checkpoint_v3 here. Inspect the
# returned missing/unexpected key report to confirm.
model.load_state_dict(checkv3['model'], strict=False)

In [11]:
# Load an image-encoder checkpoint onto the CPU (presumably the DAMSM encoder at
# epoch 240, judging by the path; confirm) and list its tensor names.
# NOTE: absolute, machine-specific path; torch.load unpickles, so only load trusted files.
damsm = torch.load('/media/MyDataStor1/mmrl/MMRL/DAMSMencoders/coco/image_encoder240.pth', map_location='cpu')
print(damsm.keys())

odict_keys(['Conv2d_1a_3x3.conv.weight', 'Conv2d_1a_3x3.bn.weight', 'Conv2d_1a_3x3.bn.bias', 'Conv2d_1a_3x3.bn.running_mean', 'Conv2d_1a_3x3.bn.running_var', 'Conv2d_1a_3x3.bn.num_batches_tracked', 'Conv2d_2a_3x3.conv.weight', 'Conv2d_2a_3x3.bn.weight', 'Conv2d_2a_3x3.bn.bias', 'Conv2d_2a_3x3.bn.running_mean', 'Conv2d_2a_3x3.bn.running_var', 'Conv2d_2a_3x3.bn.num_batches_tracked', 'Conv2d_2b_3x3.conv.weight', 'Conv2d_2b_3x3.bn.weight', 'Conv2d_2b_3x3.bn.bias', 'Conv2d_2b_3x3.bn.running_mean', 'Conv2d_2b_3x3.bn.running_var', 'Conv2d_2b_3x3.bn.num_batches_tracked', 'Conv2d_3b_1x1.conv.weight', 'Conv2d_3b_1x1.bn.weight', 'Conv2d_3b_1x1.bn.bias', 'Conv2d_3b_1x1.bn.running_mean', 'Conv2d_3b_1x1.bn.running_var', 'Conv2d_3b_1x1.bn.num_batches_tracked', 'Conv2d_4a_3x3.conv.weight', 'Conv2d_4a_3x3.bn.weight', 'Conv2d_4a_3x3.bn.bias', 'Conv2d_4a_3x3.bn.running_mean', 'Conv2d_4a_3x3.bn.running_var', 'Conv2d_4a_3x3.bn.num_batches_tracked', 'Mixed_5b.branch1x1.conv.weight', 'Mixed_5b.branch1x1.bn.w

In [4]:
# Print the parameter and buffer names of the model built in this section, for comparison
# with the key names found in the loaded checkpoints.
print(model.state_dict().keys())

odict_keys(['backbone.0.cnn_enc.Conv2d_1a_3x3.conv.weight', 'backbone.0.cnn_enc.Conv2d_1a_3x3.bn.weight', 'backbone.0.cnn_enc.Conv2d_1a_3x3.bn.bias', 'backbone.0.cnn_enc.Conv2d_1a_3x3.bn.running_mean', 'backbone.0.cnn_enc.Conv2d_1a_3x3.bn.running_var', 'backbone.0.cnn_enc.Conv2d_1a_3x3.bn.num_batches_tracked', 'backbone.0.cnn_enc.Conv2d_2a_3x3.conv.weight', 'backbone.0.cnn_enc.Conv2d_2a_3x3.bn.weight', 'backbone.0.cnn_enc.Conv2d_2a_3x3.bn.bias', 'backbone.0.cnn_enc.Conv2d_2a_3x3.bn.running_mean', 'backbone.0.cnn_enc.Conv2d_2a_3x3.bn.running_var', 'backbone.0.cnn_enc.Conv2d_2a_3x3.bn.num_batches_tracked', 'backbone.0.cnn_enc.Conv2d_2b_3x3.conv.weight', 'backbone.0.cnn_enc.Conv2d_2b_3x3.bn.weight', 'backbone.0.cnn_enc.Conv2d_2b_3x3.bn.bias', 'backbone.0.cnn_enc.Conv2d_2b_3x3.bn.running_mean', 'backbone.0.cnn_enc.Conv2d_2b_3x3.bn.running_var', 'backbone.0.cnn_enc.Conv2d_2b_3x3.bn.num_batches_tracked', 'backbone.0.cnn_enc.Conv2d_3b_1x1.conv.weight', 'backbone.0.cnn_enc.Conv2d_3b_1x1.bn.wei

In [5]:
# Load a saved training checkpoint onto the CPU (epoch 4, judging by the file name).
check04 = torch.load('catr/checkpoints/damsm_256_256_coco2014_ep04.pth', map_location='cpu')

In [12]:
# Sanity check that the CNN encoder stayed frozen: print the same slice of the
# emb_features weight from the standalone encoder checkpoint, the live model and the
# saved training checkpoint so the three can be compared by eye.
for emb_weight in (
    damsm['emb_features.weight'],
    model.backbone[0].cnn_enc.emb_features.weight,
    check04['model']['backbone.0.cnn_enc.emb_features.weight'],
):
    print(emb_weight[0, :100, 0, 0])

tensor([-0.0590, -0.1501, -0.0086, -0.0850, -0.0030, -0.0010,  0.0194, -0.0600,
         0.0786, -0.1096,  0.0565,  0.0559,  0.0719, -0.0230,  0.0372, -0.0197,
        -0.1296, -0.0060, -0.0792,  0.0665,  0.0838,  0.0810,  0.0181,  0.0207,
         0.0948, -0.1053,  0.0784,  0.0048,  0.0373, -0.0163,  0.0286, -0.0456,
        -0.0563, -0.1205, -0.0651,  0.0082, -0.1173,  0.0254, -0.0026, -0.0733,
         0.0523, -0.0199,  0.0694,  0.0424,  0.0095, -0.0098,  0.0124, -0.0710,
        -0.0034, -0.0253,  0.0016,  0.0536, -0.0715, -0.0145, -0.0640, -0.0334,
        -0.1101,  0.0335,  0.0679, -0.0335,  0.0294,  0.0346, -0.0173,  0.0762,
        -0.0920, -0.0331,  0.0653,  0.1155, -0.0199, -0.0561,  0.0788, -0.0913,
        -0.0656, -0.0290,  0.1160,  0.0224,  0.1160, -0.0726,  0.0077, -0.0680,
        -0.0003, -0.0394, -0.0661,  0.0265,  0.0790,  0.1629, -0.0051,  0.0287,
         0.1102, -0.0342,  0.0832, -0.0706,  0.0650, -0.0732, -0.0032,  0.0829,
         0.1496, -0.0425,  0.0960, -0.00

In [17]:
# Print the first 100 entries of the input_proj_v2 bias from the live model and from
# the saved training checkpoint, one after the other, for a visual comparison.
for proj_bias in (model.input_proj_v2.bias, check04['model']['input_proj_v2.bias']):
    print(proj_bias[:100])

tensor([-0.0245, -0.0136, -0.0128, -0.0173, -0.0078,  0.0242, -0.0347,  0.0597,
        -0.0166,  0.0004, -0.0201,  0.0144, -0.0124,  0.0543, -0.0583,  0.0570,
        -0.0302,  0.0088, -0.0501,  0.0138, -0.0172,  0.0271,  0.0002,  0.0500,
         0.0201,  0.0036, -0.0472,  0.0456,  0.0228, -0.0202,  0.0211, -0.0101,
         0.0018, -0.0431,  0.0571, -0.0534, -0.0027, -0.0197, -0.0084, -0.0124,
         0.0044, -0.0367, -0.0387, -0.0154, -0.0258,  0.0550,  0.0079,  0.0597,
         0.0432, -0.0534,  0.0577, -0.0336,  0.0102,  0.0566,  0.0423, -0.0119,
        -0.0426, -0.0200, -0.0187,  0.0530,  0.0120, -0.0229,  0.0260, -0.0623,
         0.0495, -0.0396,  0.0053,  0.0533,  0.0516,  0.0490, -0.0484,  0.0096,
        -0.0186,  0.0306, -0.0617,  0.0446, -0.0303,  0.0460,  0.0139, -0.0235,
         0.0103,  0.0545,  0.0071, -0.0541, -0.0164, -0.0535,  0.0101,  0.0018,
         0.0254,  0.0414, -0.0573, -0.0496, -0.0346, -0.0251, -0.0419, -0.0032,
        -0.0232, -0.0119, -0.0459, -0.03

## Make dataloader JSON/pickle files; check dataloader

In [1]:
import json, pickle, os, sys

In [11]:
# Load the COCO train2014 caption annotations and peek at their structure.
# NOTE: absolute, machine-specific data directory.
train_file = os.path.join(
            '/media/MyDataStor1/mmrl/MMRL/data/coco', 'annotations', 'captions_train2014.json')
# JSON text is UTF-8; pass the encoding explicitly so decoding does not depend on the
# machine's locale default.
with open(train_file, 'r', encoding='utf-8') as f:
    d = json.load(f)
# Number of caption records, the top-level keys, and the first two records as a sample.
print(len(d['annotations']))
print(d.keys())
print(d['annotations'][:2])

414113
dict_keys(['info', 'images', 'licenses', 'annotations'])
[{'image_id': 318556, 'id': 48, 'caption': 'A very clean and well decorated empty bathroom'}, {'image_id': 116100, 'id': 67, 'caption': 'A panoramic view of a kitchen and all of its appliances.'}]


In [6]:
# Cache the parsed training annotations (`d`, produced by the JSON-loading cell) as a
# pickle file next to the JSON source.
tr_pk_file = os.path.join(
    '/media/MyDataStor1/mmrl/MMRL/data/coco', 'annotations', 'captions_train2014.pickle')
with open(tr_pk_file, mode='wb') as f:
    pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)

In [9]:
# Load the COCO val2014 caption annotations and peek at their structure.
# NOTE: absolute, machine-specific data directory.
val_file = os.path.join(
            '/media/MyDataStor1/mmrl/MMRL/data/coco', 'annotations', 'captions_val2014.json')
# JSON text is UTF-8; pass the encoding explicitly so decoding does not depend on the
# machine's locale default.
with open(val_file, 'r', encoding='utf-8') as f:
    g = json.load(f)
# Number of caption records, the top-level keys, and the first two records as a sample.
print(len(g['annotations']))
print(g.keys())
print(g['annotations'][:2])

202654
dict_keys(['info', 'images', 'licenses', 'annotations'])
[{'image_id': 203564, 'id': 37, 'caption': 'A bicycle replica with a clock as the front wheel.'}, {'image_id': 179765, 'id': 38, 'caption': 'A black Honda motorcycle parked in front of a garage.'}]


In [10]:
# Cache the parsed validation annotations (`g`, produced by the JSON-loading cell) as a
# pickle file next to the JSON source.
vl_pk_file = os.path.join(
    '/media/MyDataStor1/mmrl/MMRL/data/coco', 'annotations', 'captions_val2014.pickle')
with open(vl_pk_file, mode='wb') as f:
    pickle.dump(g, f, pickle.HIGHEST_PROTOCOL)