## Initializing a neural network in PyTorch

### For single layer

In [None]:
# `...` is a placeholder for the real Conv2d arguments
# (in_channels, out_channels, kernel_size, ...).
conv1 = torch.nn.Conv2d(...)
# Use the in-place initializer `xavier_uniform_` (trailing underscore); the
# un-suffixed `xavier_uniform` is a deprecated alias for it.
torch.nn.init.xavier_uniform_(conv1.weight)

In [None]:
# for LSTM
def init_hidden(self, batch_size):
    """Create zero-filled initial hidden and cell states for an LSTM.

    Args:
        batch_size: Number of sequences in the batch.

    Returns:
        A tuple ``(h_0, c_0)``; each tensor has shape
        ``(self.num_layers, batch_size, self.hidden_size)``.
    """
    shape = (self.num_layers, batch_size, self.hidden_size)
    # Plain tensors are sufficient: the legacy `Variable` wrapper is not
    # needed to pass initial states to an LSTM.
    h_0 = torch.zeros(shape)
    c_0 = torch.zeros(shape)
    # The states must be returned, otherwise the caller has nothing to pass on.
    return h_0, c_0

### For entire module

In [None]:
def init_weights(m):
    """Initialize one module; meant to be passed to ``Module.apply``.

    Linear layers (including subclasses) get Xavier-uniform weights and a
    constant bias of 0.01.  Every other module type is left untouched.

    Args:
        m: The module currently visited by ``apply``.
    """
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
        # Layers built with bias=False have `bias is None`.
        if m.bias is not None:
            torch.nn.init.constant_(m.bias, 0.01)


net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
# `apply` calls init_weights on every sub-module and on `net` itself.
net.apply(init_weights)

In [None]:
def weights_init(m):
    """Initialize a convolution layer; meant to be passed to ``Module.apply``.

    ``nn.Conv2d`` modules get Xavier-uniform weights and a zero bias; any
    other module is left untouched.

    Args:
        m: The module currently visited by ``apply``.
    """
    if isinstance(m, nn.Conv2d):
        nn.init.xavier_uniform_(m.weight)
        # Xavier initialization needs fan-in and fan-out, which are undefined
        # for the 1-D bias vector, so the bias is simply zeroed.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

In [None]:
# Fill the u-embedding weights in place with draws from the uniform
# distribution over [-initrange, initrange].
u_embeddings.weight.data.uniform_(-initrange, initrange)
# The range [-0, 0] is degenerate, so every v-embedding weight ends up as zero.
v_embeddings.weight.data.uniform_(-0, 0)

### Add a bias

In [None]:
# Overwrite every element of the layer's bias with the constant 0.01 (in place).
conv1.bias.data.fill_(0.01)

## Basic Operation of tensors

In [2]:
import torch

In [3]:
# Request a float dtype explicitly: with an integer argument torch.arange
# returns an integer tensor on current PyTorch, while the outputs recorded in
# this notebook (and float-only ops such as torch.mean) assume floats.
v = torch.arange(9, dtype=torch.float)
# Reshape the 9 values into a 3x3 matrix (rows 0-2, 3-5, 6-8).
v = v.view(3, 3)

In [5]:
v

tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.]])

In [11]:
# Gather elements along one dimension.
# torch.gather(input, dim, index, out=None)
# out[i][j][k] = input[index[i][j][k]][j][k]  # if dim == 0
# out[i][j][k] = input[i][index[i][j][k]][k]  # if dim == 1
# out[i][j][k] = input[i][j][index[i][j][k]]  # if dim == 2

# With dim=1, row i of the result is v[i][index[i][0]], v[i][index[i][1]].
# Expected result:
# 0  1
# 4  3
# 8  7
r = torch.gather(v, 1, torch.LongTensor([[0,1],[1,0],[2,1]]))
print (r)

tensor([[ 0.,  1.],
        [ 4.,  3.],
        [ 8.,  7.]])


In [12]:
# Index select: keep only the entries at positions 0 and 2 along dim 1,
# i.e. columns 0 and 2 of v. Expected result:
# 0 2
# 3 5
# 6 8
indices = torch.LongTensor([0, 2])
r = torch.index_select(v, 1, indices)
print (r)

tensor([[ 0.,  2.],
        [ 3.,  5.],
        [ 6.,  8.]])


In [16]:
# Masked select: v.ge(3) is the element-wise mask "v >= 3"; masked_select
# returns the selected values as a flat 1-D tensor.
mask = v.ge(3)
r = torch.masked_select(v, mask)
print (r)

tensor([ 3.,  4.,  5.,  6.,  7.,  8.])


In [20]:
# torch.nonzero returns one row of coordinates per non-zero element.
# Note: numpy.nonzero returns a tuple of per-dimension index arrays instead,
# so getting the same coordinate pairs from numpy requires a transpose,
# e.g. np.transpose(np.nonzero(a)).
r = torch.nonzero(v)
print (r)

tensor([[ 0,  1],
        [ 0,  2],
        [ 1,  0],
        [ 1,  1],
        [ 1,  2],
        [ 2,  0],
        [ 2,  1],
        [ 2,  2]])


In [21]:
# Clamp every element into [min, max]: values above 0.5 become 0.5 and values
# below -0.5 become -0.5.
r = torch.clamp(v, min=-0.5, max=0.5)
print (r)

tensor([[ 0.0000,  0.5000,  0.5000],
        [ 0.5000,  0.5000,  0.5000],
        [ 0.5000,  0.5000,  0.5000]])


In [22]:
# Add the scalar 10 to every element of v.
r = torch.add(v, 10)
print (r)

tensor([[ 10.,  11.,  12.],
        [ 13.,  14.,  15.],
        [ 16.,  17.,  18.]])


In [23]:
# The + operator adds the scalar to every element, like torch.add.
r = v+3
r

tensor([[  3.,   4.,   5.],
        [  6.,   7.,   8.],
        [  9.,  10.,  11.]])

In [24]:
# p-norm of the difference between two tensors: torch.dist(a, b, p) = ||a - b||_p.
# Each of the 9 element-wise differences here is 3, so the 2-norm is
# sqrt(9 * 3**2) = 9.
r = torch.dist(v, v+3, p=2)
r

tensor(9.)

In [25]:
# Mean along dim 1, i.e. one mean per row of v.
r = torch.mean(v, 1)
r

tensor([ 1.,  4.,  7.])

In [26]:
# Element-wise equality; comparing v with itself is true at every position.
r = torch.eq(v, v)
r

tensor([[ 1,  1,  1],
        [ 1,  1,  1],
        [ 1,  1,  1]], dtype=torch.uint8)

In [30]:
# Maximum along dim 1 (one result per row).
r = torch.max(v, 1)
r
# The result is a pair: the first element holds the maximum values, the
# second element holds the indices at which they occur.

(tensor([ 2.,  5.,  8.]), tensor([ 2,  2,  2]))

In [31]:
# Sort along dim 1 (ascending by default); the result is a pair of
# (sorted values, indices of those values in the input).
r = torch.sort(v, 1)
r

(tensor([[ 0.,  1.,  2.],
         [ 3.,  4.,  5.],
         [ 6.,  7.,  8.]]), tensor([[ 0,  1,  2],
         [ 0,  1,  2],
         [ 0,  1,  2]]))

In [35]:
# torch.kthvalue(input, k, dim): the k-th smallest element (k counts from 1)
# along `dim`, returned together with its index as a (values, indices) pair.
# Second smallest of each column (dim=0): values 3 4 5, indices 1 1 1.
r = torch.kthvalue(v, 2, 0)
print (r)
# Second smallest of each row (dim=1): values 1 4 7, indices 1 1 1.
r = torch.kthvalue(v, 2, 1)
print (r)

(tensor([ 3.,  4.,  5.]), tensor([ 1,  1,  1]))
(tensor([ 1.,  4.,  7.]), tensor([ 1,  1,  1]))


In [42]:
# torch.topk(input, k, dim): the k largest elements along `dim`, in
# descending order, returned as a (values, indices) pair.
# Largest element of each row (dim=1): values 2 5 8, indices 2 2 2,
# each with shape 3x1.
r = torch.topk(v, 1, 1)
print  (r)

# Two largest elements of each column (dim=0): rows [6 7 8] and [3 4 5].
r = torch.topk(v,2,0)
print (r)

(tensor([[ 2.],
        [ 5.],
        [ 8.]]), tensor([[ 2],
        [ 2],
        [ 2]]))
(tensor([[ 6.,  7.,  8.],
        [ 3.,  4.,  5.]]), tensor([[ 2,  2,  2],
        [ 1,  1,  1]]))


In [44]:
# Matrix-vector multiplication: a (2, 4) matrix times a length-4 vector gives
# a length-2 vector.
mat = torch.randn(2, 4)
vec = torch.randn(4)
r = torch.mv(mat, vec)
r.shape

torch.Size([2])

In [48]:
# Batch matrix multiplication: for each of the 10 batch entries, a (3, 4)
# matrix is multiplied by a (4, 5) matrix, giving a (10, 3, 5) result.
batch1 = torch.randn(10, 3, 4)
batch2 = torch.randn(10, 4, 5)
r = torch.bmm(batch1, batch2)
r.shape

torch.Size([10, 3, 5])

In [None]:
>>> x = torch.Tensor([[1], [2], [3]])
>>> x.size()
torch.Size([3, 1])
>>> x.expand(3, 4)
 1  1  1  1
 2  2  2  2
 3  3  3  3
[torch.FloatTensor of size 3x4]
>>> x.expand(-1, 4)   # -1 means not changing the size of that dimension
 1  1  1  1
 2  2  2  2
 3  3  3  3
[torch.FloatTensor of size 3x4]


## Sorting a list and restoring the original order (useful when packing and padding sequences)

In [49]:
A = [1,4,2,7,9]

In [52]:
torch.Tensor(A)

tensor([ 1.,  4.,  2.,  7.,  9.])

In [65]:
# 1-D case: the only dimension is dim=0, so sort along dim=0 (not dim=1).
# idx_sort[i] is the position in A of the i-th largest value.
_,idx_sort = torch.sort(torch.Tensor(A),dim = 0, descending=True)
idx_sort

tensor([ 4,  3,  1,  2,  0])

In [71]:
# Indexing with the sort permutation reorders A into descending order.
C = torch.Tensor(A)[idx_sort]
C

tensor([ 9.,  7.,  4.,  2.,  1.])

In [66]:
# Sorting the permutation itself (ascending) yields its inverse:
# idx_unsort[j] is the position that original element j occupies in the
# sorted order.
_,idx_unsort = torch.sort(idx_sort,dim = 0)
idx_unsort

tensor([ 4,  2,  3,  1,  0])

In [70]:
# Indexing the sorted tensor with the inverse permutation restores the
# original order of A.
D = C[idx_unsort]
D

tensor([ 1.,  4.,  2.,  7.,  9.])

## Gradient clipping

In [None]:
'''Parameters:
parameters (Iterable[Tensor] or Tensor) – an iterable of Tensors or a single Tensor that will have gradients normalized
clip_value (float or int) – maximum allowed value of the gradients. The gradients are clipped in the range [-clip_value, clip_value].
'''

torch.nn.utils.clip_grad_value_(parameters, clip_value)

## Data Loader

#### class torch.utils.data.Dataset

    An abstract class representing a Dataset.

    All other datasets should subclass it. All subclasses should override __len__, that provides the size of the dataset, and __getitem__, supporting integer indexing in range from 0 to len(self) exclusive.


In [None]:
class CoCoDataset(data.Dataset):
    """COCO image-captioning dataset.

    In ``'train'`` mode an item is ``(image, caption)``: the transformed image
    and the caption encoded as a tensor of word ids, wrapped in the start and
    end tokens.  In any other mode an item is ``(orig_image, image)``: the
    untouched image as a NumPy array and the transformed image.
    """

    def __init__(self, transform, mode, batch_size, vocab_threshold, vocab_file, start_word,
        end_word, unk_word, annotations_file, vocab_from_file, img_folder):
        """Load the vocabulary and index the annotations.

        Args:
            transform: Callable applied to each PIL image.
            mode: ``'train'`` to serve image/caption pairs; anything else
                serves images only.
            batch_size: Number of indices drawn by ``get_train_indices``.
            vocab_threshold, vocab_file, start_word, end_word, unk_word,
                vocab_from_file: Forwarded unchanged to ``Vocabulary``.
            annotations_file: COCO captions file in train mode; otherwise a
                JSON file with an ``'images'`` list of ``'file_name'`` entries.
            img_folder: Directory that contains the image files.
        """
        self.transform = transform
        self.mode = mode
        self.batch_size = batch_size
        self.vocab = Vocabulary(vocab_threshold, vocab_file, start_word,
            end_word, unk_word, annotations_file, vocab_from_file)
        self.img_folder = img_folder
        if self.mode == 'train':
            self.coco = COCO(annotations_file)
            self.ids = list(self.coco.anns.keys())
            print('Obtaining caption lengths...')
            # Only the token counts are needed later, so the token lists
            # themselves are not kept in memory.
            self.caption_lengths = [
                len(self._tokenize(self.coco.anns[ann_id]['caption']))
                for ann_id in tqdm(self.ids)
            ]
        else:
            # Context manager so the annotation file handle is always closed.
            with open(annotations_file) as handle:
                test_info = json.load(handle)
            self.paths = [item['file_name'] for item in test_info['images']]

    @staticmethod
    def _tokenize(caption):
        """Lower-case a caption and split it into word tokens."""
        return nltk.tokenize.word_tokenize(str(caption).lower())

    def _load_rgb(self, file_name):
        """Open an image from ``img_folder`` and return an RGB copy of it."""
        # `convert` returns a new image, so the source file can be closed.
        with Image.open(os.path.join(self.img_folder, file_name)) as raw:
            return raw.convert('RGB')

    def __getitem__(self, index):
        """Return the item at ``index`` (see the class docstring for its shape)."""
        # obtain image and caption if in training mode
        if self.mode == 'train':
            ann_id = self.ids[index]
            annotation = self.coco.anns[ann_id]
            path = self.coco.loadImgs(annotation['image_id'])[0]['file_name']

            # Convert image to tensor and pre-process using transform
            image = self.transform(self._load_rgb(path))

            # Convert caption to a tensor of word ids: <start> tokens... <end>.
            word_ids = [self.vocab(self.vocab.start_word)]
            word_ids.extend(self.vocab(token) for token in self._tokenize(annotation['caption']))
            word_ids.append(self.vocab(self.vocab.end_word))
            # Build the integer tensor directly rather than converting from a
            # float32 tensor, which cannot represent every large integer id.
            caption = torch.tensor(word_ids, dtype=torch.long)

            # return pre-processed image and caption tensors
            return image, caption

        # obtain image if in test mode
        PIL_image = self._load_rgb(self.paths[index])
        orig_image = np.array(PIL_image)
        image = self.transform(PIL_image)

        # return original image and pre-processed image tensor
        return orig_image, image

    def get_train_indices(self):
        """Sample ``batch_size`` indices whose captions share one token length.

        A caption length is drawn at random from ``caption_lengths`` (so
        frequent lengths are more likely), then indices with exactly that
        length are drawn with replacement.

        Returns:
            A list of ``batch_size`` dataset indices.
        """
        sel_length = np.random.choice(self.caption_lengths)
        # Vectorized comparison instead of a Python-level loop over all captions.
        all_indices = np.where(np.asarray(self.caption_lengths) == sel_length)[0]
        indices = list(np.random.choice(all_indices, size=self.batch_size))
        return indices

    def __len__(self):
        """Number of annotations (train mode) or test images (otherwise)."""
        if self.mode == 'train':
            return len(self.ids)
        else:
            return len(self.paths)

#### data loader source code

In [None]:

# BatchSampler is a wrapper around an ordinary Sampler: a plain Sampler yields
# a single index at a time, while a BatchSampler yields a whole batch of
# indices at a time.
class DataLoader(object):
    """Combine a dataset with a batch sampler that decides which indices form each batch."""

    def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None,
                 batch_sampler=None,
                 num_workers=0, collate_fn=default_collate, pin_memory=False,
                 drop_last=False):
        """Store the configuration and build the samplers.

        Raises:
            ValueError: if ``batch_sampler`` is combined with ``batch_size``,
                ``shuffle``, ``sampler`` or ``drop_last``, or if ``sampler``
                is combined with ``shuffle``.
        """
        self.dataset = dataset
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.collate_fn = collate_fn
        self.pin_memory = pin_memory
        self.drop_last = drop_last

        has_custom_batches = batch_sampler is not None

        # A user-supplied batch sampler already fixes batch size, order and
        # remainder handling, so none of the related options may be set.
        if has_custom_batches and (batch_size > 1 or shuffle or sampler is not None or drop_last):
            raise ValueError('batch_sampler is mutually exclusive with '
                             'batch_size, shuffle, sampler, and drop_last')

        if sampler is not None and shuffle:
            raise ValueError('sampler is mutually exclusive with shuffle')

        if not has_custom_batches:
            if sampler is None:
                # The sampler is built from the dataset and produces one index
                # per dataset element: in random order when shuffling,
                # otherwise in sequential order.
                sampler_type = RandomSampler if shuffle else SequentialSampler
                sampler = sampler_type(dataset)
            # A Sampler is an iterator that returns one index at a time; a
            # BatchSampler is also an iterator but returns batch_size indices
            # at a time.
            batch_sampler = BatchSampler(sampler, batch_size, drop_last)

        self.sampler = sampler
        self.batch_sampler = batch_sampler

    def __iter__(self):
        """Return a new iterator over the batches of this loader."""
        return DataLoaderIter(self)

    def __len__(self):
        """Return the number of batches, as reported by the batch sampler."""
        return len(self.batch_sampler)

In [None]:
# sample of data_loader
def get_loader(transform,
               mode='train',
               batch_size=1,
               vocab_threshold=None,
               vocab_file='./vocab.pkl',
               start_word="<start>",
               end_word="<end>",
               unk_word="<unk>",
               vocab_from_file=True,
               num_workers=0,
               cocoapi_loc='.'):
    """Build the COCO captioning data loader for training or testing.

    Args:
      transform: Image transform.
      mode: One of 'train' or 'test'.
      batch_size: Batch size (must be 1 in test mode).
      vocab_threshold: Minimum word count threshold.
      vocab_file: File containing the vocabulary.
      start_word: Special word denoting sentence start.
      end_word: Special word denoting sentence end.
      unk_word: Special word denoting unknown words.
      vocab_from_file: If False, create the vocab from scratch and override
                       any existing vocab_file. If True, load the vocab from
                       the existing vocab_file, which must exist.
      num_workers: Number of subprocesses to use for data loading.
      cocoapi_loc: Folder containing the COCO API checkout:
                   https://github.com/cocodataset/cocoapi

    Returns:
      The data loader for the requested mode.

    Raises:
      AssertionError: if the arguments are inconsistent with the mode or a
                      required vocab_file is missing.
    """
    # The comparisons against True/False are kept as equality tests on purpose.
    assert mode in ['train', 'test'], "mode must be one of 'train' or 'test'."
    if vocab_from_file == False:
        assert mode == 'train', "To generate vocab from captions file, must be in training mode (mode='train')."

    # Pick the image folder and annotation file that belong to the mode.
    if mode == 'train':
        if vocab_from_file == True:
            assert os.path.exists(vocab_file), "vocab_file does not exist.  Change vocab_from_file to False to create vocab_file."
        image_subdir = 'cocoapi/images/train2014/'
        annotation_subpath = 'cocoapi/annotations/captions_train2014.json'
    if mode == 'test':
        assert batch_size == 1, "Please change batch_size to 1 if testing your model."
        assert os.path.exists(vocab_file), "Must first generate vocab.pkl from training data."
        assert vocab_from_file == True, "Change vocab_from_file to True."
        image_subdir = 'cocoapi/images/test2014/'
        annotation_subpath = 'cocoapi/annotations/image_info_test2014.json'
    img_folder = os.path.join(cocoapi_loc, image_subdir)
    annotations_file = os.path.join(cocoapi_loc, annotation_subpath)

    # COCO caption dataset.
    dataset = CoCoDataset(transform=transform,
                          mode=mode,
                          batch_size=batch_size,
                          vocab_threshold=vocab_threshold,
                          vocab_file=vocab_file,
                          start_word=start_word,
                          end_word=end_word,
                          unk_word=unk_word,
                          annotations_file=annotations_file,
                          vocab_from_file=vocab_from_file,
                          img_folder=img_folder)

    if mode != 'train':
        # Test mode: an ordinary shuffled loader over the images.
        return data.DataLoader(dataset=dataset,
                               batch_size=dataset.batch_size,
                               shuffle=True,
                               num_workers=num_workers)

    # Train mode: draw one set of indices via the dataset's
    # get_train_indices() and serve them through a batch sampler.
    sampled_indices = dataset.get_train_indices()
    subset_sampler = data.sampler.SubsetRandomSampler(indices=sampled_indices)
    batch_sampler = data.sampler.BatchSampler(sampler=subset_sampler,
                                              batch_size=dataset.batch_size,
                                              drop_last=False)
    return data.DataLoader(dataset=dataset,
                           num_workers=num_workers,
                           batch_sampler=batch_sampler)



#### Loading data from the DataLoader

In [None]:
# The two snippets below are equivalent.
for data in dataloader:
    ...
# is equivalent to
iterr = iter(dataloader)
while True:
    try:
        next(iterr)
    except StopIteration:
        # Raised by next() once the loader has no more batches.
        break

## Encoder

In [132]:
#sample 1
import torch
import torch.nn as nn
import torchvision.models as models
class EncoderCNN(nn.Module):
    """Image encoder: a frozen pretrained ResNet-50 followed by a trainable linear projection."""

    def __init__(self, embed_size):
        """Build the encoder.

        Args:
            embed_size: Size of the feature vector produced for each image.
        """
        super(EncoderCNN, self).__init__()
        backbone = models.resnet50(pretrained=True)
        # Freeze the pretrained weights; only the projection below is trained.
        for weight in backbone.parameters():
            weight.requires_grad_(False)

        # Keep every child module of the backbone except the last one.
        self.resnet = nn.Sequential(*list(backbone.children())[:-1])
        # The projection takes the input width of the backbone's `fc` layer.
        self.embed = nn.Linear(backbone.fc.in_features, embed_size)

    def forward(self, images):
        """Encode a batch of images into ``(batch, embed_size)`` features."""
        pooled = self.resnet(images)
        # Flatten everything after the batch dimension.
        flat = pooled.view(pooled.size(0), -1)
        return self.embed(flat)

## Decoder

In [133]:
class DecoderRNN(nn.Module):
    """LSTM caption decoder: word embedding -> LSTM -> linear layer over the vocabulary."""

    def __init__(self, embed_size, hidden_size, vocab_size, num_layers=1, drop_prob = 0.4):
        """Build the decoder.

        Args:
            embed_size: Size of the word embeddings and of the image features.
            hidden_size: Size of the LSTM hidden state.
            vocab_size: Number of words in the vocabulary.
            num_layers: Number of stacked LSTM layers.
            drop_prob: Dropout probability between stacked LSTM layers.
        """
        super(DecoderRNN, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        # nn.LSTM applies dropout only between stacked layers, so with a
        # single layer it has no effect and merely triggers a warning.
        lstm_dropout = drop_prob if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, dropout = lstm_dropout, batch_first=True)
        self.linear = nn.Linear(hidden_size, vocab_size)

    def forward(self, features, captions):
        """Score every vocabulary word at every step (teacher forcing).

        Args:
            features: Image features of shape ``(batch, embed_size)``.
            captions: Word ids of shape ``(batch, seq_len)``.

        Returns:
            Scores of shape ``(batch, seq_len, vocab_size)``.
        """
        # Drop the last word: the image feature is prepended as the first
        # input, which keeps the sequence length equal to `seq_len`.
        captions = captions[:,:-1]
        embeddings = self.embed(captions)
        embeddings = torch.cat((features.unsqueeze(1), embeddings), 1)
        out,_ = self.lstm(embeddings)
        out = self.linear(out)

        return out

    def sample(self, inputs, states=None, max_len=20):
        """Greedily decode a caption for a single image.

        Args:
            inputs: Pre-processed image feature of shape ``(1, 1, embed_size)``.
            states: Optional initial LSTM ``(h, c)`` state; ``None`` starts
                from zeros.
            max_len: Number of words to generate.

        Returns:
            A list of ``max_len`` predicted word ids (Python ints).
        """
        created_wordid = []

        # Inference only: no gradients are needed.
        with torch.no_grad():
            for _ in range(max_len):
                # Feed the state back in so each step continues the sequence
                # instead of restarting the LSTM from a zero state.
                out, states = self.lstm(inputs, states)
                output = self.linear(out.squeeze(1))
                wordid = output.max(1)[1]
                created_wordid.append(wordid.item())
                # The predicted word becomes the next input.
                inputs = self.embed(wordid).unsqueeze(1)
        return created_wordid

## Learning rate decay

In [1]:
import torch

In [None]:
def poly_lr_scheduler(optimizer, init_lr, iter, lr_decay_iter=1,
                      max_iter=100, power=0.9):
    """Apply polynomial learning-rate decay to every parameter group.

    The learning rate is ``init_lr * (1 - iter / max_iter) ** power``.

        :param optimizer: optimizer whose ``param_groups`` are updated in place
        :param init_lr: base learning rate
        :param iter: current iteration
        :param lr_decay_iter: how frequently decay occurs, default is 1
        :param max_iter: maximum number of iterations
        :param power: polynomial power
        :return: the learning rate in effect after the call; on iterations
            where no update happens this is the rate currently stored in the
            optimizer's first parameter group

    """
    # Skip iterations that are off the decay schedule or past the end (past
    # max_iter the base of the power would be negative).
    if iter % lr_decay_iter or iter > max_iter:
        groups = optimizer.param_groups
        # Return a learning rate here as well, so the return type does not
        # depend on which branch ran.
        return groups[0]['lr'] if groups else init_lr

    # float() keeps this a true division even for integer arguments.
    lr = init_lr * (1 - float(iter) / max_iter) ** power
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    return lr