In [1]:
from torchvision import models
from torch.autograd import Variable

## Utilities
import random

## Libraries
import numpy as np


## Torch
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import torch
import torch.nn as nn

In [2]:
# Run-wide settings. `batch_size` is consumed by the DataLoaders further down;
# `save_dir` is not referenced by any cell visible in this notebook.
batch_size = 64
save_dir = './snapshots'

# Per-channel ImageNet mean/std, used to normalise inputs for finetuning.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]
normalize = transforms.Normalize(mean=imagenet_mean, std=imagenet_std)

In [3]:
# Seed every random number generator the notebook relies on, for reproducibility.
# (Caveat: some CuDNN algorithms are non-deterministic regardless of seeding.)
SEED = 1337
for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, np.random.seed, random.seed):
    seed_fn(SEED)

In [4]:
# Preprocessing without augmentation, for validation and test images:
# centre crop to 224, convert to a tensor, then apply `normalize`.
# Note: `ds_transform_raw` is rebound by a later cell of this notebook.
raw_steps = [
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
]
ds_transform_raw = transforms.Compose(raw_steps)

In [57]:
# Load model from best iteration
from src.p_neuro import ResNet50

class CNN(nn.Module):
    """Frozen ResNet50 feature extractor followed by a linear embedding head.

    A project ``ResNet50`` is built and its weights are restored from a
    training snapshot. All of its child modules except the last one are kept
    as ``self.features`` with gradients disabled; a fresh ``Linear`` +
    ``BatchNorm1d`` head maps the flattened features to ``embed_size`` values.

    NOTE(review): the original comment said "We use ResNet weights from
    PyCaffe"; the code here only shows a restore from the snapshot file, so
    the provenance of the backbone weights should be confirmed.

    Args:
        embed_size: Number of output features of the embedding head.
        model_path: Snapshot file to restore. The loaded object must expose
            the backbone weights under the ``'state_dict'`` key.
        num_classes: Argument passed to ``ResNet50`` so that the architecture
            matches the snapshot.
    """

    def __init__(self, embed_size,
                 model_path='./snapshots/2017-05-06_1235-cloud-habitation-PowerPIL-model_best.pth',
                 num_classes=17):
        super(CNN, self).__init__()

        # Rebuild the backbone and restore its trained weights.
        original_model = ResNet50(num_classes)
        # Deserialise onto the CPU so the restore does not depend on the
        # device the snapshot was saved from; callers move the module later.
        checkpoint = torch.load(model_path,
                                map_location=lambda storage, loc: storage)
        original_model.load_state_dict(checkpoint['state_dict'])

        # Everything except the last child module.
        # NOTE(review): assumes the dropped child is `classifier` — confirm in src.p_neuro.
        self.features = nn.Sequential(*list(original_model.children())[:-1])

        # Freeze the backbone weights.
        for p in self.features.parameters():
            p.requires_grad = False

        # Input width of the first classifier layer == flattened feature size.
        num_feats = original_model.classifier[0].in_features

        # New head; these parameters keep requires_grad=True.
        self.fc = nn.Linear(num_feats, embed_size)
        self.bn = nn.BatchNorm1d(embed_size, momentum=0.01)

    def forward(self, x):
        """Embed a batch: backbone features -> flatten -> Linear -> BatchNorm1d.

        Args:
            x: Input batch for the backbone (presumably normalised images of
                shape (batch, 3, H, W) — TODO confirm).

        Returns:
            Tensor with ``embed_size`` features per sample.
        """
        f = self.features(x)
        # Collapse every dimension after the batch dimension.
        f = f.view(f.size(0), -1)
        out = self.fc(f)
        out = self.bn(out)
        return out

In [58]:
# Build the image encoder with a 2048-wide embedding, then move it to the GPU.
encoderCNN = CNN(embed_size=2048)
encoderCNN = encoderCNN.cuda()

In [59]:
from torch.nn.init import kaiming_normal

In [94]:
class DecoderRNN(nn.Module):
    """Stacked GRU followed by a linear classifier over per-sample features.

    Each feature vector is presented to the GRU as a sequence of length one
    (``batch_first`` layout), and the GRU output is projected to class scores.

    Args:
        num_feats: Size of each input feature vector (GRU ``input_size``).
        num_classes: Number of scores produced per sample.
        hidden_size: GRU hidden width; also the classifier's input width.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, num_feats, num_classes, hidden_size, num_layers):
        super(DecoderRNN, self).__init__()
        self.rnn = nn.GRU(input_size=num_feats,
                          hidden_size=hidden_size,
                          num_layers=num_layers,
                          batch_first=True)
        self.classifier = nn.Linear(hidden_size, num_classes)
        self.hidden_size = hidden_size

        # Kaiming-normal init of the last layer. The in-place `kaiming_normal_`
        # replaces the deprecated un-suffixed `kaiming_normal` alias.
        nn.init.kaiming_normal_(self.classifier.weight)

    def forward(self, feats, hidden=None):
        """Score a batch of feature vectors.

        Args:
            feats: Feature batch; a sequence axis of length one is inserted
                at position 1 before the GRU (so presumably (batch, num_feats)).
            hidden: Optional initial GRU hidden state.

        Returns:
            Tensor of class scores with ``num_classes`` columns. The updated
            hidden state is not returned.
        """
        x, hidden = self.rnn(feats.unsqueeze(1), hidden)
        # Merge the batch and sequence axes before the classifier.
        x = x.view(-1, self.hidden_size)
        x = self.classifier(x)
        return x

In [95]:
# Build the decoder (2048 input features -> 17 outputs) and move it to the GPU.
decoderRNN = DecoderRNN(num_feats=2048,
                        num_classes=17,
                        hidden_size=64,
                        num_layers=10)
decoderRNN = decoderRNN.cuda()

In [96]:
## Normalization on ImageNet mean/std for finetuning
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Only the decoder's parameters are optimised here (not the whole model).
# NOTE(review): encoderCNN.fc / encoderCNN.bn are created with trainable
# parameters but are not handed to this optimizer — confirm that is intended.
# Original note: "p_training has lr_decay automated"; no learning-rate
# schedule is applied by the cells visible in this notebook.
optimizer = optim.SGD(decoderRNN.parameters(), lr=1e-1, momentum=0.9)

# Per-class loss weights: reciprocals of 17 hand-picked divisors.
# The divisors are floats so that the reciprocal is never an integer division,
# and the tensor itself is converted to float32 (previously `.float()` was
# called on the loss module and only worked through buffer casting).
class_weight_divisors = np.array([1,  3,  2,  1,
                                  1,  3,  2,  3,
                                  4,  4,  1,  2,
                                  1,  1,  3,  4,  1], dtype=np.float64)
class_weights = torch.from_numpy(1.0 / class_weight_divisors).float()

# criterion = ConvolutedLoss()
criterion = torch.nn.MultiLabelSoftMarginLoss(weight=class_weights).cuda()

In [97]:
from src.p_data_augmentation import PowerPIL
from src.p2_dataload import KaggleAmazonDataset
from src.p_model_selection import train_valid_split
from src.p_sampler import SubsetSampler, balance_weights

In [98]:
# Re-seed all random number generators before building the datasets and split.
# (Caveat: some CuDNN algorithms are non-deterministic regardless of seeding.)
seed_value = 1337
torch.manual_seed(seed_value)
torch.cuda.manual_seed(seed_value)
np.random.seed(seed_value)
random.seed(seed_value)

##############################################################
## Loading the dataset

## Augmentation + Normalization for full training
ds_transform_augmented = transforms.Compose([
                 # RandomResizedCrop is the current name of the deprecated RandomSizedCrop.
                 transforms.RandomResizedCrop(224),
                 PowerPIL(),
                 transforms.ToTensor(),
                 normalize
])

## Normalization only for validation and test
ds_transform_raw = transforms.Compose([
                 # Resize is the current name of the deprecated Scale.
                 # NOTE(review): with an int argument only the smaller edge is
                 # resized to 224 — confirm the source images are square.
                 transforms.Resize(224),
                 transforms.ToTensor(),
                 normalize
                 ])

####     #########     ########     ###########     #####

# Both datasets read the same CSV and image folder; they differ only in the
# transform applied (augmented for training, raw for validation).
labels_csv = './data/train.csv'
images_dir = './data/train-jpg/'
images_ext = '.jpg'

X_train = KaggleAmazonDataset(labels_csv, images_dir, images_ext, ds_transform_augmented)
X_val = KaggleAmazonDataset(labels_csv, images_dir, images_ext, ds_transform_raw)

# Creating a validation split (0.2 is presumably the held-out fraction —
# see src.p_model_selection.train_valid_split).
split_indices = train_valid_split(X_train, 0.2)
train_idx, valid_idx = split_indices

# Training indices are drawn in random order; validation uses the project's
# SubsetSampler over the held-out indices.
valid_sampler = SubsetSampler(valid_idx)
train_sampler = SubsetRandomSampler(train_idx)

######    ##########    ##########    ########    #########

# Both loaders read the same underlying files; each one is restricted to its
# own index subset through its sampler.
loader_options = dict(batch_size=batch_size, num_workers=4, pin_memory=True)

train_loader = DataLoader(X_train, sampler=train_sampler, **loader_options)
valid_loader = DataLoader(X_val, sampler=valid_sampler, **loader_options)

In [99]:
def _module_device(module):
    """Return the device of ``module``'s first parameter (CUDA if it has none)."""
    first_param = next(module.parameters(), None)
    if first_param is None:
        # Parameter-less module: keep the original behaviour of using the GPU.
        return torch.device('cuda')
    return first_param.device


def train(epoch, train_loader, encoder, decoder, criterion, optimizer):
    """Run one training epoch of ``decoder`` on top of ``encoder`` outputs.

    The encoder is switched to eval mode and the decoder to train mode. For
    every batch the gradients held by ``optimizer`` are cleared, the loss of
    ``decoder(encoder(data))`` against ``target`` is back-propagated, and the
    optimizer takes one step. Progress is printed every 10 batches.

    Args:
        epoch: Epoch number, used only in the progress message.
        train_loader: Iterable of ``(data, target)`` batches that supports
            ``len()``.
        encoder: Module applied to ``data``.
        decoder: Module applied to the encoder output.
        criterion: Callable ``criterion(output, target)`` returning a scalar loss.
        optimizer: Optimizer whose parameters are updated.

    Returns:
        None.
    """
    encoder.eval()
    decoder.train()

    # Follow the models' devices instead of assuming CUDA.
    data_device = _module_device(encoder)
    target_device = _module_device(decoder)

    # Report progress against the number of samples actually drawn: with a
    # subset sampler this is smaller than the full dataset.
    try:
        num_samples = len(train_loader.sampler)
    except (AttributeError, TypeError):
        num_samples = len(train_loader.dataset)

    for batch_idx, (data, target) in enumerate(train_loader):
        # `non_blocking` replaces the old `async` keyword, which is a reserved
        # word (SyntaxError) from Python 3.7 on.
        data = data.to(data_device, non_blocking=True)
        target = target.to(target_device, non_blocking=True)
        optimizer.zero_grad()
        encoded = encoder(data)
        output = decoder(encoded)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            # `.item()` extracts the Python float from the 0-dim loss tensor
            # (`loss.data[0]` fails on 0-dim tensors).
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), num_samples,
                100. * batch_idx / len(train_loader), loss.item()))

In [100]:
# Train for epochs 1 through 9 with the objects built in the cells above.
num_epochs = 9
for epoch in range(1, num_epochs + 1):
    train(epoch=epoch,
          train_loader=train_loader,
          encoder=encoderCNN,
          decoder=decoderRNN,
          criterion=criterion,
          optimizer=optimizer)





