In [None]:
%cd /kaggle/input

In [None]:
!ls

In [None]:
'''Some helper functions for PyTorch, including:
    - get_mean_and_std: calculate the mean and std value of a dataset.
    - init_params: net parameter initialization.
    - progress_bar: progress bar mimicking xlua.progress.
    - format_time: format a duration in seconds as a short string.
'''
import sys
import time

import torch
import torch.nn as nn
import torch.nn.init as init


def get_mean_and_std(dataset, num_workers=2):
    '''Compute the per-channel mean and std value of dataset.

    Samples are loaded one at a time (batch size 1). For every sample the
    mean and std of each channel are computed, and those per-sample values
    are averaged over the dataset. The returned std is therefore the average
    of the per-image stds, not the std of all pixels pooled together.

    Args:
        dataset: dataset yielding ``(image, label)`` pairs. The channel count
            is read from dimension 1 of the first loaded batch, so images
            are expected to be ``(C, H, W)`` tensors.
        num_workers: number of DataLoader worker processes. Defaults to 2
            (the previously hard-coded value); pass 0 to load in-process.

    Returns:
        Tuple ``(mean, std)`` of 1-D tensors with one entry per channel.

    Raises:
        ValueError: if the dataset is empty (the averages are undefined).
    '''
    if len(dataset) == 0:
        raise ValueError('cannot compute mean and std of an empty dataset')

    # Order does not affect an average, so no shuffling is needed.
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=num_workers)
    mean = None
    std = None
    num_channels = 0
    print('==> Computing mean and std..')
    for inputs, _ in dataloader:
        if mean is None:
            # Size the accumulators from the data instead of assuming RGB.
            num_channels = inputs.size(1)
            mean = torch.zeros(num_channels)
            std = torch.zeros(num_channels)
        for i in range(num_channels):
            mean[i] += inputs[:, i, :, :].mean()
            std[i] += inputs[:, i, :, :].std()
    # With batch_size=1 the number of batches equals len(dataset).
    mean.div_(len(dataset))
    std.div_(len(dataset))
    return mean, std


def init_params(net):
    '''Init layer parameters in place.

    Walks every sub-module of ``net`` and re-initializes:
      - ``nn.Conv2d``: Kaiming-normal weights (``mode='fan_out'``), zero bias.
      - ``nn.BatchNorm2d``: weight 1, bias 0.
      - ``nn.Linear``: normal weights with std 1e-3, zero bias.

    Parameters that are absent (``bias=False`` layers, or BatchNorm with
    ``affine=False``) are skipped. Other module types are left untouched.

    Args:
        net: the ``nn.Module`` whose parameters are initialized.
    '''
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight, mode='fan_out')
            # `if m.bias:` would raise for any bias with more than one
            # element; the presence check must compare against None.
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            if m.weight is not None:
                init.constant_(m.weight, 1)
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, std=1e-3)
            if m.bias is not None:
                init.constant_(m.bias, 0)


# Fixed line width (in characters) that progress_bar pads its output to and
# uses when computing how far to backspace.
TERM_WIDTH = 100
# Width of the bar itself in characters; stored as a float and cast with
# int() wherever progress_bar needs a character count.
TOTAL_BAR_LENGTH = 20.
# Timestamp of the previous progress_bar call (used for the per-step time).
last_time = time.time()
# Timestamp of the start of the current bar; progress_bar resets it whenever
# it is called with current == 0.
begin_time = last_time


def progress_bar(current, total, msg=None):
    '''Draw a one-line text progress bar on stdout.

    Args:
        current: index of the current step. A value of 0 restarts the total
            timer, and ``current + 1`` is what gets displayed.
        total: total number of steps.
        msg: optional text appended after the step / total timings.

    The line ends with a carriage return so the next call overwrites it,
    except when ``current`` reaches ``total - 1``, where a newline is written.
    Updates the module-level ``last_time`` and ``begin_time``.
    '''
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # Reset for new bar.

    filled = int(TOTAL_BAR_LENGTH*current/total)
    remaining = int(TOTAL_BAR_LENGTH - filled) - 1

    # Bar body: completed part, the head marker, then the pending part.
    sys.stdout.write(' [' + '=' * filled + '>' + '.' * remaining + ']')

    now = time.time()
    step_time = now - last_time
    last_time = now
    tot_time = now - begin_time

    parts = [
        '  Step: %s' % format_time(step_time),
        ' | Tot: %s' % format_time(tot_time),
    ]
    if msg:
        parts.append(' | ' + msg)

    msg = ''.join(parts)
    sys.stdout.write(msg)
    # Pad the rest of the line (a non-positive count writes nothing).
    sys.stdout.write(' ' * (TERM_WIDTH-int(TOTAL_BAR_LENGTH)-len(msg)-3))

    # Go back to the center of the bar.
    sys.stdout.write('\b' * (TERM_WIDTH-int(TOTAL_BAR_LENGTH/2)+2))
    sys.stdout.write(' %d/%d ' % (current+1, total))

    sys.stdout.write('\r' if current < total-1 else '\n')
    sys.stdout.flush()


def format_time(seconds):
    '''Format a duration given in seconds as a short string.

    The duration is split into days (D), hours (h), minutes (m), seconds (s)
    and milliseconds (ms). Only units with a positive count are shown, and
    at most the two largest of those are kept, e.g. ``65`` -> ``'1m5s'``.
    Returns ``'0ms'`` when no unit has a positive count.
    '''
    rest = seconds
    n_days = int(rest / 3600/24)
    rest = rest - n_days*3600*24
    n_hours = int(rest / 3600)
    rest = rest - n_hours*3600
    n_minutes = int(rest / 60)
    rest = rest - n_minutes*60
    n_seconds = int(rest)
    rest = rest - n_seconds
    n_millis = int(rest*1000)

    units = (
        (n_days, 'D'),
        (n_hours, 'h'),
        (n_minutes, 'm'),
        (n_seconds, 's'),
        (n_millis, 'ms'),
    )
    shown = [str(count) + suffix for count, suffix in units if count > 0]
    # Keep the two most significant non-zero units; fall back to '0ms'.
    return ''.join(shown[:2]) or '0ms'

In [None]:
import torch.utils.data
import random

# Indices (into the wrapped dataset) selected by any FilteredDataset built so
# far; every FilteredDataset constructor appends to this shared list.
used_indices = []


class FilteredDataset(torch.utils.data.dataset.Dataset):
    '''Random, label-dependent subset of another dataset.

    While iterating over ``dataset`` each sample is kept at random:
    samples whose label is in ``wanted_labels`` are kept when
    ``random.random() > 0.8``, all other samples when
    ``random.random() <= 0.8``. Kept indices are stored in ``self.indices``
    and also appended to the module-level ``used_indices`` list.

    NOTE(review): with these thresholds the "wanted" labels are the ones
    that are mostly dropped (kept roughly 20% of the time) -- confirm this
    is the intended direction of the split.
    '''

    def __init__(self, dataset, wanted_labels):
        self.parent = dataset
        self.indices = []
        # Iterating the dataset fetches every sample just to read its label.
        for position, (_, label) in enumerate(dataset):
            is_wanted = label in wanted_labels
            draw = random.random()
            keep = draw > 0.8 if is_wanted else draw <= 0.8
            if keep:
                self.indices.append(position)
                used_indices.append(position)

    def __getitem__(self, index):
        # Translate the subset position into the parent's index.
        parent_index = self.indices[index]
        return self.parent[parent_index]

    def __len__(self):
        return len(self.indices)
    
class RestOfDataset(torch.utils.data.dataset.Dataset):
    '''Complement subset: every sample of ``dataset`` not listed in ``used_inds``.

    Args:
        dataset: the wrapped dataset; must support ``len()`` and indexing.
        used_inds: iterable of parent indices to exclude.

    ``self.indices`` holds the remaining parent indices in ascending order.
    '''

    def __init__(self, dataset, used_inds):
        self.parent = dataset
        # A set makes each membership test O(1); testing against the list
        # directly is quadratic in the dataset size.
        excluded = set(used_inds)
        # Only positions are needed, so walk the index range instead of
        # loading every sample from the parent.
        self.indices = [index for index in range(len(dataset))
                        if index not in excluded]

    def __getitem__(self, index):
        return self.parent[self.indices[index]]

    def __len__(self):
        return len(self.indices)

In [None]:
from __future__ import print_function

import argparse
import os
from datetime import datetime

import torch
import torch.backends.cudnn as cudnn
from torch.optim.lr_scheduler import CosineAnnealingLR
import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [None]:
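# Intended split (labels [0, 1, 5, 8, 9] are passed to FilteredDataset below):
# Dataset 1 ==> airplane, automobile, dog, ship, truck
# Dataset 2 ==> the rest
# NOTE(review): FilteredDataset keeps those labels only when random() > 0.8 and
# all other labels when random() <= 0.8, so Dataset 1 is dominated by "the rest".
# Confirm which direction is intended.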
!ls cinic10/train

In [None]:
# Dataset locations, relative to the current working directory.
traindir = os.path.join('cinic10', 'train')
validatedir = os.path.join('cinic10', 'valid')
testdir = os.path.join('cinic10', 'test')

# Per-channel statistics used to normalize the images.
cinic_mean = [0.47889522, 0.47227842, 0.43047404]
cinic_std = [0.24205776, 0.23828046, 0.25874835]
normalize = transforms.Normalize(mean=cinic_mean, std=cinic_std)

# Training-time augmentation followed by tensor conversion and normalization.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

trainset_primary = datasets.ImageFolder(root=traindir, transform=train_transform)

# FilteredDataset appends to the shared `used_indices` list. Empty it first so
# that re-running this cell does not carry indices over from a previous split
# (which would wrongly shrink trainset_2).
used_indices.clear()
trainset_1 = FilteredDataset(trainset_primary, [0, 1, 5, 8, 9])
# Everything trainset_1 did not select.
trainset_2 = RestOfDataset(trainset_primary, used_indices)

In [None]:
# Report how many samples ended up in each of the two training subsets.
print("[+] len of trainset_1: ", len(trainset_1))
print("[+] len of trainset_2: ", len(trainset_2))

In [None]:
import torch
import torch.nn as nn

# Module-level switch read by the models' forward(): while truthy, every
# forward pass appends its output tensor to the model's `buffer` list.
should_buffer = True

# Layer layouts keyed by architecture name. In `_make_layers` an integer adds
# a 3x3 conv + batch-norm + ReLU group with that many output channels, and
# 'M' adds a 2x2 max-pool.
cfg = {
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
}

# Both workers and master run a VGG16, but the master's VGG is modified to have only one output value.


class VGG(nn.Module):
    '''VGG-style network with a 10-way linear classifier.

    The convolutional stack is built from ``cfg[vgg_name]``. While the
    module-level ``should_buffer`` flag is truthy, every forward output is
    also appended to ``self.buffer``.
    '''

    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        layout = cfg[vgg_name]
        self.features = self._make_layers(layout)
        self.classifier = nn.Linear(512, 10)
        self.buffer = []

    def forward(self, x):
        '''Run the feature stack, flatten per sample, and classify.'''
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)
        out = self.classifier(flat)
        if should_buffer:
            # The stored tensor is the same object that is returned.
            self.buffer.append(out)
        return out

    def _make_layers(self, cfg):
        '''Translate a layout list into an ``nn.Sequential`` feature stack.'''
        stack = []
        channels_in = 3
        for entry in cfg:
            if entry != 'M':
                stack.append(nn.Conv2d(channels_in, entry, kernel_size=3, padding=1))
                stack.append(nn.BatchNorm2d(entry))
                stack.append(nn.ReLU(inplace=True))
                channels_in = entry
            else:
                stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
        stack.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*stack)

    def return_buffer(self):
        '''Return the list of buffered forward outputs (not a copy).'''
        return self.buffer

class VGG_MASTER(nn.Module):
    '''Master variant of the VGG network with a single output value.

    Uses the same convolutional stack as the worker model (built from
    ``cfg[vgg_name]``), but the classifier maps the 512 features to 1 output.
    While the module-level ``should_buffer`` flag is truthy, every forward
    output is also appended to ``self.buffer``.
    '''

    def __init__(self, vgg_name):
        # Must name this class: VGG_MASTER does not inherit from VGG, so
        # super(VGG, self) raises TypeError at construction time.
        super(VGG_MASTER, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 1)
        self.buffer = []

    def forward(self, x):
        '''Run the feature stack, flatten per sample, and apply the classifier.'''
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        if should_buffer:
            self.buffer.append(out)
        return out

    def _make_layers(self, cfg):
        '''Build the feature stack: ints add conv+BN+ReLU, 'M' adds a max-pool.'''
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)

    def return_buffer(self):
        '''Return the list of buffered forward outputs (not a copy).'''
        return self.buffer

def vgg16():
    '''Build a VGG model using the 'VGG16' layer layout.'''
    model = VGG('VGG16')
    return model

def vgg16_master():
    '''Build a VGG_MASTER model using the 'VGG16' layer layout.'''
    model = VGG_MASTER('VGG16')
    return model

def test():
    '''Smoke test: push a random 2x3x32x32 batch through vgg16() and print the output size.'''
    model = vgg16()
    batch = torch.randn(2, 3, 32, 32)
    print(model(batch).size())

In [None]:
# Training hyper-parameters are declared as command-line style options so the
# defaults live in one place.
parser = argparse.ArgumentParser(description='PyTorch CINIC10 Training')
parser.add_argument('--data', metavar='DIR', default='cinic10',
                    help='path to dataset (default: cinic10)')
parser.add_argument('-j', '--workers', default=2, type=int, metavar='N',
                    help='number of data loading workers (default: 2)')
parser.add_argument('--epochs', default=15, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('-b', '--batch-size', default=64, type=int,
                    metavar='N',
                    help='mini-batch size (default: 64), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)',
                    dest='weight_decay')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')

# An explicit argument list is parsed instead of sys.argv (presumably because
# the notebook kernel's own argv is not meant for this parser).
args = parser.parse_args(['--data', 'cinic10'])
# Use CUDA only when it is both requested (no --no-cuda) and available.
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Two independent VGG16 networks, one per training subset.
model1 = vgg16()
model2 = vgg16()

if args.cuda:
    # Wrap each model's *own* feature extractor; the previous code referenced
    # an undefined `model`, which raised NameError on CUDA machines.
    model1.features = torch.nn.DataParallel(model1.features)
    model2.features = torch.nn.DataParallel(model2.features)
    model1.cuda()
    model2.cuda()


In [None]:
# Loss function shared by both models, plus one SGD optimizer and one
# cosine-annealing learning-rate schedule per model.
criterion = torch.nn.CrossEntropyLoss()
optimizer1, optimizer2 = (
    torch.optim.SGD(net.parameters(),
                    lr=args.lr,
                    momentum=args.momentum,
                    weight_decay=args.weight_decay)
    for net in (model1, model2)
)
scheduler1, scheduler2 = (
    CosineAnnealingLR(optimizer=opt, T_max=args.epochs, eta_min=0)
    for opt in (optimizer1, optimizer2)
)


def _train_one_epoch(model, optimizer, dataset, epoch):
    '''Run one training epoch of ``model`` over ``dataset``.

    Args:
        model: network to train (switched to train mode here).
        optimizer: optimizer that updates ``model``'s parameters.
        dataset: dataset yielding ``(image, label)`` samples; it is batched
            and shuffled with ``args.batch_size`` / ``args.workers``.
        epoch: epoch number, used only for the log line.
    '''
    # Without `global` the assignment would only create a local variable and
    # the models would keep buffering every training output.
    global should_buffer
    should_buffer = False

    print('\nEpoch: %d' % epoch)
    cudnn.benchmark = True
    model.train()

    # The model and the loss need batched tensors, so iterate a DataLoader
    # rather than the dataset itself (which yields single unbatched samples
    # with plain-int labels).
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers)

    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(loader):
        if args.cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Running statistics for the progress line.
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        progress_bar(batch_idx, len(loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))


def train1(epoch):
    '''Train ``model1`` for one epoch on ``trainset_1`` using ``optimizer1``.'''
    _train_one_epoch(model1, optimizer1, trainset_1, epoch)


def train2(epoch):
    '''Train ``model2`` for one epoch on ``trainset_2`` using ``optimizer2``.'''
    _train_one_epoch(model2, optimizer2, trainset_2, epoch)



In [None]:
for epoch in range(0, args.epochs):
    train1(epoch)
    # Advance the learning-rate schedule only after the epoch's optimizer
    # updates; stepping first skips the initial learning rate and triggers a
    # warning on PyTorch >= 1.1.
    scheduler1.step()