In [None]:
# Change the working directory so the relative 'cinic10/...' paths used later resolve.
%cd /kaggle/input

In [None]:
# List the contents of the current working directory.
!ls

In [None]:
'''Some helper functions for PyTorch, including:
    - get_mean_and_std: calculate the mean and std value of dataset.
    - init_params: net parameter initialization.
    - progress_bar: progress bar mimic xlua.progress.
'''
import sys
import time

import torch
import torch.nn as nn
import torch.nn.init as init


def get_mean_and_std(dataset, num_workers=2):
    '''Compute the per-channel mean and std value of an image dataset.

    Samples are loaded one at a time; the per-channel mean and std of every
    image are accumulated and then averaged over the number of samples. The
    returned std is therefore the mean of the per-image stds, not the std of
    all pixels pooled together.

    Args:
        dataset: dataset yielding ``(image, target)`` pairs whose image is a
            ``(C, H, W)`` tensor.
        num_workers: number of DataLoader worker processes (default 2).

    Returns:
        ``(mean, std)``: two 1-D tensors with one entry per image channel
        (the channel count is taken from the data instead of assuming 3).

    Raises:
        ValueError: if the dataset yields no samples.
    '''
    # The sums are order independent, so there is no reason to shuffle.
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=num_workers)
    mean = None
    std = None
    count = 0
    print('==> Computing mean and std..')
    for inputs, _ in dataloader:
        if mean is None:
            # Size the accumulators from the first batch: (1, C, H, W).
            channels = inputs.size(1)
            mean = torch.zeros(channels)
            std = torch.zeros(channels)
        for i in range(mean.numel()):
            mean[i] += inputs[:, i, :, :].mean()
            std[i] += inputs[:, i, :, :].std()
        count += 1
    if count == 0:
        raise ValueError('cannot compute mean and std of an empty dataset')
    # batch_size is 1, so the number of batches equals the number of samples.
    mean.div_(count)
    std.div_(count)
    return mean, std


def init_params(net):
    '''Initialise the layer parameters of ``net`` in place.

    - ``nn.Conv2d``: Kaiming-normal weights (``mode='fan_out'``), zero bias.
    - ``nn.BatchNorm2d``: weight 1, bias 0.
    - ``nn.Linear``: normal weights with std 1e-3, zero bias.

    Layers created without a bias (or batch norm without affine parameters)
    are handled by skipping the missing tensor.

    Args:
        net: an ``nn.Module``; every sub-module is visited.
    '''
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight, mode='fan_out')
            # `if m.bias:` would evaluate the truth value of a tensor, which
            # raises for any bias with more than one element.
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            # weight/bias are None when the layer was built with affine=False.
            if m.weight is not None:
                init.constant_(m.weight, 1)
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, std=1e-3)
            if m.bias is not None:
                init.constant_(m.bias, 0)


# Layout constants read by progress_bar: the line width used for padding and
# the number of characters drawn for the bar (a float; progress_bar applies
# int() where an integer is needed).
TERM_WIDTH = 100
TOTAL_BAR_LENGTH = 20.
# Timestamps shared between progress_bar calls: the time of the previous call
# and the time the current bar was started.
last_time = time.time()
begin_time = last_time


def progress_bar(current, total, msg=None):
    '''Write a one-line text progress bar to stdout.

    The line shows the bar, the time since the previous call ("Step"), the
    time since the bar was started ("Tot"), an optional message, and a
    ``current+1/total`` counter overprinted near the middle of the bar.

    Args:
        current: zero-based index of the step just completed; 0 restarts the
            total timer.
        total: total number of steps.
        msg: optional text appended after the timing information.
    '''
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # A new bar begins: restart the total timer.

    filled = int(TOTAL_BAR_LENGTH*current/total)
    remaining = int(TOTAL_BAR_LENGTH - filled) - 1

    out = sys.stdout
    out.write(' [' + '=' * filled + '>' + '.' * remaining + ']')

    now = time.time()
    step_time = now - last_time
    last_time = now
    tot_time = now - begin_time

    pieces = ['  Step: %s' % format_time(step_time),
              ' | Tot: %s' % format_time(tot_time)]
    if msg:
        pieces.append(' | ' + msg)
    status = ''.join(pieces)

    out.write(status)
    # Pad the rest of the line with spaces.
    out.write(' ' * (TERM_WIDTH-int(TOTAL_BAR_LENGTH)-len(status)-3))

    # Go back to the center of the bar and overprint the step counter.
    out.write('\b' * (TERM_WIDTH-int(TOTAL_BAR_LENGTH/2)+2))
    out.write(' %d/%d ' % (current+1, total))

    # Stay on the same line until the final step, then move to a new one.
    out.write('\r' if current < total-1 else '\n')
    out.flush()


def format_time(seconds):
    '''Format a duration in seconds as a short string.

    The duration is split into days, hours, minutes, whole seconds and
    milliseconds; only the first two non-zero units (largest first) are
    printed, e.g. ``'1h1m'`` or ``'1s500ms'``. Returns ``'0ms'`` when no unit
    is positive.
    '''
    left = seconds
    days = int(left / 3600/24)
    left = left - days*3600*24
    hours = int(left / 3600)
    left = left - hours*3600
    minutes = int(left / 60)
    left = left - minutes*60
    whole_seconds = int(left)
    left = left - whole_seconds
    millis = int(left*1000)

    units = (
        (days, 'D'),
        (hours, 'h'),
        (minutes, 'm'),
        (whole_seconds, 's'),
        (millis, 'ms'),
    )
    # Keep at most the two most significant positive units.
    shown = [str(value) + suffix for value, suffix in units if value > 0][:2]
    return ''.join(shown) or '0ms'

In [None]:

# Dataset 1 ==> airplane automobile dog ship truck
# Dataset 2 ==> the rest
# NOTE(review): the datasets built later select classes by sorted-folder index
# (0-4 vs 5-9), not by name; confirm that split matches the names listed above.
!ls cinic10/train

In [None]:
from __future__ import print_function

import argparse
import os
from datetime import datetime
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from skimage import io, transform

import torch
import torch.backends.cudnn as cudnn
from torch.optim.lr_scheduler import CosineAnnealingLR
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader 
import random


In [None]:

class FilteredDataset(datasets.ImageFolder):
    '''ImageFolder restricted to a subset of its class folders.

    Class folders under ``root`` are sorted by name and numbered from 0; only
    the folders whose number appears in ``wanted_labels`` are kept. Kept
    classes retain their original index as target label (no re-numbering).

    Args:
        root: directory containing one sub-directory per class.
        wanted_labels: iterable of class indices to keep (default: none).
        transform, target_transform, is_valid_file: forwarded to ImageFolder.
    '''
    def __init__(self, root, wanted_labels=None, transform=None, target_transform=None, is_valid_file=None):
        # Set before the parent constructor runs: it calls back into the
        # class-discovery hook below. A fresh list avoids sharing a mutable
        # default between instances.
        self.wanted_labels = list(wanted_labels) if wanted_labels is not None else []
        self.root = root
        datasets.ImageFolder.__init__(self, root=root, transform=transform, target_transform=target_transform, is_valid_file=is_valid_file)

    def find_classes(self, directory):
        '''Return ``(classes, class_to_idx)`` limited to the wanted labels.

        ``classes`` follows the order of ``wanted_labels``; labels with no
        matching folder are ignored.
        '''
        all_classes = sorted(d.name for d in os.scandir(directory) if d.is_dir())
        idx_to_class = dict(enumerate(all_classes))

        classes = []
        class_to_idx = {}
        for label in self.wanted_labels:
            if label in idx_to_class:
                cls = idx_to_class[label]
                classes.append(cls)
                class_to_idx[cls] = label
        return classes, class_to_idx

    def _find_classes(self, root):
        # Older torchvision releases call the underscore-prefixed hook, newer
        # ones call find_classes; route both to the same filtering logic so
        # the filter is never silently bypassed.
        return self.find_classes(root)
    

In [None]:
# Dataset split directories, relative to the current working directory.
traindir = os.path.join('cinic10/', 'train')
validatedir = os.path.join('cinic10', 'valid')
testdir = os.path.join('cinic10', 'test')
# Per-channel statistics used to normalise every image.
cinic_mean = [0.47889522, 0.47227842, 0.43047404]
cinic_std = [0.24205776, 0.23828046, 0.25874835]
# NOTE(review): `normalize` is not referenced in the visible cells; the
# transforms build their own Normalize with the same statistics.
normalize = transforms.Normalize(mean=cinic_mean, std=cinic_std)

# Training-time augmentation: padded random crop and horizontal flip, then
# tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=cinic_mean, std=cinic_std)
])
# Shifts a label down by 5.
# NOTE(review): not passed to any dataset in the visible cells.
def target_trans(x):
    return x-5
#trainset_primary = datasets.ImageFolder(root=traindir, transform=train_transform)
# Two disjoint training sets: class indices 0-4 for the first worker and 5-9
# for the second, each served in shuffled batches of 64.
trainset_1 = FilteredDataset(root=traindir, wanted_labels=[0, 1, 2, 3, 4],  transform=train_transform)
trainset_2 = FilteredDataset(root=traindir, wanted_labels=[5,6,7,8,9],  transform=train_transform)
train_loader_1=DataLoader(trainset_1, batch_size=64, shuffle= True)
train_loader_2=DataLoader(trainset_2, batch_size=64, shuffle=True)

In [None]:
# Sanity check on the first worker's loader: print one image's tensor size,
# then a label, then show an image.
# The loader shuffles and a new iterator is created for each next(iter(...)),
# so the printed size and the displayed image come from different batches.
print(next(iter(train_loader_1))[0][0].size())
X,Y=next(iter(train_loader_1))
print(Y[0])


# imshow expects channels last, hence the (C, H, W) -> (H, W, C) transpose.
# The tensor is still normalised, so the displayed colours are not the
# original ones.
plt.imshow(X[0].numpy().transpose(1,2,0))
plt.show()
    
    

In [None]:
import torch
import torch.nn as nn

# Module-level switch read by the forward() methods below: when true, every
# forward output is appended to the model's `buffer` list.
should_buffer = True

# Layer layouts by name: an integer is the output channel count of a
# conv/batch-norm/ReLU group, 'M' is a 2x2 max-pool.
cfg = {
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
}

# Both workers and master run a VGG16 but the master's VGG is modified to have only one output value


class VGG(nn.Module):
    '''VGG-style network: a conv feature stack followed by a 512 -> 10 linear layer.

    While the module-level ``should_buffer`` flag is true, every forward
    output is also appended to ``self.buffer``.
    '''
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)
        self.buffer = []

    def forward(self, x):
        '''Return the classifier output for batch ``x``.'''
        feats = self.features(x)
        # Flatten everything except the batch dimension.
        feats = feats.view(feats.size(0), -1)
        logits = self.classifier(feats)
        if should_buffer:
            self.buffer.append(logits)
        return logits

    def _make_layers(self, cfg):
        '''Build the feature extractor described by the layout list ``cfg``.'''
        blocks = []
        channels = 3  # the first convolution takes 3 input channels
        for spec in cfg:
            if spec == 'M':
                blocks.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            blocks.extend((
                nn.Conv2d(channels, spec, kernel_size=3, padding=1),
                nn.BatchNorm2d(spec),
                nn.ReLU(inplace=True),
            ))
            channels = spec
        blocks.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*blocks)

    def return_buffer(self):
        '''Return the list of buffered forward outputs.'''
        return self.buffer

class VGG_MASTER(nn.Module):
    '''VGG variant whose 10-way output is reduced to a single value per sample.

    Same feature stack and 512 -> 10 classifier as ``VGG``, followed by an
    extra 10 -> 1 linear layer. While the module-level ``should_buffer`` flag
    is true, every forward output is also appended to ``self.buffer``.
    '''
    def __init__(self, vgg_name):
        super(VGG_MASTER, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)
        self.classifier1= nn.Linear(10,1)

        self.buffer = []

    def forward(self, x):
        '''Return one output value per sample of batch ``x``.'''
        feats = self.features(x)
        # Flatten everything except the batch dimension.
        feats = feats.view(feats.size(0), -1)
        score = self.classifier1(self.classifier(feats))
        if should_buffer:
            self.buffer.append(score)
        return score

    def _make_layers(self, cfg):
        '''Build the feature extractor described by the layout list ``cfg``.'''
        blocks = []
        channels = 3  # the first convolution takes 3 input channels
        for spec in cfg:
            if spec == 'M':
                blocks.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            blocks.extend((
                nn.Conv2d(channels, spec, kernel_size=3, padding=1),
                nn.BatchNorm2d(spec),
                nn.ReLU(inplace=True),
            ))
            channels = spec
        blocks.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*blocks)

    def return_buffer(self):
        '''Return the list of buffered forward outputs.'''
        return self.buffer

def vgg16():
    '''Build a ``VGG`` with the 'VGG16' layer layout.'''
    return VGG('VGG16')

def vgg16_master():
    '''Build a ``VGG_MASTER`` with the 'VGG16' layer layout.'''
    return VGG_MASTER('VGG16')

def test():
    '''Smoke test: run a random 2 x 3 x 32 x 32 batch through vgg16() and print the output size.'''
    net = vgg16()
    y = net(torch.randn(2, 3, 32, 32))
    print(y.size())

In [None]:
import torch.nn.functional as F
class Discriminator(nn.Module):
  '''Small convolutional network producing one sigmoid score per image.

  Three stride-2 convolutions (each followed by batch norm and leaky ReLU)
  and a final 4x4 convolution with a single output channel; the result is
  flattened and passed through a sigmoid.
  '''
  def __init__(self):
    super(Discriminator, self).__init__()
    # Spatial sizes in the trailing comments are for a 32x32 input.
    self.conv1 = nn.Conv2d(3, 64, 4, 2, 1)     # 16x16
    self.conv2 = nn.Conv2d(64, 128, 4, 2, 1)   # 8x8
    self.conv3 = nn.Conv2d(128, 256, 4, 2, 1)  # 4x4
    self.conv4 = nn.Conv2d(256, 1, 4, 1, 0)    # 1x1
    self.drop1 = nn.Dropout(0.3)
    self.norm1_2d = nn.BatchNorm2d(64)
    self.norm2_2d = nn.BatchNorm2d(128)
    self.norm3_2d = nn.BatchNorm2d(256)

  def forward(self, x):
    '''Return a flat tensor of sigmoid scores for batch ``x``.'''
    # Conv block 1
    h = F.leaky_relu(self.norm1_2d(self.conv1(x)), 0.2)

    # Conv block 2, the only one followed by dropout
    h = F.leaky_relu(self.norm2_2d(self.conv2(h)), 0.2)
    h = self.drop1(h)

    # Conv block 3
    h = F.leaky_relu(self.norm3_2d(self.conv3(h)), 0.2)

    # Final convolution down to one channel, flattened to 1-D
    logits = self.conv4(h).view(-1)
    return torch.sigmoid(logits)



In [None]:
# Command-line style configuration. In the notebook the arguments are parsed
# from an explicit list, so every option keeps its default unless listed there.
parser = argparse.ArgumentParser(description='PyTorch CINIC10 Training')
parser.add_argument('--data', metavar='DIR', default='cinic10',
                    help='path to dataset (default: cinic10)')
parser.add_argument('-j', '--workers', default=2, type=int, metavar='N',
                    help='number of data loading workers (default: 2)')
parser.add_argument('--epochs', default=10, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('-b', '--batch-size', default=64, type=int,
                    metavar='N',
                    help='mini-batch size (default: 64), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)',
                    dest='weight_decay')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')

args = parser.parse_args(['--data', 'cinic10'])
# Use the GPU only when one is available AND --no-cuda was not given
# (previously the flag was parsed but ignored).
args.cuda = not args.no_cuda and torch.cuda.is_available()

# The two worker networks, one per training subset.
model1 = vgg16().float()
model2 = vgg16().float()

# On GPU, only the convolutional feature stack is wrapped in DataParallel;
# the whole model (including the plain linear classifier) is then moved to CUDA.
if args.cuda:
    model1.features = torch.nn.DataParallel(model1.features)
    model2.features = torch.nn.DataParallel(model2.features)
    model1.cuda()
    model2.cuda()


In [None]:
# Define loss function (criterion), optimizer and learning rate scheduler.
# Both workers share the criterion and hyper-parameters but each has its own
# SGD optimizer and its own cosine schedule spanning args.epochs steps.
criterion = torch.nn.CrossEntropyLoss()
optimizer1 = torch.optim.SGD(model1.parameters(),
                            lr=args.lr,
                            momentum=args.momentum,
                            weight_decay=args.weight_decay)
optimizer2 = torch.optim.SGD(model2.parameters(),
                            lr=args.lr,
                            momentum=args.momentum,
                            weight_decay=args.weight_decay)
scheduler1 = CosineAnnealingLR(optimizer=optimizer1, T_max=args.epochs, eta_min=0)
scheduler2 = CosineAnnealingLR(optimizer=optimizer2, T_max=args.epochs, eta_min=0)


def train1(epoch):
    '''Train model1 for one epoch on train_loader_1, printing running loss and accuracy.

    Output buffering in the model's forward pass is disabled while the epoch
    runs and the previous setting is restored afterwards.

    Args:
        epoch: epoch index, used only for the printed header.
    '''
    # `should_buffer` is the module-level flag read by VGG.forward. Without
    # the global declaration the assignment only creates an unused local and
    # every training output is appended to model1.buffer.
    global should_buffer
    previous_buffering = should_buffer
    should_buffer = False
    try:
        print('\nEpoch: %d' % epoch)
        cudnn.benchmark = True
        model1.train()
        train_loss = 0
        correct = 0
        total = 0
        for batch_idx, (inputs, targets) in enumerate(train_loader_1):
            if args.cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            optimizer1.zero_grad()
            outputs = model1(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer1.step()

            # Running statistics for the progress line.
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            progress_bar(batch_idx, len(train_loader_1), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
    finally:
        should_buffer = previous_buffering


def train2(epoch):
    '''Train model2 for one epoch on train_loader_2, printing running loss and accuracy.

    Output buffering in the model's forward pass is disabled while the epoch
    runs and the previous setting is restored afterwards.

    Args:
        epoch: epoch index, used only for the printed header.
    '''
    # `should_buffer` is the module-level flag read by VGG.forward. Without
    # the global declaration the assignment only creates an unused local and
    # every training output is appended to model2.buffer.
    global should_buffer
    previous_buffering = should_buffer
    should_buffer = False
    try:
        print('\nEpoch: %d' % epoch)
        cudnn.benchmark = True
        model2.train()
        train_loss = 0
        correct = 0
        total = 0
        for batch_idx, (inputs, targets) in enumerate(train_loader_2):
            if args.cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            optimizer2.zero_grad()
            outputs = model2(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer2.step()

            # Running statistics for the progress line.
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            progress_bar(batch_idx, len(train_loader_2), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
    finally:
        should_buffer = previous_buffering



In [None]:
# Train both workers, alternating one epoch each. Each scheduler is stepped
# AFTER its model's epoch: stepping first skips the initial learning rate and,
# with T_max == args.epochs and eta_min == 0, makes the final epoch run at a
# learning rate of zero.
for epoch in range(0, args.epochs):
    train1(epoch)
    scheduler1.step()
    train2(epoch)
    scheduler2.step()

In [None]:
# Deterministic preprocessing for the master stage and for evaluation.
# A random crop here would hand model1 and model2 different views of the same
# image when each makes its own pass over master_loader, so their predictions
# (and the cached inputs the master trains on) would not correspond.
# CenterCrop(32) keeps the 32x32 output size without any randomness.
master_transform = transforms.Compose([
    transforms.CenterCrop(32),
    transforms.ToTensor(),
    transforms.Normalize(mean=cinic_mean, std=cinic_std)
])
epochs_master=100
masterset = FilteredDataset(root=validatedir, wanted_labels=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],  transform=master_transform)
# Our validate set is our trainset for master
# (not shuffled, so repeated passes see the batches in the same order)
master_loader = DataLoader(masterset, batch_size=64, shuffle= False)

# Follow the same device policy as the worker models instead of requiring CUDA.
master_model = Discriminator()
if args.cuda:
    master_model = master_model.cuda()

def alpha_from_vectors(pred1, pred2, real):
    '''Compute a 0/1 label per sample from two models' outputs and the true class.

    Both outputs are turned into probabilities with a softmax over the class
    dimension, a mixing weight ``alpha`` is derived from them and the one-hot
    target, and the weight is thresholded at 0.5.

    Args:
        pred1: ``(N, C)`` tensor of raw outputs from the first model.
        pred2: ``(N, C)`` tensor of raw outputs from the second model.
        real: ``(N,)`` integer tensor of true class indices.

    Returns:
        Integer numpy array of shape ``(N,)``: 1 where ``alpha > 0.5``, else 0.
        Rows where the ratio is undefined (e.g. identical predictions) give 0.
    '''
    # Softmax over the class dimension, stated explicitly: relying on the
    # implicit dimension is deprecated.
    p1 = F.softmax(pred1, dim=1).cpu().detach().numpy()
    p2 = F.softmax(pred2, dim=1).cpu().detach().numpy()
    # One-hot targets sized from the predictions rather than a fixed 10.
    r = np.eye(p1.shape[1])[real.cpu().detach().numpy()]

    diff = p2 - p1
    # Degenerate rows divide by zero; the resulting nan compares false below,
    # so only the numpy warnings are suppressed.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio = np.sum((p2 - r)*diff, axis=1)*1.0 / np.sum((p1-r)*diff, axis=1).ravel()

        # alpha / (1-alpha) = ratio
        # alpha = ratio / (1+ratio)
        alpha = ratio / (1+ratio)
    alpha = (alpha>0.5).astype(int)
    return alpha

In [None]:


# Per-batch caches filled by test1() (the *1 lists) and test2() (the *2
# lists): the input batch, the model output and the target labels.
inputs1 = []
outputs1 = []
targets1 = []
inputs2 = []
outputs2 = []
targets2 = []

def test1():
    '''Evaluate model1 on master_loader and cache every batch.

    For each batch the inputs, model1's outputs and the targets are stored in
    the module-level lists ``inputs1``, ``outputs1`` and ``targets1``. Running
    loss and accuracy are printed through progress_bar.
    '''
    # Empty the caches in place so that calling this again replaces the
    # cached batches instead of appending a second copy.
    del inputs1[:]
    del outputs1[:]
    del targets1[:]

    model1.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(master_loader):
            if args.cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model1(inputs)

            inputs1.append(inputs)
            outputs1.append(outputs)
            targets1.append(targets)

            loss1 = criterion(outputs, targets)

            test_loss += loss1.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            progress_bar(batch_idx, len(master_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

def test2():
    '''Evaluate model2 on master_loader and cache every batch.

    For each batch the inputs, model2's outputs and the targets are stored in
    the module-level lists ``inputs2``, ``outputs2`` and ``targets2``. Running
    loss and accuracy are printed through progress_bar.
    '''
    # Empty the caches in place so that calling this again replaces the
    # cached batches instead of appending a second copy.
    del inputs2[:]
    del outputs2[:]
    del targets2[:]

    model2.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(master_loader):
            if args.cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model2(inputs)

            inputs2.append(inputs)
            outputs2.append(outputs)
            targets2.append(targets)

            loss2 = criterion(outputs, targets)

            test_loss += loss2.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            progress_bar(batch_idx, len(master_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
            




    

In [None]:
# One 0/1 label array per cached batch, filled by fill_alphas().
alphas = []
def fill_alphas():
    '''Rebuild ``alphas`` from the batches cached by test1() and test2().

    Entry ``i`` is ``alpha_from_vectors(outputs1[i], outputs2[i], targets1[i])``.
    '''
    # Batch i of both passes holds the same targets because the loader is not
    # shuffled. The inputs match too only if the loader's transform is
    # deterministic.
    # Reset in place first so calling this twice does not duplicate entries.
    del alphas[:]
    for i in range(len(targets1)):
        alphas.append(alpha_from_vectors(outputs1[i], outputs2[i], targets1[i]))


In [None]:
# Run each worker over master_loader; this fills the per-batch caches.
test1()
test2()


In [None]:
# Turn the cached worker outputs into per-batch 0/1 labels.
fill_alphas()

In [None]:
# Spot-check the labels computed for one batch (index 5).
print(alphas[5])

In [None]:
# Binary cross-entropy on probabilities: master_model already applies a
# sigmoid in its forward pass.
master_criterion = torch.nn.BCELoss()

master_optimizer = torch.optim.Adam(master_model.parameters(),
                            lr=1e-03)        

# Cosine schedule spanning the epochs_master training epochs.
master_scheduler = CosineAnnealingLR(optimizer=master_optimizer, T_max=epochs_master, eta_min=0)

def train_master(epoch):
    '''Train master_model for one epoch on the cached batches.

    The batches in ``inputs1`` are visited in a fresh random order; the
    target for batch ``i`` is the 0/1 array ``alphas[i]``. The running mean
    loss is printed through progress_bar.

    Args:
        epoch: epoch index, used only for the printed header.
    '''
    print('\nEpoch: %d' % epoch)
    cudnn.benchmark = True
    master_model.train()
    train_loss = 0
    # Shuffle at batch granularity: the cached batches themselves are fixed.
    perm=np.random.permutation(len(inputs1))
    for i, ind in enumerate(perm):
        inputs = inputs1[ind]
        # Loss is computed in double precision, so targets and outputs are
        # both cast to float64.
        targets = torch.from_numpy(alphas[ind]).double()
        if args.cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        master_optimizer.zero_grad()
        outputs = master_model(inputs).double()
        loss = master_criterion(outputs, targets)
        loss.backward()
        master_optimizer.step()

        train_loss += loss.item()

        progress_bar(i, len(inputs1), 'Loss: %.3f '
                     % (train_loss/(i+1)))

In [None]:
# Train the master. The scheduler is stepped AFTER each epoch: stepping first
# skips the initial learning rate and, with T_max == epochs_master and
# eta_min == 0, makes the final epoch run at a learning rate of zero.
for epoch in range(0, epochs_master):
    train_master(epoch)
    master_scheduler.step()
   

In [None]:
# Held-out test split with all ten classes, preprocessed with master_transform.
testset = FilteredDataset(root=testdir, wanted_labels=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],  transform=master_transform)
# Served in order, in batches of 64, for the final evaluation.
test_loader = DataLoader(testset, batch_size=64, shuffle= False)

In [None]:

def test_model():
    '''Evaluate the combined model on test_loader and print running accuracy.

    For every image, master_model produces a weight ``beta``; the prediction
    is the arg-max of ``beta * model1(x) + (1 - beta) * model2(x)``.
    '''
    model1.eval()
    model2.eval()
    master_model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            if args.cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            # One weight per sample, shaped (N, 1) to broadcast over classes.
            beta = master_model(inputs).view(-1, 1)
            outputs = beta*model1(inputs) + (1-beta)*model2(inputs)

            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            # The bar length must come from the loader being iterated
            # (it previously used master_loader).
            progress_bar(batch_idx, len(test_loader), ' Acc: %.3f%% (%d/%d)'
                         % (100.*correct/total, correct, total))

In [None]:
test_model()