In [1]:
from __future__ import print_function

import argparse
import csv
import os
import shutil
import time
from imgaug import augmenters as iaa

import numpy as np
import torch

import sys
import time
import math
import torchvision.datasets as datasets
from PIL import Image
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn.init as init
import torchvision
import torch
import torch.nn as nn
import torch.nn.functional as F

from tensorboardX import SummaryWriter

# Record once whether PyTorch can see a CUDA device and tell the reader which
# device class training will use.
train_on_gpu = torch.cuda.is_available()

if train_on_gpu:
    print('CUDA is available!  Training on GPU ...')
else:
    print('CUDA is not available.  Training on CPU ...')


CUDA is available!  Training on GPU ...


In [2]:

class TransformTwice:
    """Wrap a transform so that every input yields two transformed views.

    The wrapped callable is invoked twice on the same input, so a stochastic
    transform produces two different augmentations of one sample.
    """

    def __init__(self, transform):
        # Callable applied (twice) to each input passed to ``__call__``.
        self.transform = transform

    def __call__(self, inp):
        """Return a 2-tuple holding the results of two successive transform calls."""
        return tuple(self.transform(inp) for _ in range(2))

def get_cifar10(root, n_labeled,
                 transform_train=None, transform_val=None,
                 download=True):
    """Build the labeled, unlabeled, validation and test CIFAR-10 datasets.

    Args:
        root: Directory passed to the torchvision CIFAR-10 loader.
        n_labeled: Total number of labeled training samples; a tenth of it
            (truncated to an int) is requested per class.
        transform_train: Transform for labeled samples; wrapped in
            ``TransformTwice`` for the unlabeled set.
        transform_val: Transform for the validation and test sets.
        download: Forwarded to the initial CIFAR-10 load used for the split.

    Returns:
        ``(train_labeled, train_unlabeled, val, test)`` datasets.
    """
    # The raw training set is only needed here for its labels.
    reference = torchvision.datasets.CIFAR10(root, train=True, download=download)
    per_class = int(n_labeled/10)
    labeled_ids, unlabeled_ids, val_ids = train_val_split(reference.targets, per_class)

    labeled_set = CIFAR10_labeled(root, labeled_ids, train=True, transform=transform_train)
    unlabeled_set = CIFAR10_unlabeled(
        root, unlabeled_ids, train=True, transform=TransformTwice(transform_train))
    val_set = CIFAR10_labeled(root, val_ids, train=True, transform=transform_val, download=True)
    test_set = CIFAR10_labeled(root, train=False, transform=transform_val, download=True)

    print (f"#Labeled: {len(labeled_ids)} #Unlabeled: {len(unlabeled_ids)} #Val: {len(val_ids)}")
    return labeled_set, unlabeled_set, val_set, test_set
    

def train_val_split(labels, n_labeled_per_class):
    """Split sample indices into labeled, unlabeled and validation groups.

    For each class id 0..9 the indices of that class are shuffled; the first
    ``n_labeled_per_class`` become labeled, the last 500 become validation and
    everything in between becomes unlabeled. Each resulting list is shuffled
    again before being returned.

    Args:
        labels: Sequence of integer class labels, one per sample.
        n_labeled_per_class: Number of labeled indices to keep per class.

    Returns:
        ``(train_labeled_idxs, train_unlabeled_idxs, val_idxs)`` as lists.
    """
    labels = np.array(labels)
    labeled, unlabeled, held_out = [], [], []

    for class_id in range(10):
        members = np.where(labels == class_id)[0]
        np.random.shuffle(members)
        labeled.extend(members[:n_labeled_per_class])
        unlabeled.extend(members[n_labeled_per_class:-500])
        held_out.extend(members[-500:])

    # Mix the classes together; the lists were filled class by class.
    for bucket in (labeled, unlabeled, held_out):
        np.random.shuffle(bucket)

    return labeled, unlabeled, held_out

# Per-channel statistics used as the default normalisation constants below.
cifar10_mean = (0.4914, 0.4822, 0.4465) # equals np.mean(train_set.train_data, axis=(0,1,2))/255
cifar10_std = (0.2471, 0.2435, 0.2616) # equals np.std(train_set.train_data, axis=(0,1,2))/255

def normalise(x, mean=cifar10_mean, std=cifar10_std):
    """Standardise 0-255 pixel data with per-channel ``mean`` and ``std``.

    ``mean`` and ``std`` are given on a 0-1 scale and are rescaled by 255 here.
    The input is copied into a new float32 array, so ``x`` itself is untouched.

    Returns:
        float32 array with the same shape as ``x``.
    """
    pixels = np.array(x, np.float32)
    channel_mean = np.array(mean, np.float32)
    channel_std = np.array(std, np.float32)

    pixels -= channel_mean*255
    pixels *= 1.0/(255*channel_std)
    return pixels

def transpose(x, source='NHWC', target='NCHW'):
    """Reorder the axes of ``x`` from the ``source`` layout string to ``target``.

    Each character names one axis; the axis order of the result follows the
    characters of ``target``.
    """
    axis_order = [source.index(axis_name) for axis_name in target]
    return x.transpose(axis_order)

def pad(x, border=4):
    """Reflect-pad the last two axes of a 3-D array by ``border`` on each side.

    The first axis is left unpadded.
    """
    widths = [(0, 0)] + [(border, border)] * 2
    return np.pad(x, widths, mode='reflect')

class RandomPadandCrop(object):
    """Reflect-pad the image by 4 pixels per side, then crop it randomly.

    Args:
        output_size (tuple or int): Desired output size. If int, square crop
            is made.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, x):
        """Pad ``x`` and return a random ``output_size`` window of it.

        Args:
            x: 3-D array; the last two axes are padded and cropped, the first
                axis is kept whole.

        Returns:
            Array of shape ``(x.shape[0], new_h, new_w)``.
        """
        x = pad(x, 4)

        h, w = x.shape[1:]
        new_h, new_w = self.output_size

        # np.random.randint excludes its upper bound. Without the +1 the last
        # valid offset is never drawn, and a crop as large as the padded image
        # raises ValueError because the range is empty.
        top = np.random.randint(0, h - new_h + 1)
        left = np.random.randint(0, w - new_w + 1)

        return x[:, top: top + new_h, left: left + new_w]

class RandomFlip(object):
    """Reverse the last axis of a 3-D image with probability 0.5."""

    def __call__(self, x):
        """Return a copy of ``x``, mirrored along its last axis half of the time."""
        should_flip = np.random.rand() < 0.5
        result = x[:, :, ::-1] if should_flip else x
        # Always hand back a fresh array rather than the input or a
        # reversed view of it.
        return result.copy()

class GaussianNoise(object):
    """Add zero-mean Gaussian noise to an image.

    Args:
        std (float): Standard deviation of the noise. Defaults to 0.15.
    """

    def __init__(self, std=0.15):
        self.std = std

    def __call__(self, x):
        """Return a noisy copy of the 3-D array ``x`` with the same dtype.

        The noise is added to a copy so the caller's array is left unchanged.
        An in-place add would permanently alter the source when ``x`` is a
        view into stored data, e.g. an item sliced out of a dataset array.
        """
        c, h, w = x.shape
        noisy = x.copy()
        noisy += np.random.randn(c, h, w) * self.std
        return noisy

class ToTensor(object):
    """Convert a NumPy array into a ``torch.Tensor``."""

    def __call__(self, x):
        """Wrap ``x`` with ``torch.from_numpy`` and return the tensor."""
        return torch.from_numpy(x)

class CIFAR10_labeled(torchvision.datasets.CIFAR10):
    """CIFAR-10 subset whose images are normalised and transposed up front.

    After loading, the optional index selection is applied to both images and
    targets, then the image array is passed through ``normalise`` and
    ``transpose`` once, so ``__getitem__`` serves preprocessed arrays.
    """

    def __init__(self, root, indexs=None, train=True,
                 transform=None, target_transform=None,
                 download=False):
        super().__init__(root, train=train,
                         transform=transform,
                         target_transform=target_transform,
                         download=download)
        use_subset = indexs is not None
        if use_subset:
            self.data = self.data[indexs]
            self.targets = np.array(self.targets)[indexs]
        self.data = transpose(normalise(self.data))

    def __getitem__(self, index):
        """Return the ``(image, target)`` pair stored at ``index``.

        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is index of the target class,
            each passed through its transform when one is configured.
        """
        img = self.data[index]
        target = self.targets[index]

        image_transform = self.transform
        if image_transform is not None:
            img = image_transform(img)

        label_transform = self.target_transform
        if label_transform is not None:
            target = label_transform(target)

        return img, target
    

class CIFAR10_unlabeled(CIFAR10_labeled):
    """Same data as ``CIFAR10_labeled`` but with every target replaced by -1."""

    def __init__(self, root, indexs, train=True,
                 transform=None, target_transform=None,
                 download=False):
        super().__init__(root, indexs, train=train,
                         transform=transform,
                         target_transform=target_transform,
                         download=download)
        # Overwrite the real labels with the placeholder value -1.
        n_samples = len(self.targets)
        self.targets = np.array([-1] * n_samples)

In [3]:
import math
import torch
from torch import nn
from torch.nn import functional as F
from efficientnet_pytorch import EfficientNet as effNet

class Conv2dSamePadding(nn.Conv2d):
    """2-D convolution that pads its input on the fly, TensorFlow 'SAME' style.

    The layer is built with zero static padding; ``forward`` pads the input so
    the output height/width equals ``ceil(input / stride)``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
        super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
        # Make sure the stride always has one entry per spatial dimension.
        if len(self.stride) != 2:
            self.stride = [self.stride[0]]*2

    def forward(self, x):
        """Pad ``x`` as needed and apply the convolution."""
        in_h, in_w = x.size()[-2:]
        kernel_h, kernel_w = self.weight.size()[-2:]
        stride_h, stride_w = self.stride
        dilation_h, dilation_w = self.dilation

        out_h = math.ceil(in_h / stride_h)
        out_w = math.ceil(in_w / stride_w)

        # Total padding needed per dimension to reach the target output size.
        extra_h = max((out_h - 1) * stride_h + (kernel_h - 1) * dilation_h + 1 - in_h, 0)
        extra_w = max((out_w - 1) * stride_w + (kernel_w - 1) * dilation_w + 1 - in_w, 0)

        if extra_h > 0 or extra_w > 0:
            # When the total is odd, the extra pixel goes to the right/bottom.
            left, top = extra_w // 2, extra_h // 2
            x = F.pad(x, [left, extra_w - left, top, extra_h - top])
        return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)


def relu_fn(x):
    """Swish activation: the input scaled by its own sigmoid.

    Despite the name, this is not a ReLU.
    """
    gate = torch.sigmoid(x)
    return x * gate


class EfficientNet(nn.Module):
    """Pretrained EfficientNet backbone with a freshly initialised head.

    Args:
        num_classes: Output size of the new final linear layer.
        version: EfficientNet variant suffix, e.g. ``'b0'`` loads
            ``efficientnet-b0``.
    """

    def __init__(self, num_classes, version = 'b0'):
        super().__init__()

        # Load the pretrained backbone for the requested variant.
        backbone = effNet.from_pretrained(f'efficientnet-{version}')

        # Freeze every parameter reachable through the backbone's children.
        for submodule in backbone.children():
            for weight in submodule.parameters():
                weight.requires_grad = False

        # Read the head dimensions before the layers are replaced.
        head_in = backbone._conv_head.in_channels
        head_out = backbone._conv_head.out_channels
        fc_in = backbone._fc.in_features

        # New (trainable) last conv, batch norm and classifier layers.
        backbone._conv_head = Conv2dSamePadding(head_in, head_out, kernel_size=(1,1), stride=(1,1), bias=False)
        backbone._bn1 = nn.BatchNorm2d(num_features=head_out, momentum=0.010000000000000009, eps = 0.001)
        backbone._fc = nn.Linear(fc_in, num_classes)

        self.model_ft = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped backbone and return its output."""
        return self.model_ft(x)

In [4]:
%tb

No traceback available to show.


In [5]:

def get_augmenter():
    """Build the imgaug pipeline and return a batch augmentation function.

    The pipeline crops 0-16 pixels per side, flips horizontally with
    probability 0.5 and applies Gaussian blur with sigma in [0, 3].

    Returns:
        ``augment(images)``: takes a 4-D batch in N,C,H,W order and returns
        the augmented batch in the same N,C,H,W order.
    """
    seq = iaa.Sequential([
        iaa.Crop(px=(0, 16)),
        iaa.Fliplr(0.5),
        iaa.GaussianBlur(sigma=(0, 3.0))
    ])

    def augment(images):
        # imgaug works on channels-last batches: NCHW -> NHWC.
        channels_last = images.transpose(0, 2, 3, 1)
        # augment_images is the explicit array entry point. The generic
        # augment() expects keyword inputs such as images=...
        augmented = seq.augment_images(channels_last)
        # Undo the layout change with the inverse permutation (NHWC -> NCHW).
        # Applying (0, 2, 3, 1) a second time would give N,W,C,H.
        return np.asarray(augmented).transpose(0, 3, 1, 2)

    return augment

In [6]:
def sharpen(x, T):
    """Raise each entry of ``x`` to the power ``1/T`` and renormalise the rows.

    Rows (axis 1) of the result sum to one.
    """
    powered = x ** (1 / T)
    row_totals = powered.sum(axis=1, keepdims=True)
    return powered / row_totals

In [7]:
def mixup(x1, x2, y1, y2, alpha):
    """Blend two batches (inputs and targets) with a Beta-distributed weight.

    Args:
        x1, x2: Inputs to mix; ``x1`` receives the larger weight.
        y1, y2: Targets mixed with the same weight as the inputs.
        alpha: Shape parameter of the symmetric ``Beta(alpha, alpha)``.

    Returns:
        ``(x, y)``: the mixed inputs and mixed targets.
    """
    # Both Beta shape parameters must be positive; a negative second
    # parameter makes np.random.beta raise ValueError.
    lam = np.random.beta(alpha, alpha)
    # MixMatch uses max(lam, 1 - lam) so the result stays closer to the first
    # argument, which keeps labeled and unlabeled batches distinguishable.
    lam = max(lam, 1 - lam)
    x = lam * x1 + (1 - lam) * x2
    y = lam * y1 + (1 - lam) * y2
    return x, y

In [8]:
def mixmatch(x, y, u, model, augment_fn, T=0.5, K=2, alpha=0.75):
    """Produce one MixMatch batch from labeled and unlabeled data.

    Args:
        x: Labeled inputs.
        y: Targets for ``x`` (rows must be concatenable with the guessed labels).
        u: Unlabeled inputs.
        model: Callable returning per-class predictions for a batch.
        augment_fn: Callable applied once to ``x`` and ``K`` times to ``u``.
        T: Sharpening temperature passed to ``sharpen``.
        K: Number of augmentations of the unlabeled batch.
        alpha: Beta parameter passed to ``mixup``.

    Returns:
        ``(X, U, p, q)``: mixed labeled inputs, mixed unlabeled inputs and
        their respective mixed targets.
    """
    # NOTE(review): the arrays are combined with NumPy and sharpened with a
    # power, so `model` presumably has to return NumPy-compatible,
    # non-negative class probabilities here — confirm against the model used.
    xb = augment_fn(x)
    ub = [augment_fn(u) for _ in range(K)]
    # Guess labels: average the predictions over the K augmentations, sharpen.
    qb = sharpen(sum(model(batch) for batch in ub) / K, T)
    Ux = np.concatenate(ub, axis=0)
    # Every augmented copy of u shares the same guessed labels.
    Uy = np.concatenate([qb for _ in range(K)], axis=0)
    # np.random.shuffle works in place and returns None, so a permutation is
    # drawn instead to get a usable index array.
    indices = np.random.permutation(len(xb) + len(Ux))
    Wx = np.concatenate([Ux, xb], axis=0)[indices]
    # Targets must line up row by row with Wx, hence Uy (K copies), not qb.
    Wy = np.concatenate([Uy, y], axis=0)[indices]
    X, p = mixup(xb, Wx[:len(xb)], y, Wy[:len(xb)], alpha)
    U, q = mixup(Ux, Wx[len(xb):], Uy, Wy[len(xb):], alpha)
    return X, U, p, q

In [9]:
class MixMatchLoss(torch.nn.Module):
    """MixMatch objective: soft-label cross-entropy plus weighted consistency MSE.

    Args:
        lambda_u: Weight of the unlabeled (consistency) term.
    """

    def __init__(self, lambda_u=100):
        # Module.__init__ must run first: assigning a submodule before it
        # raises AttributeError.
        super(MixMatchLoss, self).__init__()
        self.lambda_u = lambda_u
        # Kept for backward compatibility. forward() computes the soft-target
        # cross-entropy explicitly because p holds probabilities, not indices.
        self.xent = torch.nn.CrossEntropyLoss()
        self.mse = torch.nn.MSELoss()

    def forward(self, X, U, p, q, model):
        """Compute the loss for one mixed batch.

        Args:
            X: Labeled inputs (tensor), first axis is the batch.
            U: Unlabeled inputs (tensor), same trailing shape as ``X``.
            p: Soft targets for ``X``, one row per sample.
            q: Soft targets for ``U``, one row per sample.
            model: Callable mapping a batch to per-class logits.

        Returns:
            Scalar tensor: labeled loss + ``lambda_u`` * unlabeled loss.
        """
        n_labeled = len(p)
        # Stack along the batch axis so the predictions can be split back
        # into the labeled and unlabeled parts below.
        logits = model(torch.cat([X, U], dim=0))

        # Cross-entropy against probability targets.
        log_probs = torch.log_softmax(logits[:n_labeled], dim=1)
        labeled_loss = -(p * log_probs).sum(dim=1).mean()

        # The outputs are treated as logits above, so compare class
        # probabilities (not raw logits) with the guessed labels.
        unlabeled_loss = self.mse(torch.softmax(logits[n_labeled:], dim=1), q)

        return labeled_loss + self.lambda_u * unlabeled_loss


In [10]:
def conv3x3(in_planes, out_planes, stride=1):
    """Create a biased 3x3 ``Conv2d`` layer with the given stride.

    No padding is configured, so the layer uses ``Conv2d``'s default padding.
    """
    layer = torch.nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        bias=True,
    )
    return layer

In [11]:
def conv_init(m):
    """Initialise a module in place; intended for use with ``model.apply``.

    Modules whose class name contains ``Conv`` get Xavier-uniform weights
    (gain sqrt(2)) and zero bias; those containing ``BatchNorm`` get weight 1
    and bias 0. Other modules are left untouched. Missing parameters
    (``bias=False`` convolutions, non-affine batch norm) are skipped.
    """
    classname = m.__class__.__name__
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)

    # The underscore-suffixed initialisers are the in-place API; the
    # non-underscore names are deprecated aliases.
    if 'Conv' in classname:
        if weight is not None:
            torch.nn.init.xavier_uniform_(weight, gain=np.sqrt(2))
        if bias is not None:
            torch.nn.init.constant_(bias, 0)
    elif 'BatchNorm' in classname:
        if weight is not None:
            torch.nn.init.constant_(weight, 1)
        if bias is not None:
            torch.nn.init.constant_(bias, 0)

In [13]:
def basic_generator(x, y=None, batch_size=32, shuffle=True):
    """Endlessly yield mini-batches of ``x`` (and ``y`` when given).

    Args:
        x: Indexable array of samples.
        y: Optional targets aligned with ``x``.
        batch_size: Number of samples per batch.
        shuffle: If True, batches follow one random permutation drawn up front.

    Yields:
        ``(x_batch, y_batch)`` when ``y`` is given, otherwise ``x_batch``.
        The batch at the end of the data may be shorter than ``batch_size``;
        the start offset then wraps around modulo ``len(x)``.

    Raises:
        ValueError: if ``x`` is empty.
    """
    n_samples = len(x)
    if n_samples == 0:
        raise ValueError('basic_generator needs at least one sample')

    # np.random.shuffle shuffles in place and returns None; permutation
    # returns the shuffled index array that is needed here.
    all_indices = np.random.permutation(n_samples) if shuffle else np.arange(n_samples)

    i = 0
    while True:
        indices = all_indices[i:i + batch_size]
        # Yield exactly one item per step so the output type stays
        # consistent: always a pair, or always a bare batch.
        if y is not None:
            yield x[indices], y[indices]
        else:
            yield x[indices]
        i = (i + batch_size) % n_samples

In [14]:
def mixmatch_wrapper(x, y, u, model, batch_size=32):
    """Endlessly yield MixMatch batches built from labeled and unlabeled data.

    One labeled batch and one unlabeled batch are drawn per step and passed to
    ``mixmatch`` together with ``model`` and a freshly built augmenter.
    """
    augment = get_augmenter()
    labeled_batches = basic_generator(x, y, batch_size)
    unlabeled_batches = basic_generator(u, batch_size=batch_size)
    while True:
        labeled_inputs, labeled_targets = next(labeled_batches)
        unlabeled_inputs = next(unlabeled_batches)
        yield mixmatch(labeled_inputs, labeled_targets, unlabeled_inputs, model, augment)

In [15]:
def to_torch(*args, device='cuda'):
    """Convert each NumPy array in ``args`` to a tensor on ``device``.

    Returns:
        A list with one tensor per positional argument, in the same order.
    """
    return [torch.from_numpy(array).to(device) for array in args]

In [16]:
def test(model, test_gen, test_iters):
    acc = []
    for i, (x, y) in enumerate(test_gen):
        x = to_torch(x)
        pred = model(x).to('cpu').argmax(axis=1)
        acc.append(np.mean(pred == y.argmax(axis=1)))
        if i == test_iters:
            break
    print('Accuracy was : {}'.format(np.mean(acc)))

In [17]:
def report(loss_history):
    """Print the mean of ``loss_history`` and return a fresh empty list.

    The returned list lets the caller reset its history in one assignment.
    """
    mean_loss = np.mean(loss_history)
    message = 'Average loss in last epoch was : {}'.format(mean_loss)
    print(message)
    return list()

In [18]:
def save(model, iter, train_iters):
    """Write the model's ``state_dict`` to ``model_<epoch>.pth``.

    Args:
        model: Module whose parameters are saved.
        iter: Current global iteration index. The name shadows the builtin
            but is kept for interface compatibility.
        train_iters: Number of iterations per epoch.
    """
    # The epoch number is the iteration count divided by iterations per epoch.
    # The previous expression referenced an undefined name and had the
    # operands swapped.
    epoch = iter // train_iters
    torch.save(model.state_dict(), 'model_{}.pth'.format(epoch))

In [19]:
def run(model, train_gen, test_gen, epochs, train_iters, test_iters, device):
    """Train ``model`` on MixMatch batches with periodic report/test/save.

    Every ``train_iters`` iterations (including iteration 0) the loss history
    is reported and reset, the model is tested and saved, and no optimisation
    step is taken on that batch. Training stops once ``epochs`` such
    boundaries have been reached.

    Args:
        model: Module to optimise.
        train_gen: Iterable of ``(x, u, p, q)`` batches.
        test_gen: Batch iterable forwarded to ``test``.
        epochs: Number of epochs to run.
        train_iters: Iterations per epoch.
        test_iters: Forwarded to ``test``.
        device: Device the batches are moved to.
    """
    # NOTE(review): `lr` is read from module scope and is not defined in the
    # cells visible here — confirm it is set before this is called.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = MixMatchLoss()
    loss_history = []
    for i, (x, u, p, q) in enumerate(train_gen):
        if i % train_iters == 0:
            loss_history = report(loss_history)
            test(model, test_gen, test_iters)
            save(model, i, train_iters)
            if i // train_iters == epochs:
                return
            continue

        optimizer.zero_grad()
        x, u, p, q = to_torch(x, u, p, q, device=device)
        loss = loss_fn(x, u, p, q, model)
        loss.backward()
        optimizer.step()
        # Store a plain float: keeping the tensor would retain its autograd
        # graph for the whole epoch and cannot be averaged by np.mean.
        loss_history.append(loss.item())

In [None]:

# Script entry point: splits CIFAR-10 into labeled / unlabeled / validation
# parts, builds the MixMatch batch generator and trains the model with Adam.
# NOTE(review): `train_test_split`, `Wide_ResNet` and `lr` are used below but
# are not imported or defined in any cell visible here (execution count 12 is
# missing from the notebook) — confirm they exist before a Restart & Run All.
if __name__ == "__main__":
    # Sizes of the labeled, unlabeled and validation subsets.
    training_amount = 250
    training_u_amount = 40000
    validation_amount = 500

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=None)

    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=None)

    X_train = np.array(trainset.data)
    y_train = np.array(trainset.targets)

    # NOTE(review): X_test / y_test are built here but not used later in this cell.
    X_test = np.array(testset.data)
    y_test = np.array(testset.targets)

    # Train set / Validation set split
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=validation_amount, random_state=1,
                                                              shuffle=True, stratify=y_train)

    # Train unsupervised / Train supervised split
    # Train set / Validation set split
    X_train, X_u_train, y_train, y_u_train = train_test_split(X_train, y_train, test_size=training_u_amount, random_state=1,
                                                              shuffle=True, stratify=y_train)

    # Keep `training_amount` samples as the labeled set; the rest becomes X_remain.
    X_remain, X_train, y_remain, y_train = train_test_split(X_train, y_train, test_size=training_amount, random_state=1,
                                                              shuffle=True, stratify=y_train)

    classes = ('plane', 'car', 'bird', 'cat','deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    # model = Wide_ResNet(28, 10, 0.3, 10).cuda()
    model = Wide_ResNet(10).cuda()

    # One-hot encode the labeled targets on the GPU.
    y_train = torch.tensor(y_train).cuda()
    y_train = torch.nn.functional.one_hot(y_train).float()

    train_generator = mixmatch_wrapper(X_train, y_train, X_u_train, model, 32)
    # Evaluation batches come from X_remain / y_remain (the samples left over
    # after the labeled subset was carved out), not from X_val or X_test.
    test_generator = basic_generator(X_remain, y_remain, 32)
    # run(model, train_generator, test_generator, 100, 20, )

    optim = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=.00001)
    loss_fn = MixMatchLoss()
    final_loss = 0
    count = 0
    # NOTE(review): there is no break or return in this loop, so it runs for as
    # long as train_generator keeps yielding — confirm that is intended.
    for i, (x, u, p, q) in enumerate(train_generator):
        model.train()
        loss = loss_fn(x, u, p, q, model)
        optim.zero_grad()
        loss.backward()
        optim.step()
        final_loss += loss.item()
        count += 1
        # Every 100 iterations print the running mean loss and evaluate.
        if i%100 == 0:
            print(f"loss is {final_loss/count}")
            test(model, test_generator, 50)
            model.train()