In [1]:
%matplotlib inline

In [2]:
import torch
import math
import torch.optim as optim

import torch.nn as nn
import torch.nn.functional as F
#from torchsample.transforms import *

from sklearn.metrics import accuracy_score
from os import path
import os

import numpy as np
import PIL
import random
from IPython.core.display import Image, display

import matplotlib.pyplot as plt

In [3]:
#DEVICE_ID = 3
#DEVICE = torch.device('cuda:%d' % DEVICE_ID)
#torch.cuda.set_device(DEVICE_ID)
# (redacted: a base64-encoded credential was committed here; rotate it and load secrets from the environment instead)


In [4]:
### To run without a GPU, keep this line active and leave the CUDA setup above commented out
DEVICE = torch.device('cpu')

In [5]:
import torch as th

class Rotate(object):
    """
    Rotate one or more image tensors by a fixed angle about the image center.

    Arguments
    ---------
    value : integer or float
        rotation angle in degrees (the same angle is used for every call)

    interp : string in {'bilinear', 'nearest'} or list of strings
        type of interpolation to use. You can provide a different
        type of interpolation for each input, e.g. if you have two
        inputs then you can say `interp=['bilinear','nearest']`

    lazy    : boolean
        if true, only create the affine transform matrix and return that
        if false, perform the transform on the tensor(s) and return the result
    """

    def __init__(self,
                 value,
                 interp='bilinear',
                 lazy=False):
        self.value = value
        self.interp = interp
        self.lazy = lazy

    def __call__(self, *inputs):
        """
        Apply the rotation.

        Returns the 3x3 rotation matrix when `lazy` is set. Otherwise returns
        the transformed tensor for a single input, or a list of transformed
        tensors (one per input, in order) for several inputs.

        Raises ValueError when called without inputs in non-lazy mode.
        """
        if not isinstance(self.interp, (tuple, list)):
            interp = [self.interp] * len(inputs)
        else:
            interp = self.interp

        theta = math.pi / 180 * self.value
        rotation_matrix = th.FloatTensor([[math.cos(theta), -math.sin(theta), 0],
                                          [math.sin(theta), math.cos(theta), 0],
                                          [0, 0, 1]])
        if self.lazy:
            return rotation_matrix

        if not inputs:
            raise ValueError('Rotate expects at least one input tensor')

        outputs = [th_affine2d(_input,
                               rotation_matrix,
                               mode=interp[idx],
                               center=True)
                   for idx, _input in enumerate(inputs)]
        # Decide on the number of results: the previous `idx > 1` check
        # silently dropped the second result when exactly two inputs were given.
        return outputs if len(outputs) > 1 else outputs[0]

In [6]:
def th_affine2d(x, matrix, mode='bilinear', center=True):
    """
    2D Affine image transform on th.Tensor

    Arguments
    ---------
    x : th.Tensor of size (C, H, W)
        image tensor to be transformed

    matrix : th.Tensor of size (3, 3) or (2, 3)
        transformation matrix; a batched (B, 3, 3) / (B, 2, 3) tensor is
        also accepted

    mode : string in {'nearest', 'bilinear'}
        interpolation scheme to use

    center : boolean
        whether to alter the bias of the transform
        so the transform is applied about the center
        of the image rather than the origin

    Returns
    -------
    th.Tensor with the same shape as `x`

    Raises
    ------
    ValueError
        if `mode` is not one of the supported interpolation schemes

    Example
    -------
    >>> x = th.zeros(2,1000,1000)
    >>> x[:,100:500,100:500] = 10
    >>> matrix = th.FloatTensor([[1.,0,-50],
    ...                          [0,1.,-50]])
    >>> xb = th_affine2d(x, matrix, mode='bilinear')
    """
    # Reject unknown modes before doing any work; previously this fell through
    # to `return x_transformed` with the name unbound.
    if mode not in ('nearest', 'bilinear'):
        raise ValueError("mode must be 'nearest' or 'bilinear', got %r" % (mode,))

    if matrix.dim() == 2:
        # keep the top two rows of a (3, 3) matrix and add a batch dimension
        matrix = matrix[:2,:]
        matrix = matrix.unsqueeze(0)
    elif matrix.dim() == 3:
        if matrix.size()[1:] == (3,3):
            matrix = matrix[:,:2,:]

    # linear part (one copy per channel) and translation part of the transform
    A_batch = matrix[:,:,:2]
    if A_batch.size(0) != x.size(0):
        A_batch = A_batch.repeat(x.size(0),1,1)
    b_batch = matrix[:,:,2].unsqueeze(1)

    # make a meshgrid of normal coordinates
    _coords = th_iterproduct(x.size(1),x.size(2))
    coords = _coords.unsqueeze(0).repeat(x.size(0),1,1).float()

    if center:
        # shift the coordinates so center is the origin
        coords[:,:,0] = coords[:,:,0] - (x.size(1) / 2. - 0.5)
        coords[:,:,1] = coords[:,:,1] - (x.size(2) / 2. - 0.5)
    # apply the coordinate transformation
    new_coords = coords.bmm(A_batch.transpose(1,2)) + b_batch.expand_as(coords)

    if center:
        # shift the coordinates back so origin is origin
        new_coords[:,:,0] = new_coords[:,:,0] + (x.size(1) / 2. - 0.5)
        new_coords[:,:,1] = new_coords[:,:,1] + (x.size(2) / 2. - 0.5)

    # sample the input at the transformed coordinates
    if mode == 'nearest':
        # NOTE(review): th_nearest_interp2d is not defined in the visible part
        # of this notebook - confirm it exists before using mode='nearest'.
        x_transformed = th_nearest_interp2d(x.contiguous(), new_coords)
    else:
        x_transformed = th_bilinear_interp2d(x.contiguous(), new_coords)

    return x_transformed

In [7]:
def th_bilinear_interp2d(input, coords):
    """
    Bilinear interpolation in 2d.

    Arguments
    ---------
    input : th.Tensor indexed as (channel, dim1, dim2)
        tensor to sample from

    coords : th.Tensor of size (channel, dim1*dim2, 2)
        sampling positions; coords[:, :, 0] addresses dim 1 of `input`
        and coords[:, :, 1] addresses dim 2

    Returns
    -------
    th.Tensor shaped like `input`. Positions outside the tensor are clamped
    to the nearest valid position.
    """
    height, width = input.size(1), input.size(2)

    # Clamp to the full valid range [0, size - 1]. Clamping to size - 2 (as
    # before) made the last row/column unreachable, so even an identity
    # transform did not reproduce the input.
    x = th.clamp(coords[:,:,0], 0, height - 1)
    y = th.clamp(coords[:,:,1], 0, width - 1)

    # The lower neighbour is capped at size - 2 so that the upper neighbour
    # (lower + 1) stays in bounds; for a size-1 dimension both collapse to 0.
    x0 = th.clamp(x.floor(), 0, max(height - 2, 0))
    y0 = th.clamp(y.floor(), 0, max(width - 2, 0))
    x1 = th.clamp(x0 + 1, max=height - 1)
    y1 = th.clamp(y0 + 1, max=width - 1)

    # Flat offsets into a contiguous (channel, dim1*dim2) view.
    input_flat = input.contiguous().view(input.size(0), -1)
    x0_ix = x0.long() * width
    x1_ix = x1.long() * width
    y0_ix = y0.long()
    y1_ix = y1.long()

    vals_00 = input_flat.gather(1, x0_ix + y0_ix)
    vals_10 = input_flat.gather(1, x1_ix + y0_ix)
    vals_01 = input_flat.gather(1, x0_ix + y1_ix)
    vals_11 = input_flat.gather(1, x1_ix + y1_ix)

    # fractional distances to the lower neighbour and their complements
    xd = x - x0
    yd = y - y0
    xm = 1 - xd
    ym = 1 - yd

    x_mapped = (vals_00.mul(xm).mul(ym) +
                vals_10.mul(xd).mul(ym) +
                vals_01.mul(xm).mul(yd) +
                vals_11.mul(xd).mul(yd))

    return x_mapped.view_as(input)

In [8]:
def th_iterproduct(*args):
    """
    Enumerate every index combination of a grid with the given dimension sizes.

    Returns a tensor with one row per grid position and one column per
    dimension, e.g. th_iterproduct(2, 3) has rows (0,0), (0,1), (0,2), (1,0), ...
    """
    axis_count = len(args)
    index_grid = np.indices(args)
    # one row per axis -> transpose so that each row is one grid position
    combos = index_grid.reshape((axis_count, -1)).transpose()
    return th.from_numpy(combos)

In [9]:
# Seed NumPy's global RNG (used by the np.random.randint calls in get_augumentation).
np.random.seed(100500)

def data2image(data):
    """Turn a flat, channel-first 3*32*32 pixel vector into a 32x32 PIL image."""
    channels_first = np.reshape(data, (3, 32, 32))
    # PIL expects (height, width, channel) ordering
    channels_last = np.transpose(channels_first, (1, 2, 0))
    pixels = np.uint8(channels_last)
    return PIL.Image.fromarray(pixels)

def imshow(img):
    """
    Display a channel-first (C, H, W) image with matplotlib.

    Torch tensors are converted to uint8 NumPy arrays first; any other input
    is passed through as is.
    """
    if isinstance(img, torch.Tensor):
        img = img.numpy().astype('uint8')
    channels_last = np.transpose(img, (1, 2, 0))
    plt.imshow(channels_last)
    
def prediction2classes(output_var):
    """
    Convert network outputs into a list of predicted class indices.

    `output_var` holds per-class scores along dim 1; the index of the largest
    score is taken for every sample. A list is returned even for a batch of
    one sample (previously the squeeze collapsed it to a bare int, which broke
    callers doing `res += prediction2classes(...)`).
    """
    _, predicted = torch.max(output_var.detach(), 1)
    predicted = predicted.squeeze()
    if predicted.dim() == 0:
        # single-sample batch: restore the batch dimension removed by squeeze
        predicted = predicted.unsqueeze(0)
    return predicted.tolist()

def make_solution_pytorch(net, input_tensor, a_batch_size):
    """
    Predict a class for every row of `input_tensor`, processing it in batches.

    The network is switched to eval mode and run without gradient tracking.
    Returns the predicted class indices in input order.
    """
    res = []
    net = net.eval()
    with torch.no_grad():
        # range() stops before len(input_tensor); the former `<=` loop ran the
        # network once more on an empty slice whenever the length was a
        # multiple of the batch size.
        for cur_pos in range(0, len(input_tensor), a_batch_size):
            outputs = net(input_tensor[cur_pos:cur_pos + a_batch_size])
            res += prediction2classes(outputs)
    return res

In [10]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from PIL import Image

class CifarDataset(Dataset):
    """
    Dataset of 32x32 three-channel images loaded from a .npy file.

    For training data the first column of the array is the label and the rest
    of each row is a flat channel-first image; test data contains only the
    pixel columns. Images are stored as PIL images and `transform`, when
    given, is applied on access.
    """

    def __init__(self, input_path, is_train=True, transform=None):
        data = np.load(input_path)

        if is_train:
            label_column, pixels = np.hsplit(data, [1])
            self.Y = [row[0] for row in label_column]
        else:
            pixels = data
            self.Y = None

        images_nchw = pixels.reshape((pixels.shape[0], 3, 32, 32))
        # reorder to (N, H, W, C), the layout PIL expects
        images_nhwc = images_nchw.transpose((0, 2, 3, 1))
        self.X = [PIL.Image.fromarray(img) for img in images_nhwc]

        self.transform = transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return `sample` for unlabeled data, `(sample, label)` otherwise."""
        sample = self.X[idx]
        if self.transform:
            sample = self.transform(sample)

        if self.Y is not None:
            return (sample, self.Y[idx])
        return sample

In [11]:
# Change these paths to point at your own copies of the Kaggle data files.
# DATA_PATH keeps its trailing slash because the file names are appended by plain string concatenation.
DATA_PATH  = os.getcwd()  + '/'
train_path = 'homework_4.train.npy'
test_path  = 'homework_4_no_classes.test.npy'

In [12]:
# Per-channel mean and standard deviation of the training images (after ToTensor).
# The dataset is loaded and converted once and both statistics are taken from
# the same array, instead of building the whole dataset twice.
_train_pixels = np.stack([item[0].numpy() for item in CifarDataset(DATA_PATH + train_path, transform=transforms.ToTensor())])
np_mean = _train_pixels.mean(axis=(0, 2, 3))
np_std = _train_pixels.std(axis=(0, 2, 3))
del _train_pixels  # free the stacked copy of the training set

In [13]:
# Sanity check: one statistic per colour channel.
np_mean.shape

(3,)

In [14]:
# Training pipeline: pad + random crop and random horizontal flip as
# augmentation, then tensor conversion and per-channel normalisation.
_train_steps = [
    transforms.Pad(4),
    transforms.RandomCrop(32),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(torch.FloatTensor(np_mean), torch.FloatTensor(np_std)),
]
cifar_transform_norm = transforms.Compose(_train_steps)

# Test pipeline: no augmentation, same normalisation as for training.
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(torch.FloatTensor(np_mean), torch.FloatTensor(np_std)),
]
cifar_test_transform_norm = transforms.Compose(_test_steps)

In [15]:
# Training data: augmented, shuffled every epoch.
dataset_train_norm = CifarDataset(
    DATA_PATH + train_path,
    transform=cifar_transform_norm,
)
dataloader_train_norm = DataLoader(
    dataset_train_norm,
    batch_size=128,
    shuffle=True,
    num_workers=4,
)

# Test data: unlabeled, kept in file order so predictions line up with row ids.
dataset_test_norm = CifarDataset(
    DATA_PATH + test_path,
    is_train=False,
    transform=cifar_test_transform_norm,
)
dataloader_test_norm = DataLoader(
    dataset_test_norm,
    batch_size=128,
    shuffle=False,
    num_workers=1,
)


def train_network(a_net, 
                  a_device,
                  dataloader_train_norm=dataloader_train_norm,
                  a_epochs=1000,
                  a_batch_size=128,
                  a_lr=0.08):
    """
    Train `a_net` with SGD and cross-entropy loss, then plot the training accuracy.

    Arguments
    ---------
    a_net : nn.Module to train (moved to `a_device`)
    a_device : device the model and every batch are moved to
    dataloader_train_norm : iterable yielding (inputs, labels) batches
    a_epochs : number of passes over the data
    a_batch_size : unused here; the batch size is set by the data loader
    a_lr : initial learning rate

    Prints the accuracy after every epoch. Returns None.
    """
    train_acc = []
    net = a_net.to(a_device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=a_lr, weight_decay=0.0001, momentum=0.9)

    for epoch in range(a_epochs):  # loop over the dataset multiple times
        # Step decay: scale the current learning rate by 0.98 on the same
        # epochs as before (every 5th epoch, plus epoch 123).
        # NOTE(review): the previous code rebuilt the optimizer with the
        # constant lr=a_lr*0.98, which never decayed further and reset the
        # momentum buffers each time - confirm that compounding decay is the
        # intended schedule.
        if (epoch + 1) % 5 == 0 or epoch == 123:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.98

        net.train()
        correct = 0
        seen = 0
        for item in dataloader_train_norm:
            inputs = item[0].to(a_device)
            labels = item[1].long().to(a_device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Count hits on the device: works on GPU as well as CPU and weights
            # every sample equally even when the last batch is smaller.
            predicted = outputs.detach().argmax(dim=1)
            correct += (predicted == labels).sum().item()
            seen += labels.size(0)

        # guard against an empty data loader
        epoch_accuracy = correct / seen if seen else 0.0
        train_acc.append(epoch_accuracy)

        print("Epoch ", epoch, round(train_acc[-1], 4))

    print('Finished Training')

    plt.plot(train_acc, label='Train')
    plt.legend()
    plt.grid()

In [16]:
def get_augumentation(batch):
    """
    Randomly rotate some images of `batch` in place.

    For every image two independent draws from 0..4 are compared, so an image
    is selected with probability 1/5. A selected image is rotated by a
    multiple of 90 degrees (0 included) and written back into `batch`.

    Returns the same `batch` object. Previously the rotated tensor was only
    bound to the loop variable, so the batch was never changed and nothing
    was returned.
    """
    for i, tensor in enumerate(batch):
        choise = np.random.randint(0, 5)
        if choise == np.random.randint(0, 5):
            quarter_turns = np.random.randint(0, 9) % 4
            batch[i] = Rotate(90 * quarter_turns)(tensor)
    return batch

In [17]:
class StupidDenseNet(nn.Module):
    """
    Baseline fully connected network: three Linear + Sigmoid stages.

    Accepts either (N, 3, 32, 32) images or already flattened (N, 3072)
    inputs and returns a tensor of shape (N, 500).
    """

    def __init__(self):
        super(StupidDenseNet, self).__init__()

        # One way to define a network is to list the layers in a Sequential.
        # Each layer's input size must match the previous layer's output
        # (500); lin2/lin3 used to expect 3*32*32 inputs and failed on the
        # first forward pass.
        self.classifier = nn.Sequential()
        self.classifier.add_module('lin1', nn.Linear(3*32*32, 500))
        self.classifier.add_module('sig1', torch.nn.Sigmoid())
        self.classifier.add_module('lin2', nn.Linear(500, 500))
        self.classifier.add_module('sig2', torch.nn.Sigmoid())
        self.classifier.add_module('lin3', nn.Linear(500, 500))
        # NOTE(review): the other models in this notebook emit 100 raw logits;
        # the 500-wide sigmoid output is kept as originally written - confirm
        # whether it should be Linear(500, 100) without the final Sigmoid.
        self.classifier.add_module('sig3', torch.nn.Sigmoid())

    def forward(self, x):
        # flatten everything but the batch dimension before the Linear layers
        x = x.reshape(x.size(0), -1)
        return self.classifier(x)

In [18]:
class LeNet(nn.Module):
    """LeNet-style CNN for 3x32x32 inputs producing 100 class scores."""

    def __init__(self):
        super(LeNet, self).__init__()

        # Another way to define a network: declare the layers here and apply
        # them explicitly in forward().
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        # The hidden width is doubled from 84 to 84*2 because there are 100 classes.
        self.fc2 = nn.Linear(120, 84*2)
        self.fc3 = nn.Linear(84*2, 100)

    def forward(self, x):
        # conv -> ReLU -> 2x2 max pooling, twice
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # a single number is enough for a square pooling window
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), 2)
        flat = stage2.view(-1, self.num_flat_features(stage2))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Number of elements per sample, i.e. the product of all non-batch dims."""
        count = 1
        for dim in x.shape[1:]:
            count *= dim
        return count

In [19]:
# Factor used by downsampling residual blocks: the channel count is multiplied by it
# and the same value is used as the convolution stride.
DOWNSAMPLE_COEF = 2

def conv3x3(a_in_planes, a_out_planes, a_stride=1):
    """
    Basic convolution building block of the ResNet.

    A bias-free 3x3 convolution with padding=1, so with stride 1 the spatial
    size is preserved.
    """
    return nn.Conv2d(
        in_channels=a_in_planes,
        out_channels=a_out_planes,
        kernel_size=3,
        stride=a_stride,
        padding=1,
        bias=False,
    )

def x_downsample(a_in_channels):
    """
    Bias-free 1x1 convolution with stride 2 for the skip path of a
    downsampling block; it multiplies the channel count by DOWNSAMPLE_COEF.
    """
    widened_channels = a_in_channels * DOWNSAMPLE_COEF
    return nn.Conv2d(a_in_channels, widened_channels,
                     kernel_size=1, stride=2, bias=False)

In [20]:
class CifarResidualBlock(nn.Module):
    """
    Basic residual block: two 3x3 conv + batch-norm stages with an optional
    skip connection.

    With `make_downsample` the first convolution uses stride DOWNSAMPLE_COEF
    and multiplies the channel count by the same factor; the skip path is then
    passed through `x_downsample` so that shapes match. With
    `use_skip_connection=False` nothing is added to the main path.
    """

    def __init__(self, a_in_channels, make_downsample=False, use_skip_connection=True):
        super(CifarResidualBlock, self).__init__()
        self.use_skip_connection = use_skip_connection

        coef = DOWNSAMPLE_COEF if make_downsample else 1
        planes = int(coef*a_in_channels)

        self.conv1 = conv3x3(a_in_channels, planes, a_stride=coef)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)

        self.downsample = x_downsample(a_in_channels) if make_downsample else None

    def forward(self, x):
        # main path: conv-bn-relu followed by conv-bn
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        if self.use_skip_connection:
            # identity shortcut, or the projected input when the block downsamples
            shortcut = x if self.downsample is None else self.downsample(x)
            out += shortcut

        return self.relu(out)

In [21]:
class CifarResidualBottleneckBlock(nn.Module):
    """
    Bottleneck residual block: 1x1 reduce -> 3x3 -> 1x1 expand, each followed
    by batch norm, with an optional skip connection.

    The inner width is the output width divided by BOTTLENECK_COEF. With
    `make_downsample` the 3x3 convolution uses stride DOWNSAMPLE_COEF, the
    output width is multiplied by the same factor and the skip path goes
    through a strided 1x1 convolution so that shapes match.
    """

    BOTTLENECK_COEF = 4

    def __init__(self, a_in_channels, make_downsample=False, use_skip_connection=True):
        super(CifarResidualBottleneckBlock, self).__init__()
        self.use_skip_connection = use_skip_connection

        coef = DOWNSAMPLE_COEF if make_downsample else 1
        planes = int(coef*a_in_channels/self.BOTTLENECK_COEF)
        planes_out = int(a_in_channels*coef)

        # reduce
        self.conv1 = nn.Conv2d(a_in_channels, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # spatial convolution (carries the stride when downsampling)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, stride=coef, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        # expand
        self.conv3 = nn.Conv2d(planes, planes_out, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes_out)
        self.relu = nn.ReLU(inplace=True)

        self.downsample = None
        if make_downsample:
            self.downsample = nn.Conv2d(a_in_channels, planes_out,
                                        kernel_size=1, stride=2, bias=False)

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        if self.use_skip_connection:
            # identity shortcut, or the projected input when the block downsamples
            shortcut = x if self.downsample is None else self.downsample(x)
            out += shortcut

        return self.relu(out)

In [22]:
class CifarResNet(nn.Module):
    """
    ResNet-style classifier with 100 outputs.

    A conv/bn/relu/max-pool stem is followed by four stages of bottleneck
    blocks; `layers[k]` gives the number of blocks in stage k+1. No stage
    downsamples, so the width stays at 64 channels. The features are average
    pooled with a 7x7 window, flattened and fed to a linear classifier that
    expects 64*4 inputs.
    """

    def __init__(self, layers=[2,2,2,2]):
        super(CifarResNet, self).__init__()

        self.out_chanels = 64
        stem = [
            ('conv1', nn.Conv2d(3, self.out_chanels, kernel_size=7, stride=2, padding=3, bias=False)),
            ('bn1', nn.BatchNorm2d(self.out_chanels)),
            ('relu', nn.ReLU(inplace=True)),
            ('maxpool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        ]
        self.features = nn.Sequential()
        for name, module in stem:
            self.features.add_module(name, module)

        # Blocks are named res_block<stage><index>, both counted from 1.
        for stage_no in (1, 2, 3, 4):
            for i in range(layers[stage_no - 1]):
                block = CifarResidualBottleneckBlock(self.out_chanels, make_downsample=False)
                self.features.add_module('res_block%d%d' % (stage_no, i + 1), block)

        self.global_avg_pooling = nn.AvgPool2d(kernel_size=7, stride=1)
        self.fc_classifier = nn.Linear(self.out_chanels*4, 100)

    def forward(self, x):
        feats = self.features(x)
        pooled = self.global_avg_pooling(feats)
        # flatten everything but the batch dimension
        flat = pooled.view((pooled.size()[0], -1))
        return self.fc_classifier(flat)

In [23]:
#dense_net = StupidDenseNet()
#%time train_network(dense_net, torch.device(DEVICE))

In [24]:
#lenet = LeNet()
#%time train_network(lenet, torch.device('cpu'), a_epochs=10)

In [25]:
#lenet = LeNet()
#%time train_network(lenet, torch.device(DEVICE), a_epochs=10)

In [26]:
#lenet = LeNet()
#%time train_network(lenet, torch.device(DEVICE))

In [None]:
# Train the bottleneck ResNet with 2, 8, 8 and 2 blocks in its four stages.
# train_network runs for its default number of epochs; the log below ends at epoch 59.
resnet = CifarResNet([2, 8, 8, 2])
%time train_network(resnet, torch.device(DEVICE))

Epoch  0 0.0601
Epoch  1 0.1438
Epoch  2 0.2011
Epoch  3 0.2464
Epoch  4 0.2813
Epoch  5 0.3109
Epoch  6 0.3346
Epoch  7 0.3562
Epoch  8 0.378
Epoch  9 0.3927
Epoch  10 0.4075
Epoch  11 0.4206
Epoch  12 0.4315
Epoch  13 0.4409
Epoch  14 0.4506
Epoch  15 0.4583
Epoch  16 0.466
Epoch  17 0.4714
Epoch  18 0.4798
Epoch  19 0.4813
Epoch  20 0.4847
Epoch  21 0.4893
Epoch  22 0.497
Epoch  23 0.4965
Epoch  24 0.5025
Epoch  25 0.5051
Epoch  26 0.5088
Epoch  27 0.5141
Epoch  28 0.5132
Epoch  29 0.5184
Epoch  30 0.5222
Epoch  31 0.5188
Epoch  32 0.5253
Epoch  33 0.5247
Epoch  34 0.5301
Epoch  35 0.5274
Epoch  36 0.531
Epoch  37 0.534
Epoch  38 0.5352
Epoch  39 0.5385
Epoch  40 0.538
Epoch  41 0.5396
Epoch  42 0.5408
Epoch  43 0.5435
Epoch  44 0.5453
Epoch  45 0.5478
Epoch  46 0.545
Epoch  47 0.5477
Epoch  48 0.5471
Epoch  49 0.55
Epoch  50 0.5527
Epoch  51 0.5541
Epoch  52 0.5515
Epoch  53 0.5516
Epoch  54 0.5579
Epoch  55 0.5569
Epoch  56 0.5573
Epoch  57 0.5587
Epoch  58 0.5555
Epoch  59 0.5635

# Important: switch the network to eval mode before predicting, otherwise dropout and batch-norm layers behave as in training

In [22]:
def make_solution(a_net, a_device):
    """
    Predict a class for every sample served by `dataloader_test_norm`.

    The model is moved to `a_device`, switched to eval mode and run without
    gradient tracking, so no autograd graph is kept for the test batches.
    Returns the predicted class indices in loader order.
    """
    res = []
    net = a_net.to(a_device).eval()
    with torch.no_grad():
        for item in dataloader_test_norm:
            inputs = item.to(a_device)
            outputs = net(inputs)

            res += prediction2classes(outputs)
    return res

In [23]:
# Predict test-set classes with the trained ResNet (dense-net variant kept below for reference).
# my_solution = make_solution(dense_net, DEVICE)
my_solution = make_solution(resnet, DEVICE)

In [24]:
# Write the submission file: a header line, then one "row id,predicted class" line per sample.
with open('my_solution6.csv', 'w') as fout:
    print('Id', 'Prediction', sep=',', file=fout)
    fout.writelines('{},{}\n'.format(row_id, label)
                    for row_id, label in enumerate(my_solution))