In [1]:
from helpers.data import get_dataloaders
from helpers.train import TrainingManager
from helpers.loss_accuracy import accuracy
from functools import partial
import models.resnet
import torch
import argparse
import torch.optim as optim
from torch.nn import CrossEntropyLoss
import torch.nn as nn
import torch.nn.functional as F
import random
import numpy as np

In [18]:
from torchvision.datasets import ImageNet
import torch.utils.data
import torchvision.transforms as transforms
def get_dataloaders(dataset_name, batch_size, num_workers=20):
    trainset, testset = _datasets_get_func[dataset_name]()
    dataloader = partial(torch.utils.data.DataLoader, batch_size=batch_size, num_workers=num_workers, pin_memory=True)
    return dataloader(trainset, shuffle=True), dataloader(testset)
        
    
    batch_size, num_workers

_datasets_base_folder = '/media/data1/idokessler'
_datasets_get_func = {}
_imagenet_folder = '/media/data1/nadavz/data/imagenet/'

_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
_transform_test_cifar10 = transforms.Compose([
                                        transforms.ToTensor(), _normalize])
_transform_train_cifar10 = transform=transforms.Compose([
                                        transforms.RandomHorizontalFlip(),
                                        transforms.ColorJitter(0.3,0.3,0.3,0.3),
                                        transforms.ToTensor(),
                                        transforms.RandomErasing(), _normalize])
def _get_imagenet():
    return ImageNet(root=_imagenet_folder, split='train', transform=_transform_train_cifar10), \
            ImageNet(root=_imagenet_folder, split='val', transform=_transform_test_cifar10)

_datasets_get_func['imagenet'] = _get_imagenet

In [19]:
trainloader, testloader = get_dataloaders('imagenet', bs)

RuntimeError: ('Meta file not found or corrupted.', 'You can use download=True to create it.')

In [2]:
parser = argparse.ArgumentParser()
parser.add_argument('-in_c', type=int, required=True)
parser.add_argument('-r', type=int, required=True, nargs=3)
parser.add_argument('-b', type=int, required=True, nargs=3)
parser.add_argument('-c', type=int, required=True, nargs=3)
parser.add_argument('-bs', type=int, required=True)
parser.add_argument('-lr', type=float, required=True)
parser.add_argument('--trial', dest='is_trial', action='store_true')
parser.add_argument('--load', dest='load', action='store_true')
parser.set_defaults(is_trial=False, load=False)
args = parser.parse_args()
in_planes_parameter = args.in_c
repeats_parameter = args.r
num_blocks_parameters = args.b
num_channels_parameters = args.c
is_trial = args.is_trial
load = args.load
lr = args.lr
bs = args.bs

usage: ipykernel_launcher.py [-h] -in_c IN_C -r R R R -b B B B -c C C C -bs BS
                             -lr LR [--trial] [--load]
ipykernel_launcher.py: error: the following arguments are required: -in_c, -r, -b, -c, -bs, -lr


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [2]:
in_planes_parameter = 16
repeats_parameter = [1, 1, 1]
num_blocks_parameters = [3, 3, 3]
num_channels_parameters=[16, 32, 64]
is_trial = False
load = False
lr = 2e-4
bs = 32

In [3]:
trainloader, testloader = get_dataloaders('cifar10', bs)

Files already downloaded and verified


In [4]:
device = torch.device('cuda')

In [5]:
_Conv2d = partial(nn.Conv2d, kernel_size=3, stride=1, padding=1, bias=False)
_BN2d = nn.BatchNorm2d
_act = partial(nn.ReLU, inplace=True)

class WeightChangingConv(nn.Module):
    def __init__(self, in_c, out_c, kernel_size=3, stride=1, padding=0):
        super(WeightChangingConv, self).__init__()
        self.w_size = (out_c, in_c, kernel_size, kernel_size)
        self.stride = stride
        self.padding = padding
        self.w_base_weights = nn.Parameter(
            nn.init.kaiming_uniform_(torch.Tensor(1, *self.w_size)))
        self.tanh = nn.Tanh()
        self.avgpool = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten())
        self.maxpool = nn.Sequential(nn.AdaptiveMaxPool2d(1), nn.Flatten())
        self.w_creator = nn.Linear(out_c*2, np.prod(self.w_size), bias=False)
        self.w_input_first = nn.Parameter(torch.Tensor(1, out_c, 1, 2).normal_(std=1e-5))
        
    def metaconv(self, x, w):
        w_ = w.reshape(w.shape[0] * w.shape[1], *w.shape[2:])
        x_ = x.reshape(1, x.shape[0] * x.shape[1], *x.shape[2:])
        out = F.conv2d(x_, w_, None, stride=self.stride,
                       groups=x.shape[0], padding=self.padding)
        return out.reshape(x.shape[0], w.shape[1], *out.shape[2:])

    def get_w_delta(self, x, w_input):
        if w_input is None:
            w_input = self.w_input_first.expand(x.shape[0], -1, -1, -1)
            w = self.w_base_weights.expand(x.shape[0], -1, -1, -1, -1)
        else:
            w_input, w = w_input

        features = self.tanh(torch.cat((self.avgpool(w_input), self.maxpool(w_input)), 1))
        w_delta = self.tanh(self.w_creator(features).reshape((-1, *self.w_size)))
        return w * 0.97 + w_delta * 0.03

    def forward(self, x, w_input=None):
        w = self.get_w_delta(x, w_input)
        return self.metaconv(x, w), w
        
class BasicBlockFB(nn.Module):
    def __init__(self, in_planes, planes, stride=1, repeats=1, prob_1=0.5):
        super(BasicBlockFB, self).__init__()
        self.repeats = repeats
        self.prob_1 = prob_1
        conv = WeightChangingConv if repeats != 1 else _Conv2d
        self.conv1 = conv(in_planes, planes, stride=stride)
        self.bn1 = nn.ModuleList([_BN2d(planes) for i in range(2*self.repeats-1)])
        self.seq = nn.Sequential(_act(), _Conv2d(planes, planes), _BN2d(planes))
        self.shortcut = \
            nn.Sequential(_Conv2d(in_planes, planes, kernel_size=1, stride=stride, padding=(stride-1)//2), _BN2d(planes)) \
            if stride != 1 or in_planes != planes else nn.Sequential()

    def forward(self, x):
        extra_conv1_data = []
        shortcut = self.shortcut(x)
        repeats = self.repeats
        if self.training: 
            repeats = [1, random.randint(1, self.repeats * 2 - 1)][random.uniform(0,1) > self.prob_1]
        for i in range(repeats):
            res = self.conv1(x, *extra_conv1_data)
            res, w = res if type(res) == tuple else (res, torch.tensor(1))
            res = F.relu(self.seq(self.bn1[i](res)) + shortcut, inplace=True)
            extra_conv1_data = [(res, w)]
        return res

class ResNet(nn.Module):
    def __init__(self, in_planes, repeats=[3, 3, 3], num_blocks=[3, 3, 3], num_channels=[16, 32, 64], num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = in_planes
        self.pre_layer = nn.Sequential(_Conv2d(3, self.in_planes), _BN2d(self.in_planes), _act())
        self.layer1 = self._make_layer(repeats[0], num_channels[0], num_blocks[0], stride=1)
        self.layer2 = self._make_layer(repeats[1], num_channels[1], num_blocks[1], stride=2)
        self.layer3 = self._make_layer(repeats[2], num_channels[2], num_blocks[2], stride=2)
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d(1), nn.Flatten(),
            nn.Linear(self.in_planes, num_classes)
        )

    def _make_layer(self, repeat, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        repeats = [1] * (num_blocks - 1) + [repeat]
        layers = []
        for stride, repeat in zip(strides, repeats):
            layers.append(BasicBlockFB(self.in_planes, planes, stride, repeat))
            self.in_planes = planes

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.pre_layer(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return self.classifier(x)

In [6]:
class Residual(nn.Module):
    def __init__(self, *seq_models):
        super(Residual, self).__init__()
        self._model = nn.Sequential(*seq_models)
    def forward(self, x):
        return x + self._model(x)

def conv_bn_act(in_c, out_c, kernel_size=1, padding=0, stride=1, groups=1, is_bn=True, is_act=True, conv=_Conv2d, bn=_BN2d, act=_act):
    return nn.Sequential(
        conv(in_c, out_c, kernel_size=kernel_size, padding=padding, stride=stride, groups=groups),
        bn(out_c) if is_bn else nn.Identity(),
        act() if is_act else nn.Identity()
    )

def ibconv(c, mid_c, kernel_size=3):
    return Residual(
            conv_bn_act(c,mid_c),
            conv_bn_act(mid_c,mid_c,kernel_size=kernel_size, padding=(kernel_size-1)//2, groups=mid_c),
            conv_bn_act(mid_c,c)
        )

def ResNet(num_classes=10):
    return nn.Sequential(
        conv_bn_act(3,32,kernel_size=5, padding=2, stride=1),
        conv_bn_act(32,64,kernel_size=5, padding=2, stride=2),
        ibconv(64, 512),
        ibconv(64, 512),
        ibconv(64, 512),
        conv_bn_act(64,128,kernel_size=3, padding=1, stride=2),
        ibconv(128, 1024),
        ibconv(128, 1024),
        ibconv(128, 1024),
        conv_bn_act(128,128, stride=1, kernel_size=3),
        nn.AdaptiveAvgPool2d(1), nn.Flatten(),
        nn.Linear(128, num_classes)
    )

In [7]:
# model = ResNet(in_planes=in_planes_parameter, 
#                repeats=repeats_parameter, num_blocks=num_blocks_parameters, num_channels=num_channels_parameters)
model = ResNet()
model = model.to(device)
optimizer = optim.SGD(model.parameters(), lr=.1, 
                      momentum=0.9, nesterov=True)
def lr_func(it):
    if it<1000:
        return 10 ** (-2 + (it / 1000) * (1.5))
    elif it < 10000:
        return 10 ** (-0.5 - ((it - 1000) / 10000) * 4)
    else:
        return 0.0001
lr_scheduler = optim.lr_scheduler.CyclicLR(optimizer, 1e-4, 1e-1)

In [8]:
# import matplotlib.pyplot as plt
# optimizer.step()
# x = []
# for i in range(100000):
#     lr_scheduler.step()
#     x.append(lr_scheduler.get_lr())
# plt.plot(x)
# plt.yscale('log')

In [9]:
def join_int(l):
    return '_'.join(map(str, l))

In [10]:
trial_name = f"resnet_with_feedback_1x1_inc_{in_planes_parameter}_repeats_{join_int(repeats_parameter)}_" + \
    f"num_blocks_{join_int(num_blocks_parameters)}_num_channels_{join_int(num_channels_parameters)}_" + \
    f"lr_{lr}_bs_{bs}"

In [11]:
tm = TrainingManager(trial_name, load=load, is_trial=is_trial)

In [12]:
tm.train(model, optimizer,
         trainloader, testloader,
         CrossEntropyLoss(), CrossEntropyLoss(),
         accuracy, accuracy, lr_scheduler=lr_scheduler, device=device, no_iterations=10000)

  0%|          | 0/10000 [00:00<?, ?it/s]

Start training trial: [34mresnet_with_feedback_1x1_inc_16_repeats_1_1_1_num_blocks_3_3_3_num_channels_16_32_64_lr_0.0002_bs_32[0m [31mis_trial[0m


{tr_loss: 0.61352, tr_acc: 0.78853, te_loss: 0.59492, te_acc: 0.79753}: 100%|██████████| 10000/10000 [04:30<00:00, 36.99it/s]
