<a href="https://colab.research.google.com/github/liuyao12/Ranger-Mish-ImageWoof-5/blob/master/Woof_128_ep5_ConvTwist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ResNet with a Twist

> ConvTwist + Mish + MaxBlurPool + restrick

# setup and imports

In [0]:
!pip install git+https://github.com/ayasyrev/model_constructor

In [0]:
# pip install git+https://github.com/kornia/kornia

In [0]:
!pip install git+https://github.com/ayasyrev/imagenette_experiments

In [0]:
from imagenette_experiments.train_utils import *

In [0]:
from kornia.contrib import MaxBlurPool2d

In [0]:
from fastai.basic_train import *
from fastai.vision import *
# from fastai.script import *
from model_constructor.net import Net, act_fn
from model_constructor.layers import SimpleSelfAttention, ConvLayer

# Twist

In [0]:
class ConvTwist(nn.Module):  # replacing 3x3 Conv2d
    """Drop-in replacement for a bias-free 3x3 ``nn.Conv2d`` with a position-dependent term.

    One application of the operator computes::

        conv(x) + (X - center_x) * conv2d(x, Kx) + (Y - center_y) * conv2d(x, Ky)

    where

    * ``conv`` is an ordinary 3x3 convolution (``padding=1``, no bias),
    * ``Kx = coeff_x * Gx + coeff_y * Gy`` is a learned combination of the two
      fixed 3x3 Sobel stencils ``Gx`` / ``Gy`` for every (out, in) channel pair,
    * ``Ky`` is ``Kx`` rotated by 90 degrees,
    * ``X`` / ``Y`` are the column / row index of each output pixel rescaled as
      ``index * 2 / size - 1`` (so they lie in ``[-1, 1)``),
    * ``center_x`` / ``center_y`` are per-output-channel offsets drawn once from
      ``N(0, init)`` and stored as non-trainable parameters.

    When ``ni == nf``, ``stride == 1`` and ``iters > 1`` the operator is applied
    ``iters`` times as additive steps of size ``1 / iters`` starting from the
    input, and the accumulated change (final state minus input) is returned.

    Args:
        ni: number of input channels.
        nf: number of output channels.
        stride: stride of all three convolutions.
        init: standard deviation used to initialise ``center_x`` / ``center_y``.
        iters: number of steps used in the iterated mode described above.
    """

    def __init__(self, ni, nf, stride=1, init=0.7, iters=2):
        super().__init__()
        self.conv = nn.Conv2d(ni, nf, kernel_size=3, stride=stride, padding=1, bias=False)
        # Fixed Sobel stencils. They are deliberately kept as plain attributes
        # (not buffers) so the module's state_dict keys stay as they were;
        # forward() casts them to the parameters' device and dtype.
        self.grad_x = torch.Tensor([[-1,0,1],[-2,0,2],[-1,0,1]])
        self.grad_y = torch.Tensor([[1,2,1],[0,0,0],[-1,-2,-1]])
        # Initialise the stencil coefficients on the same scale as the conv weights.
        std = self.conv.weight.std().item()
        self.coeff_x = nn.Parameter(torch.empty((nf,ni)).normal_(0, std), requires_grad=True)
        self.coeff_y = nn.Parameter(torch.empty((nf,ni)).normal_(0, std), requires_grad=True)
        # One centre per output channel (a per-(input, output)-channel variant was
        # tried by the author and left disabled).
        self.center_x = nn.Parameter(torch.empty(nf).normal_(0, init), requires_grad=False)
        self.center_y = nn.Parameter(torch.empty(nf).normal_(0, init), requires_grad=False)
        self.same = ni == nf and stride == 1 and iters > 1
        self.iters = iters
        self.ni = ni
        self.stride = stride

    def forward(self, inpt):
        out = self.conv(inpt)
        h, w = out.shape[-2:]

        # Normalised pixel coordinates as 1-D ramps that broadcast over the
        # (batch, channel, h, w) output. Computed in float64 and then cast, which
        # yields the same values as the former np.indices(...) grids without
        # allocating full index arrays on every call.
        XX = (torch.arange(w, dtype=torch.float64) * 2 / w - 1).to(device=out.device, dtype=out.dtype).view(1, 1, 1, w)
        YY = (torch.arange(h, dtype=torch.float64) * 2 / h - 1).to(device=out.device, dtype=out.dtype).view(1, 1, h, 1)

        # Cast the stencils to the coefficients' device AND dtype so the derived
        # kernels always match the dtype of self.conv's weights (the stencils are
        # not parameters, so .half()/.double()/.to(device) on the module skips them).
        grad_x = self.grad_x.to(device=self.coeff_x.device, dtype=self.coeff_x.dtype)
        grad_y = self.grad_y.to(device=self.coeff_x.device, dtype=self.coeff_x.dtype)
        kernel_x = self.coeff_x[:,:,None,None] * grad_x + self.coeff_y[:,:,None,None] * grad_y
        kernel_y = kernel_x.transpose(2,3).flip(3)  # make conv_y a 90 degree rotation of conv_x

        offset_x = XX - self.center_x[:,None,None]
        offset_y = YY - self.center_y[:,None,None]

        def twisted(x, base, stride):
            # base + position-weighted gradient responses (same summation order
            # as the original expression).
            return (base
                    + offset_x * F.conv2d(x, kernel_x, stride=stride, padding=1)
                    + offset_y * F.conv2d(x, kernel_y, stride=stride, padding=1))

        out = twisted(inpt, out, self.stride)
        if self.same:
            # Iterated mode: take `iters` additive steps of size 1/iters starting
            # from the input, then return only the accumulated change.
            out = inpt + out / self.iters
            for _ in range(self.iters - 1):
                out = out + twisted(out, self.conv(out), 1) / self.iters
            out = out - inpt
        return out

# ResBlock

In [0]:
class NewLayer(nn.Sequential):
    """Convolution followed by optional BatchNorm and activation.

    A 3x3 kernel (``ks == 3``) is built with ``ConvTwist``; every other kernel
    size uses a plain ``nn.Conv2d``.

    Args:
        ni: input channels.
        nf: output channels.
        ks: kernel size; selects ``ConvTwist`` (3) or ``nn.Conv2d`` (anything else).
        stride: convolution stride.
        act: append the activation module when true.
        act_fn: activation module instance to append (the same instance is reused
            as-is, not copied).
        bn_layer: append a ``BatchNorm2d(nf)`` when true.
        bn_1st: when true the order after the conv is BatchNorm then activation,
            otherwise activation then BatchNorm.
        zero_bn: initialise the BatchNorm weight to 0 instead of 1.
        padding: conv padding, defaults to ``ks // 2``. Only used when ``ks != 3``.
        bias: conv bias flag. Only used when ``ks != 3``.
        groups: conv groups. Only used when ``ks != 3``.
        **kwargs: accepted for call compatibility and ignored.
    """
    def __init__(self, ni, nf, ks=3, stride=1,
            act=True,  act_fn=nn.ReLU(inplace=True),
            bn_layer=True, bn_1st=True, zero_bn=False,
            padding=None, bias=False, groups=1, **kwargs):

        if padding is None: padding = ks//2
        if ks==3:
            # ConvTwist fixes padding=1 and bias=False itself and has no groups
            # support, so those arguments are not forwarded on this path.
            layers = [('ConvTwist', ConvTwist(ni, nf, stride=stride))]
        else:
            layers = [('Conv{}x{}'.format(ks,ks),
                       nn.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))]

        act_bn = [('act_fn', act_fn)] if act else []
        if bn_layer:
            bn = nn.BatchNorm2d(nf)
            nn.init.constant_(bn.weight, 0. if zero_bn else 1.)
            act_bn += [('bn', bn)]
        # act_bn was assembled as (activation, bn); flip it for BatchNorm-first ordering.
        if bn_1st: act_bn.reverse()
        layers += act_bn
        super().__init__(OrderedDict(layers))

In [0]:
class NewResBlock(Module):
    """Residual block whose convolutions are always built with ``NewLayer``.

    The input is first passed through ``pool`` when ``stride != 1`` (otherwise
    left untouched); both the convolutional path and the shortcut then operate
    on that reduced tensor. Their sum is passed through ``act_fn``.

    Args:
        expansion: channel multiplier; 1 selects the two-conv (3x3, 3x3) layout,
            any other value the three-conv (1x1, 3x3, 1x1) bottleneck layout.
        ni: input width before multiplication by ``expansion``.
        nh: hidden width; the block outputs ``nh * expansion`` channels.
        stride: only compared against 1 to decide whether ``pool`` is applied.
        conv_layer: accepted for signature compatibility; the block overrides it
            with ``NewLayer``.
        act_fn: activation module used inside the layers and after the merge.
        bn_1st: forwarded to every layer.
        pool: module applied to the input when ``stride != 1``.
        sa: append ``SimpleSelfAttention`` after the last conv layer.
        sym: forwarded to ``SimpleSelfAttention``.
        zero_bn: forwarded to the last conv layer of the convolutional path.
    """
    def __init__(self, expansion, ni, nh, stride=1,
                 conv_layer=ConvLayer, act_fn=act_fn, bn_1st=True,
                 pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, zero_bn=True):
        in_ch, out_ch = ni*expansion, nh*expansion
        make_layer = NewLayer  # overrides whatever was passed as conv_layer
        self.reduce = pool if stride != 1 else noop

        if expansion == 1:
            # Basic layout: two 3x3 layers.
            named_layers = [
                ("conv_0", make_layer(in_ch, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),
                ("conv_1", make_layer(nh, out_ch, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)),
            ]
        else:
            # Bottleneck layout: 1x1 -> 3x3 -> 1x1; only the middle layer is 3x3.
            named_layers = [
                ("conv_0", make_layer(in_ch, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),
                ("conv_1", make_layer(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),
                ("conv_2", make_layer(nh, out_ch, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)),
            ]
        if sa:
            named_layers.append(('sa', SimpleSelfAttention(out_ch, ks=1, sym=sym)))
        self.convs = nn.Sequential(OrderedDict(named_layers))

        # Shortcut: identity when the widths already match, otherwise a 1x1 projection.
        if in_ch == out_ch:
            self.idconv = noop
        else:
            self.idconv = make_layer(in_ch, out_ch, 1, act=False, bn_1st=bn_1st)
        self.merge = act_fn

    def forward(self, x):
        reduced = self.reduce(x)
        residual = self.convs(reduced)
        shortcut = self.idconv(reduced)
        return self.merge(residual + shortcut)

# Model Constructor

In [0]:
# Pooling module instance that is assigned to both model.pool and model.stem_pool below.
# NOTE(review): the positional arguments are presumably (kernel_size, ceil_mode) for the
# kornia version this was run with — confirm against the installed MaxBlurPool2d signature.
pool = MaxBlurPool2d(3, True)

In [0]:
# model_constructor Net configured for 10 output classes, layers [3,4,6,3] and expansion 4.
# NOTE(review): presumably a ResNet-50-style layout — confirm against model_constructor.
model = Net(c_out=10, layers=[3,4,6,3], expansion=4)

In [0]:
# Override attributes on the Net object created above.
# NOTE(review): presumably Net reads these when the network is actually built — confirm.
model.block = NewResBlock  # residual block defined above; it always builds its convs with NewLayer
# model.conv_layer = NewLayer  # left disabled: NewResBlock already overrides conv_layer internally
model.pool = pool
model.stem_pool = pool  # same MaxBlurPool2d instance as model.pool
model.stem_sizes = [3,32,64,64]
model.act_fn = Mish()
model.sa = True  # NOTE(review): presumably reaches NewResBlock's `sa` argument — verify in Net

# Runs and results



In [0]:
# Five independent 5-epoch runs; every iteration creates a new learner from the same
# `model` object. No random seed is set in the visible cells, so results vary per run.
# The final-epoch accuracies printed below are averaged by hand in the last cell.
for _ in range(5):
    learn = get_learn(model=model, size=128, bs=32, mixup=0)
    learn.fit_fc(5, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)

data path   /root/.fastai/data/imagewoof2
Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.946215,1.757429,0.456096,0.886994,01:52
1,1.645747,1.520635,0.546449,0.933571,01:52
2,1.51475,1.365968,0.630695,0.954187,01:52
3,1.376637,1.250549,0.687452,0.956223,01:52
4,1.146112,1.098631,0.762789,0.970985,01:51


data path   /root/.fastai/data/imagewoof2
Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.94527,1.823008,0.412828,0.874268,01:50
1,1.639256,1.503022,0.562484,0.936625,01:50
2,1.470488,1.364844,0.632985,0.946297,01:50
3,1.377457,1.248505,0.688216,0.958259,01:50
4,1.157462,1.096244,0.761771,0.973276,01:50


data path   /root/.fastai/data/imagewoof2
Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.870727,1.804891,0.421227,0.886231,01:50
1,1.629468,1.501021,0.567829,0.92899,01:49
2,1.50362,1.398608,0.623059,0.951133,01:49
3,1.358598,1.269784,0.688216,0.95495,01:49
4,1.145114,1.089332,0.766098,0.972512,01:49


data path   /root/.fastai/data/imagewoof2
Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.919392,1.774163,0.434207,0.885722,01:48
1,1.690776,1.52114,0.545431,0.936371,01:49
2,1.510333,1.360296,0.640112,0.941715,01:48
3,1.35135,1.264057,0.68847,0.95495,01:48
4,1.166215,1.093398,0.770934,0.971494,01:49


data path   /root/.fastai/data/imagewoof2
Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.933465,1.742338,0.460168,0.890048,01:49
1,1.667284,1.604843,0.519216,0.920845,01:49
2,1.533159,1.367563,0.62535,0.951387,01:48
3,1.396665,1.259073,0.681853,0.958514,01:48
4,1.169616,1.105028,0.763808,0.968694,01:49


In [9]:
# Mean of the epoch-4 accuracy over the five runs logged above (values copied from the logs).
np.array([0.762789, 0.761771, 0.766098, 0.770934, 0.763808]).mean()

0.76508