# In [None]:
import torch
import os
import torch.nn.functional as F
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets
class LRN(nn.Module):
    """Local response normalisation across the channel dimension.

    Every activation is divided by ``(1 + alpha * m) ** beta`` where ``m`` is
    the mean of the squared activations inside a window of neighbouring
    channels at the same spatial position.  The window spans
    ``channels // 8 + 1`` channels (rounded up to the next odd number so it
    can be centred); window positions that fall outside the channel range are
    zero padding and still count towards the mean.

    Args:
        alpha: Scale applied to the windowed mean of squares.
        beta: Exponent applied to the normalisation term.
    """

    def __init__(self, alpha=0.001, beta=0.75):
        super(LRN, self).__init__()
        self.alpha = alpha
        self.beta = beta

    def forward(self, x):
        """Normalise ``x`` and return a tensor of the same shape.

        Args:
            x: Activations laid out as (batch, channels, height, width);
                dimension 1 is the one the window slides over.

        Returns:
            ``x`` divided element-wise by the local normalisation term.
        """
        # Insert a singleton dim so the channel axis becomes the "depth" axis
        # of a 3-D pooling window of size (k, 1, 1).
        squared = x.pow(2).unsqueeze(1)

        kernel_size = x.size(1) // 8 + 1
        if kernel_size % 2 == 0:
            # An even window with symmetric padding would shorten the channel
            # axis by one and make the division below fail.
            kernel_size += 1
        padding_size = (kernel_size - 1) // 2

        mean_squared = F.avg_pool3d(
            squared,
            kernel_size=(kernel_size, 1, 1),
            stride=1,
            padding=(padding_size, 0, 0),
        ).squeeze(1)
        scale = mean_squared.mul(self.alpha).add(1.0).pow(self.beta)
        return x.div(scale)
class RCL_block(nn.Module):
    """Recurrent convolutional layer (96 channels) unrolled for four steps.

    At step ``t`` the state is ``LRN(ReLU(feed_t + reconv(state_{t-1})))``,
    with the recurrent term omitted at step 0.  ``feed_t`` is ``feedconv``
    applied to the step's input; the same ``feedconv`` and ``reconv`` weights
    are used at every step.
    """

    NUM_STEPS = 4

    def __init__(self):
        super(RCL_block, self).__init__()
        self.feedconv = nn.Conv2d(96, 96, kernel_size=3, padding=1)
        self.reconv = nn.Conv2d(96, 96, kernel_size=3, padding=1, bias=False)
        # One shared instance instead of building a new module for every use.
        self.lrn = LRN()

    def _activate(self, pre_activation):
        """Apply ReLU followed by local response normalisation."""
        return self.lrn(F.relu(pre_activation))

    def forward(self, *args):
        """Run the four unrolled steps.

        Args:
            *args: Either one tensor, whose feed-forward convolution is
                computed once and reused at every step, or four tensors, one
                input per step.

        Returns:
            Tuple ``(x0, x1, x2, x3)`` with the state after each step.

        Raises:
            TypeError: If the number of inputs is neither 1 nor 4.
        """
        if len(args) == 1:
            shared_feed = self.feedconv(args[0])
        elif len(args) == self.NUM_STEPS:
            shared_feed = None
        else:
            raise TypeError(
                'RCL_block.forward expects 1 or %d inputs, got %d'
                % (self.NUM_STEPS, len(args)))

        states = []
        state = None
        for step in range(self.NUM_STEPS):
            if shared_feed is not None:
                feed = shared_feed
            else:
                feed = self.feedconv(args[step])
            # Step 0 has no previous state, so there is no recurrent term.
            if state is not None:
                feed = feed + self.reconv(state)
            state = self._activate(feed)
            states.append(state)
        return tuple(states)



class RCNN(nn.Module):
    """Recurrent CNN classifier producing 100 class scores.

    Pipeline: 5x5 convolution + ReLU + max-pool + LRN, then four
    ``RCL_block`` layers with channel dropout between them and one max-pool
    after the second block, then a max-pool with an 8x8 window and a linear
    layer on the final step's state.
    """

    def __init__(self):
        super(RCNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2, padding=1))
        self.layer2 = RCL_block()
        self.layer3 = RCL_block()
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
        self.layer4 = RCL_block()
        self.layer5 = RCL_block()
        self.globalpool = nn.MaxPool2d(8)
        self.fc = nn.Linear(96 * 1, 100)
        # Registered as sub-modules so that train()/eval() reach them.  A
        # Dropout2d built inside forward() is always in training mode and
        # would keep dropping channels during evaluation.
        self.lrn = LRN()
        self.dropout = nn.Dropout2d(p=0.2)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: Image batch with 3 channels.  The 8x8 pooling window and the
                ``view(-1, 96)`` below leave exactly one row per image only
                when the final feature map is 8x8, which is the case for
                32x32 inputs (two stride-2 poolings).

        Returns:
            Tensor with 100 scores per row.
        """
        conv1 = self.lrn(self.layer1(x))

        states = self.layer2(conv1)
        states = [self.dropout(state) for state in states]

        states = self.layer3(*states)
        states = [self.dropout(self.maxpool(state)) for state in states]

        states = self.layer4(*states)
        states = [self.dropout(state) for state in states]

        # Only the state after the last unrolled step feeds the classifier.
        final_state = self.layer5(*states)[-1]

        out = self.globalpool(final_state)
        out = out.view(-1, 96 * 1)
        return self.fc(out)





def _evaluate(model, loader, device):
    """Return ``(correct, total)`` prediction counts of ``model`` on ``loader``.

    Puts the model into eval mode and runs without gradient tracking.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            outputs = model(images.to(device))
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted.cpu() == labels).sum().item()
    return correct, total


if __name__ == '__main__':
    # Trains RCNN on CIFAR-100 with SGD, reports test accuracy after epochs
    # 40, 50 and 60 and after the final epoch, and saves the model to
    # 'rcnn.pkl'.
    batch_size = 100
    learning_rate = 0.01
    num_epochs = 75
    # Use the GPU when there is one, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # First pass without a transform: only needed to read the raw pixels and
    # compute the per-channel mean used for normalisation.
    train_dataset = datasets.CIFAR100(
        root='./data',
        train=True,
        download=True,
    )
    # The raw image array is called `data` on current torchvision and
    # `train_data` on the older releases this script was written against.
    if hasattr(train_dataset, 'data'):
        pixels = train_dataset.data
    else:
        pixels = train_dataset.train_data
    # Last axis is the colour channel (R, G, B); scale means into [0, 1].
    channel_means = tuple(pixels[:, :, :, c].mean() / 255 for c in range(3))

    # Mean subtraction only: the std of (1, 1, 1) leaves the scale untouched.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_means, (1, 1, 1)),
    ])
    train_dataset = datasets.CIFAR100(
        root='./data',
        train=True,
        download=False,
        transform=transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=2)

    test_dataset = datasets.CIFAR100(
        root='./data',
        train=False,
        download=True,
        transform=transform)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=2)

    rcnn = RCNN()
    rcnn.to(device)
    rcnn.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(rcnn.parameters(), lr=learning_rate,
                                momentum=0.9, weight_decay=1e-4,
                                nesterov=True)
    # mode='max' because the monitored quantity is an accuracy.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', patience=3, verbose=True,
        min_lr=learning_rate / 1000)

    for epoch in range(num_epochs):
        num = 0            # number of 100-iteration progress reports so far
        total_valid = 0
        correct_valid = 0
        for i, (images, labels) in enumerate(train_loader):
            rcnn.train()   # the accuracy pass below switches to eval mode
            images = images.to(device)
            labels = labels.to(device)

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = rcnn(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # NOTE: this "validation" accuracy is measured on the training
            # batches with index > 399, right after the model was updated on
            # them; it is not a held-out set.
            if i > 399:
                rcnn.eval()
                with torch.no_grad():
                    valid_out = rcnn(images)
                _, valid_predicted = torch.max(valid_out, 1)
                total_valid += labels.size(0)
                correct_valid += (valid_predicted == labels).sum().item()

            if (i + 1) % 100 == 0:
                num += 1
                print('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f'
                      % (epoch + 1, num_epochs, i + 1,
                         len(train_dataset) // batch_size, loss.item()))
                # Fifth report == iteration 500, the end of an epoch at this
                # batch size; drive the LR schedule from the accuracy there.
                if num == 5:
                    print(total_valid, correct_valid)
                    valid_accuracy = 100.0 * correct_valid / total_valid
                    print('valid accuracy: %.2f' % (valid_accuracy))
                    scheduler.step(valid_accuracy)

        if epoch in (39, 49, 59):
            correct, total = _evaluate(rcnn, test_loader, device)
            print('Test Accuracy of the model on the 10000 test images: %d %%'
                  % (100.0 * correct / total))

    # Pickles the whole module object, not just its state_dict, so loading
    # requires the class definitions above to be importable.
    torch.save(rcnn, 'rcnn.pkl')

    correct, total = _evaluate(rcnn, test_loader, device)
    print('Test Accuracy of the model on the 10000 test images: %d %%'
          % (100.0 * correct / total))



