In [1]:
import torch.nn.functional as F
import math
import torch
import torch.nn as nn
import torch.nn.parameter as Parameter


class SignumActivation(torch.autograd.Function):
    def forward(self, input):
        self.save_for_backward(input)
        size = input.size()
        mean = torch.mean(input.abs(), 1, keepdim=True)
        input = input.sign().add(0.01).sign()
        return input, mean

    def backward(self, grad_output, grad_output_mean): #STE Part
        input, = self.saved_tensors
        grad_input = grad_output.clone()
        grad_input=(2/torch.cosh(input))*(2/torch.cosh(input))*(grad_input)
        #grad_input[input.ge(1)] = 0 #great or equal
        #grad_input[input.le(-1)] = 0 #less or equal
        return grad_input

    
class BinarizeLinear(nn.Linear):
    def __init__(self, *kargs, **kwargs):
        super(BinarizeLinear, self).__init__(*kargs, **kwargs)

    def forward(self, input):
        #if input.size(1) != 784:
        #    input.data=input.data.sign().add(0.01).sign()
        if not hasattr(self.weight,'org'):
            self.weight.org=self.weight.data.clone()
        self.weight.data=self.weight.org.sign().add(0.01).sign()
        out = nn.functional.linear(input, self.weight)
        if not self.bias is None:
            self.bias.org=self.bias.data.clone()
            out += self.bias.view(1, -1).expand_as(out)
        return out


class BinConv2d(nn.Conv2d):
    def __init__(self, *kargs, **kwargs):
        super(BinConv2d, self).__init__(*kargs, **kwargs)

    def forward(self, input):
        #if input.size(1) != 3:
        #    input.data = input.data.sign().add(0.01).sign()
        if not hasattr(self.weight,'org'):
            self.weight.org=self.weight.data.clone()
        self.weight.data=self.weight.org.sign().add(0.01).sign()
        out = nn.functional.conv2d(input, self.weight, None, self.stride,
                                   self.padding, self.dilation, self.groups)
        if not self.bias is None:
            self.bias.org=self.bias.data.clone()
            out += self.bias.view(1, -1, 1, 1).expand_as(out)
        return out
      

class Unit_BinarizedConvolution2D(nn.Module):
    def __init__(self, input_channels, output_channels,
            kernel_size=-1, stride=-1, padding=-1, dropout=0, ActivationLayer=1, BNaffine=False):
        super(Unit_BinarizedConvolution2D, self).__init__()
        self.layer_type = 'BinConv2d'
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dropout_ratio = dropout
        if dropout!=0:
            self.dropout = nn.Dropout(dropout)
        self.ActivationLayer = ActivationLayer
        self.bn = nn.BatchNorm2d(output_channels, eps=1e-7, momentum=0.1, affine=BNaffine)
        if BNaffine==True:
            self.bn.weight.data = self.bn.weight.data.zero_().add(1.0)
        self.conv=BinConv2d(input_channels, output_channels,
                kernel_size=kernel_size, stride=stride, padding=padding)   #bias=False
    def forward(self, x):
       if self.dropout_ratio!=0:
            x = self.dropout(x)
       x = self.conv(x)
       x = self.bn(x)
       if self.ActivationLayer==1:
            x, mean = SignumActivation()(x)
       return x
    

class SignumActivationLayer(nn.Module):
    def forward(self, x):
        x, mean = SignumActivation()(x)
        return x
    
    

In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.xnor = nn.Sequential(
        Unit_BinarizedConvolution2D(3, 64, kernel_size=3, stride=1, padding=1, ActivationLayer=1,BNaffine=False),
        Unit_BinarizedConvolution2D(64, 64, kernel_size=3, stride=1, padding=1, ActivationLayer=1,BNaffine=False),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=1),

        Unit_BinarizedConvolution2D(64, 128, kernel_size=3, stride=1, padding=1, ActivationLayer=1,BNaffine=False),
        Unit_BinarizedConvolution2D(128, 128, kernel_size=3, stride=1, padding=1, ActivationLayer=1,BNaffine=False),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=1),
            
        Unit_BinarizedConvolution2D(128, 256, kernel_size=3, stride=1, padding=1, ActivationLayer=1,BNaffine=False),
        Unit_BinarizedConvolution2D(256, 256, kernel_size=3, stride=1, padding=1, ActivationLayer=1,BNaffine=False),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=1),
        nn.AvgPool2d(kernel_size=4, stride=2, padding=0),
        SignumActivationLayer()
        )
        
        self.classifier = nn.Sequential(
            BinarizeLinear(256, 10, bias=False),
            nn.BatchNorm1d(10, affine=False),
            nn.Softmax()
        )
    def forward(self, x):
        #for m in self.modules():
        #    if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
        #        if hasattr(m.weight, 'data'):
        #            m.weight.data.clamp_(min=0.01)
        x = self.xnor(x)
        #x = x.view(x.size(0), 256)
        x = x.view(x.size(0), -1)
        #x = x.view(-1, 256)
        x = self.classifier(x)
        return x
      
      

In [3]:
import time
import random
import matplotlib.pyplot as plt
import torch.utils.data as D
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets,transforms
import argparse



In [4]:
def timeSince(since):
    now = time.time()
    s = now - since
    #m = math.floor(s / 60)
    #s -= m * 60
    return s

  

In [5]:
parser = argparse.ArgumentParser(description='CIFAR Binarized weights')
parser.add_argument('--batch-size', type=int, default=50, metavar='N',help='input batch size , default =64')
parser.add_argument('--test-batch-size', type=int, default=50, metavar='N',help='input batch size for testing default=64')
parser.add_argument('--epochs', type=int, default=400, metavar='N',help='number of epochs to train (default: 10)')
parser.add_argument('--lr', type=float, default=0.1, metavar='LR',help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.9, metavar='M',help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',help='random seed,default=1)')
parser.add_argument('--eps', type=float, default=1e-5, metavar='LR',help='learning rate,default=1e-5')
parser.add_argument('--log-interval', type=int, default=100, metavar='N',help='for printing  training data is log interval')
parser.add_argument('--best_acc', type=float, default=0, metavar='N',help='Record of best accuracy')


_StoreAction(option_strings=['--best_acc'], dest='best_acc', nargs=None, const=None, default=0, type=<class 'float'>, choices=None, help='Record of best accuracy', metavar='N')

In [6]:
#args = parser.parse_args()
#args.cuda = not args.no_cuda and torch.cuda.is_available()


In [7]:

#torch.manual_seed(args.seed)
#if args.cuda:
#    torch.cuda.manual_seed(args.seed)
torch.cuda.manual_seed(1)
    #train_loader
train_loader = D.DataLoader(datasets.CIFAR10('./data', train=True, download=True,
                                             transform=transforms.Compose([transforms.ToTensor()
                                                                           #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
                                                                          ])) #transforms.RandomHorizontalFlip()
                            ,batch_size=10, shuffle=True) #500->args.test_batch_size
                                                 #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    
    #test_loaer
test_loader = D.DataLoader(datasets.CIFAR10('./data', train=False, 
                                            transform=transforms.Compose([transforms.ToTensor() #transforms.ToPILImage()
                                                                         
                                                                         ]))
                           ,batch_size=10, shuffle=False) #500->args.test_batch_size
 #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

Files already downloaded and verified


In [8]:
model=Net()
model.cuda()
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=0.001) #args.momentum
#optimizer = optim.SGD(model.parameters(), lr=0.1,momentum=0.9) #args.momentum


In [9]:
def adjust_learning_rate(optimizer, epoch):
    update_list = [500, 600,700,800,900]
    if epoch in update_list:
        for param_group in optimizer.param_groups:
            param_group['lr'] = param_group['lr'] * 0.1
    return

In [10]:
def train(epoch):
    #global best_acc
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        #if args.cuda:
        data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)

        optimizer.zero_grad()
    
        loss.backward()
        
        for p in list(model.parameters()):
            if hasattr(p,'org'):
                p.data.copy_(p.org)
                
         
        optimizer.step()
        
        for p in list(model.parameters()):
            if hasattr(p,'org'):
                p.org.copy_(p.data.clamp_(-1,1))
        
                

        if batch_idx % 100 == 0: #100-> args.log_interval
            tlos.append(loss.data)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data))


In [11]:
def test():
    global best_acc
    model.eval()
    test_loss = 0
    correct = 0
    total = 0

    for batch_idx, (inputs, targets) in enumerate(test_loader):
        inputs, targets = inputs.cuda(), targets.cuda()
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        test_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        
    acc = 100. * correct / len(test_loader)
    if acc > best_acc: #args.best_acc
        best_acc = acc
        #save_state(model, best_acc)
    test_loss /= len(test_loader) 
       
    accur.append( 100.*correct/total)
    print(batch_idx, len(test_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'% (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
    print('Best Accuracy:: ',best_acc)

In [12]:
start = time.time()
time_graph=[]
e=[]
accur=[]
tlos=[]
best_acc=0

In [None]:
for epoch in range(1, 900 + 1): #600 -> args.epochs
    adjust_learning_rate(optimizer, epoch)
    e.append(epoch)
    train(epoch)   
    seco=timeSince(start)
    time_graph.append(seco)
    test()

print(time_graph)
plt.title('Training for CIFAR10 with epoch', fontsize=20)
plt.ylabel('time (s)')
plt.plot(e,time_graph)
plt.show()
plt.title('Accuracy With epoch', fontsize=20)
plt.plot(e,accur)
plt.show()
plt.title('Test loss With epoch', fontsize=20)
plt.plot(tlos)
plt.show()

  input = module(input)


999 1000 Loss: 0.002 | Acc: 35.950% (3595/10000)
Best Accuracy::  359.5
999 1000 Loss: 0.002 | Acc: 39.150% (3915/10000)
Best Accuracy::  391.5
999 1000 Loss: 0.002 | Acc: 48.540% (4854/10000)
Best Accuracy::  485.4


999 1000 Loss: 0.002 | Acc: 50.010% (5001/10000)
Best Accuracy::  500.1
999 1000 Loss: 0.002 | Acc: 43.690% (4369/10000)
Best Accuracy::  500.1
999 1000 Loss: 0.002 | Acc: 54.150% (5415/10000)
Best Accuracy::  541.5


999 1000 Loss: 0.002 | Acc: 55.200% (5520/10000)
Best Accuracy::  552.0
999 1000 Loss: 0.002 | Acc: 50.490% (5049/10000)
Best Accuracy::  552.0
999 1000 Loss: 0.002 | Acc: 45.980% (4598/10000)
Best Accuracy::  552.0


999 1000 Loss: 0.002 | Acc: 51.230% (5123/10000)
Best Accuracy::  552.0
999 1000 Loss: 0.002 | Acc: 52.970% (5297/10000)
Best Accuracy::  552.0
999 1000 Loss: 0.002 | Acc: 59.500% (5950/10000)
Best Accuracy::  595.0


999 1000 Loss: 0.002 | Acc: 59.220% (5922/10000)
Best Accuracy::  595.0
999 1000 Loss: 0.002 | Acc: 54.130% (5413/10000)
Best Accuracy::  595.0
999 1000 Loss: 0.002 | Acc: 51.690% (5169/10000)
Best Accuracy::  595.0


999 1000 Loss: 0.002 | Acc: 61.290% (6129/10000)
Best Accuracy::  612.9
999 1000 Loss: 0.002 | Acc: 61.340% (6134/10000)
Best Accuracy::  613.4
999 1000 Loss: 0.002 | Acc: 60.880% (6088/10000)
Best Accuracy::  613.4
999 1000 Loss: 0.002 | Acc: 59.150% (5915/10000)
Best Accuracy::  613.4


999 1000 Loss: 0.002 | Acc: 60.330% (6033/10000)
Best Accuracy::  613.4
999 1000 Loss: 0.002 | Acc: 61.590% (6159/10000)
Best Accuracy::  615.9
999 1000 Loss: 0.002 | Acc: 58.000% (5800/10000)
Best Accuracy::  615.9


999 1000 Loss: 0.002 | Acc: 57.690% (5769/10000)
Best Accuracy::  615.9
999 1000 Loss: 0.002 | Acc: 60.280% (6028/10000)
Best Accuracy::  615.9
999 1000 Loss: 0.002 | Acc: 60.020% (6002/10000)
Best Accuracy::  615.9


999 1000 Loss: 0.002 | Acc: 59.580% (5958/10000)
Best Accuracy::  615.9
999 1000 Loss: 0.002 | Acc: 63.870% (6387/10000)
Best Accuracy::  638.7
999 1000 Loss: 0.002 | Acc: 60.610% (6061/10000)
Best Accuracy::  638.7


999 1000 Loss: 0.002 | Acc: 58.800% (5880/10000)
Best Accuracy::  638.7
999 1000 Loss: 0.002 | Acc: 61.780% (6178/10000)
Best Accuracy::  638.7
999 1000 Loss: 0.002 | Acc: 60.300% (6030/10000)
Best Accuracy::  638.7


999 1000 Loss: 0.002 | Acc: 62.110% (6211/10000)
Best Accuracy::  638.7
999 1000 Loss: 0.002 | Acc: 63.430% (6343/10000)
Best Accuracy::  638.7
999 1000 Loss: 0.002 | Acc: 56.100% (5610/10000)
Best Accuracy::  638.7


999 1000 Loss: 0.002 | Acc: 58.300% (5830/10000)
Best Accuracy::  638.7
999 1000 Loss: 0.002 | Acc: 59.840% (5984/10000)
Best Accuracy::  638.7
999 1000 Loss: 0.002 | Acc: 61.410% (6141/10000)
Best Accuracy::  638.7
999 1000 Loss: 0.002 | Acc: 63.160% (6316/10000)
Best Accuracy::  638.7


999 1000 Loss: 0.002 | Acc: 62.920% (6292/10000)
Best Accuracy::  638.7
999 1000 Loss: 0.002 | Acc: 57.810% (5781/10000)
Best Accuracy::  638.7
999 1000 Loss: 0.002 | Acc: 59.980% (5998/10000)
Best Accuracy::  638.7


999 1000 Loss: 0.002 | Acc: 63.050% (6305/10000)
Best Accuracy::  638.7
999 1000 Loss: 0.002 | Acc: 62.550% (6255/10000)
Best Accuracy::  638.7
999 1000 Loss: 0.002 | Acc: 63.980% (6398/10000)
Best Accuracy::  639.8


999 1000 Loss: 0.002 | Acc: 64.750% (6475/10000)
Best Accuracy::  647.5
999 1000 Loss: 0.002 | Acc: 58.580% (5858/10000)
Best Accuracy::  647.5
999 1000 Loss: 0.002 | Acc: 62.840% (6284/10000)
Best Accuracy::  647.5


999 1000 Loss: 0.002 | Acc: 58.380% (5838/10000)
Best Accuracy::  647.5
999 1000 Loss: 0.002 | Acc: 63.560% (6356/10000)
Best Accuracy::  647.5
999 1000 Loss: 0.002 | Acc: 61.980% (6198/10000)
Best Accuracy::  647.5


999 1000 Loss: 0.002 | Acc: 61.170% (6117/10000)
Best Accuracy::  647.5
999 1000 Loss: 0.002 | Acc: 59.210% (5921/10000)
Best Accuracy::  647.5
999 1000 Loss: 0.002 | Acc: 56.740% (5674/10000)
Best Accuracy::  647.5


999 1000 Loss: 0.002 | Acc: 61.490% (6149/10000)
Best Accuracy::  647.5
999 1000 Loss: 0.002 | Acc: 57.510% (5751/10000)
Best Accuracy::  647.5
