# Resnet33 for CIFAR-10

### Import Statements

In [32]:
#Resnet33 for images CIFAR-10 3*32*32 = 3*1024
#Resnet33 for images MNIST 1*28*28
#import statements
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F

### Data Preprocessing

In [33]:
#Get Data and preprocess
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

#trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
#                                        download=True, transform=transform)

trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                       download=True, transform=transforms.ToTensor())
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                         shuffle=True, num_workers=2)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transforms.ToTensor())
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                        shuffle=False, num_workers=2)

#classes = ('plane', 'car', 'bird', 'cat',
#           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

classes = ('0','1','2','3','4','5','6','7','8','9')

### Model Definition

In [37]:
#define model
class residual_unit(torch.nn.Module):
    def __init__(self, N, C, L, W, training=False):
        super(residual_unit, self).__init__()
        self.ru_conv1 = torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1)
        self.ru_bn1 = torch.nn.BatchNorm1d(32, affine=training)
        self.ru_act1 = torch.nn.ReLU()
        self.ru_conv2 = torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1)
        self.ru_bn2 = torch.nn.BatchNorm1d(32, affine=training)
        self.ru_act2 = torch.nn.ReLU()
        
    def forward(self, x):
        y = self.ru_conv1(x)
        y = self.ru_bn1(y)
        y = self.ru_act1(y)
        y = self.ru_conv2(y)
        y = self.ru_bn2(y)
        y = y + x
        y = self.ru_act2(y)
        return y

class residual_stack(torch.nn.Module):
    def __init__(self, N, C, L, W, training=False):
        super(residual_stack, self).__init__()
        self.rs_conv1 = torch.nn.Conv1d(in_channels=C, out_channels=32, kernel_size=3, padding=1)
        self.rs_bn1 = torch.nn.BatchNorm1d(32, affine=training)
        self.rs_ru1 = residual_unit(N, C, L, W, training) #Create an object of the custom nn model
        self.rs_ru2 = residual_unit(N, C, L, W, training)
        self.rs_mp1 = torch.nn.MaxPool1d(kernel_size=2, stride=2)
        
    def forward(self, x):
        y = self.rs_conv1(x)
        y = self.rs_bn1(y)
        y = self.rs_ru1(y)
        y = self.rs_ru2(y)
        y = self.rs_mp1(y)
        return y
    
class resnet33(torch.nn.Module):
    def __init__(self, N, C, L, W, training=False):
        super(resnet33, self).__init__()
        self.rn33_rs1 = residual_stack(N, 3, 1024,1, training) #output is N*32*512
        self.rn33_rs2 = residual_stack(N, 32, 512,1, training) #output is N*32*256
        self.rn33_rs3 = residual_stack(N, 32, 256,1, training) #output is N*32*128
        self.rn33_rs4 = residual_stack(N, 32, 128,1, training) #output is N*32*64
        self.rn33_rs5 = residual_stack(N, 32, 64,1, training) #output is N*32*32
        self.rn33_rs6 = residual_stack(N, 32, 32,1, training) #output is N*32*16
        self.flat = torch.nn.Flatten() #output is N*512
        self.fc1 = torch.nn.Linear(512, 128) #output is N*128
        self.selu1 = torch.nn.SELU()
        self.alphadrop1 = torch.nn.AlphaDropout(p=0.95)
        self.fc2 = torch.nn.Linear(128, 128) #output is N*128
        self.selu2 = torch.nn.SELU()
        self.alphadrop2 = torch.nn.AlphaDropout(p=0.95)
        self.fc3 = torch.nn.Linear(128, 10) #output is N*24
        self.smx1 = torch.nn.Softmax(dim=1)#dimension
        
    def forward(self, x):
        #print('input' + str(x.size()))
        y = self.rn33_rs1(x)
        #print(y.size())
        y = self.rn33_rs2(y)
        #print(y.size())
        y = self.rn33_rs3(y)
        #print(y.size())
        y = self.rn33_rs4(y)
        #print(y.size())
        y = self.rn33_rs5(y)
        #print(y.size())
        y = self.rn33_rs6(y)
        #print(y.size())
        #85272 parameters
        y = self.flat(y)
        #print(y.size())
        y = self.fc1(y)
        y = self.selu1(y)
        y = self.alphadrop1(y)
        #print(y.size())
        y = self.fc2(y)
        y = self.selu2(y)
        y = self.alphadrop2(y)
        #print(y.size())
        y = self.fc3(y)
        y = self.smx1(y)
        #print(y.size())
        return y
    
class resnet33_new(torch.nn.Module):
    def __init__(self):
        super(resnet33_new, self).__init__() #Consider only 1 residual stack layer
        self.conv1 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        self.maxpool = torch.nn.MaxPool1d(kernel_size=2, stride=2)
        
        self.conv2 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        self.conv3 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        self.conv4 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        self.conv5 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        
        self.conv6 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        self.conv7 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        self.conv8 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        self.conv9 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        
        self.conv10 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        self.conv11 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        self.conv12 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        self.conv13 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        
        self.conv14 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        self.conv15 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        self.conv16 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        self.conv17 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        
        self.conv18 = torch.nn.Sequential(torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1),torch.nn.BatchNorm1d(32))
        
        self.classifier = torch.nn.Sequential(torch.nn.Linear(512,128),
                                        #torch.nn.SELU(True),
                                        #torch.nn.AlphaDropout(p=0.95),
                                        torch.nn.ReLU(True),
                                        torch.nn.Linear(128,128),
                                        #torch.nn.SELU(True),
                                        #torch.nn.AlphaDropout(p=0.95), 
                                        torch.nn.ReLU(True),
                                        torch.nn.Linear(128,10))
                                        #torch.nn.Softmax(dim=1)) 
            
    def forward(self,x):
        #Residual Stack 1
        y1 = F.relu(self.conv1(x)) #1'st layer con
        y2 = F.relu(self.conv2(y1))
        y3 = self.maxpool(F.relu(self.conv3(y2) + y1))
        
        #Residual Stack 2
        y4 = self.conv4(y3)
        y5 = F.relu(self.conv5(y4))
        y6 = self.maxpool(F.relu(self.conv6(y5) + y4))
        
        #Residual Stack 3
        y4 = self.conv7(y6)
        y5 = F.relu(self.conv8(y4))
        y6 = self.maxpool(F.relu(self.conv9(y5) + y4))
        
        #Residual Stack 4
        y4 = self.conv10(y6)
        y5 = F.relu(self.conv11(y4))
        y6 = self.maxpool(F.relu(self.conv12(y5) + y4))
        
        #Residual Stack 5
        y4 = self.conv13(y6)
        y5 = F.relu(self.conv14(y4))
        y6 = self.maxpool(F.relu(self.conv15(y5) + y4))
        
        #Residual Stack 6
        y4 = self.conv16(y6)
        y5 = F.relu(self.conv17(y4))
        y6 = self.maxpool(F.relu(self.conv18(y5) + y4))
        
        y7 = torch.flatten(y6,1)
        y8 = self.classifier(y7)
        
        return y8
        
            

### Define Optimizer, Loss Function

In [38]:
#trainset
#testset
N, C, L, W, modulation_classes = 4,1,1024,1,10
#x = torch.randn(N,C,L)
#y = torch.randn(N, modulation_classes)
training = True
learning_rate =  1e-3

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

#Instantiate Model
model = resnet33_new()
model.to(device)

#Print Model for reference
print(model)

train_data = iter(trainloader) #train_data is a list of 12500 lists. Each list is [input tensor, label tensor]. Input tensor is of size(4,3,32,32), output tensor is of size(4)
test_data = iter(testloader)

#print(train_data.next()[1].size())

#Print number of parameters
print(sum([param.nelement() for param in model.parameters()]))
    
for name, param in model.named_parameters():
    #if param.requires_grad:
    print('{:s}\t{:s}\t{:s}'.format(name.ljust(40), str(param.size()).ljust(30), str(param.nelement()).rjust(10)))

#Define Loss function
#criterion = torch.nn.MSELoss(reduction='sum')
criterion = torch.nn.CrossEntropyLoss()

#Define optimizer
#SGD
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-5)
#Adam's algo
#optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=opt.wd) #What is opt? looks like torch.opt

cuda:0
resnet33_new(
  (conv1): Sequential(
    (0): Conv1d(1, 32, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Sequential(
    (0): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): Sequential(
    (0): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv4): Sequential(
    (0): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv5): Sequential(
    (0): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=Tru

### Train Network

In [39]:
import time
tic = time.process_time()
for epoch in range(0,10): #training steps
    running_loss = 0.0
    for i,data in enumerate(trainloader,0): #Loop through data
        inputs, labels = data[0].to(device), data[1].to(device) #Batch size of 4
        inputs_new = torch.zeros([4,1,32,32], dtype=torch.float32, device=device)
        inputs_new[:,:,2:30,2:30] = inputs
        inputs_new = inputs_new.reshape(4,1,1024)

        outputs = model(inputs_new) #Get output
        
        loss = criterion(outputs, labels)#Get loss
        
        #optimizer.zero_grad()
        
        loss.backward() #Back propagation
        
        #optimizer.step()
        
        #print(loss.item())
        #print('Hello World before')
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
            
        #print('Hello World mid')
        with torch.no_grad():
            for param in model.parameters():
                param -= learning_rate*param.grad
            
        
        #print('Hello World after')
        #print(learning_rate)
        model.zero_grad()
        
        
toc = time.process_time()           
print(toc-tic)
print('Finished Training')


#Save Network
#PATH = './cifar_net.pth'
#torch.save(net.state_dict(), PATH) 
        

[1,  2000] loss: 1.709
[1,  4000] loss: 0.557
[1,  6000] loss: 0.303
[1,  8000] loss: 0.220
[1, 10000] loss: 0.180
[1, 12000] loss: 0.148
[1, 14000] loss: 0.143
[2,  2000] loss: 0.122
[2,  4000] loss: 0.116
[2,  6000] loss: 0.103
[2,  8000] loss: 0.103
[2, 10000] loss: 0.113
[2, 12000] loss: 0.095
[2, 14000] loss: 0.098
[3,  2000] loss: 0.080
[3,  4000] loss: 0.076
[3,  6000] loss: 0.075
[3,  8000] loss: 0.074
[3, 10000] loss: 0.069
[3, 12000] loss: 0.073
[3, 14000] loss: 0.080
[4,  2000] loss: 0.062
[4,  4000] loss: 0.061
[4,  6000] loss: 0.063
[4,  8000] loss: 0.056
[4, 10000] loss: 0.057
[4, 12000] loss: 0.055
[4, 14000] loss: 0.061
[5,  2000] loss: 0.049
[5,  4000] loss: 0.044
[5,  6000] loss: 0.053
[5,  8000] loss: 0.051
[5, 10000] loss: 0.050
[5, 12000] loss: 0.054
[5, 14000] loss: 0.051
[6,  2000] loss: 0.042
[6,  4000] loss: 0.040
[6,  6000] loss: 0.041
[6,  8000] loss: 0.045
[6, 10000] loss: 0.047
[6, 12000] loss: 0.052
[6, 14000] loss: 0.044
[7,  2000] loss: 0.031
[7,  4000] 

### Class wise accuracy

In [45]:
#Save Network
PATH = './resnet33_new_relu.pth'
torch.save(model.state_dict(), PATH) 

model1 = resnet33_new()
model1.to(device)
model1.load_state_dict(torch.load(PATH))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        images_new = torch.zeros([4,1,32,32], dtype=torch.float32, device=device)
        images_new[:,:,2:30,2:30] = images
        images_new = images_new.reshape(4,1,1024) 
        outputs = model1(images_new)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %f %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of     0 : 99.285714 %
Accuracy of     1 : 99.207048 %
Accuracy of     2 : 98.643411 %
Accuracy of     3 : 98.316832 %
Accuracy of     4 : 98.879837 %
Accuracy of     5 : 98.991031 %
Accuracy of     6 : 98.747390 %
Accuracy of     7 : 98.638132 %
Accuracy of     8 : 99.486653 %
Accuracy of     9 : 96.630327 %


### Total accuracy across all classes

In [44]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        images_new = torch.zeros([4,1,32,32], dtype=torch.float32, device=device)
        images_new[:,:,2:30,2:30] = images
        images_new = images_new.reshape(4,1,1024) 
        outputs = model1(images_new)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %f %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 98.680000 %
