In [1]:
# Resnet33
print('Resnet33')

Resnet33


In [12]:
import numpy as np
import time

N, D_in, H, D_out = 64, 1000, 100, 10
#input
a0 = np.random.randn(N, D_in) #Sizes are other way round
#output
a2 = np.random.randn(N, D_out)
#weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6

tic = time.process_time()

for i in range(0, 500):
#Forward Pass
    z = np.dot(a0, w1)
    a1 = np.maximum(z, 0) #ReLU
    a2_hat = np.dot(a1, w2)

    #Backward Pass
    #Loss function is mean squared error
    loss = np.square(a2 - a2_hat)
    loss = np.sum(loss, axis = 1, keepdims = True)
    #if(i%100 == 99):
    #    print(loss)

    #Calculate dw1 and dw2
    #dw1 = 1/N*dw1
    da2 = -2*(a2-a2_hat)
    dw2 = np.dot(a1.T, da2)
    #print(dw2.shape)
    da1 = np.dot(da2, w2.T)
    #print(dwa1.shape)
    dz1 = np.maximum(da1, 0)
    #print(dz1.shape)
    dw1 = np.dot(a0.T, dz1)
    #print(dw1.shape)

    w1 = w1 - learning_rate*dw1
    w2 = w2 - learning_rate*dw2
    
    
toc = time.process_time()

print(1000*(toc - tic))

248265.51371000003


In [18]:
## This is a simple 2 layer Neural Network written using numpy

In [13]:
import torch
import time

dtype = torch.float
device = torch.device("cpu")

N, D_in, H, D_out = 64, 1000, 100, 10

#Input
a0=torch.randn(N, D_in, device=device, dtype=dtype)
print(a0.size())
#Output
a2=torch.randn(N, D_out, device=device, dtype=dtype)
print(a2.size())

#Weights
w1 = torch.randn(D_in, H, device=device, dtype=dtype)
w2 = torch.randn(H, D_out, device=device, dtype=dtype)

learning_rate = 1e-6

tic = time.process_time()
#print(type(a0))
for i in range(0,500):
    #forward
    z1 = torch.mm(a0,w1)
    a1 = torch.clamp(z1, min=0)
    #print(a1.size())
    a2_hat = torch.mm(a1,w2)
    #print(a2_hat.size())

    loss = torch.sum(torch.pow((a2 - a2_hat),2),1)#Square + sum of rows
    #print((a2 - a2_hat).size())

    #Backprop
    da2 = -2*(a2 - a2_hat)
    dw2 = torch.mm(a1.transpose(0,1), da2)
    da1 = torch.mm(da2, w2.transpose(0,1))
    dz1 = torch.clamp(da1,min=0)
    dw1 = torch.mm(a0.transpose(0,1), dz1)

    w1 = w1 - learning_rate*dw1
    w2 = w2 - learning_rate*dw2
    
toc = time.process_time()

print(1000*(toc-tic))

torch.Size([64, 1000])
torch.Size([64, 10])
15212.888563000035


In [40]:
#Use Autograd - automatic gradient calculations

import torch
import time

N, D_in, H, D_out = 64, 1000, 100, 10

dtype = torch.float
device = torch.device("cuda:0")

a0 = torch.randn(N, D_in, dtype=dtype,device=device)
a2 = torch.randn(N, D_out, dtype=dtype,device=device)

w1 = torch.randn(D_in, H, dtype=dtype, device=device, requires_grad=True)
w2 = torch.randn(H, D_out, dtype=dtype, device=device, requires_grad=True)

learning_rate = 1e-6

for i in range(0, 500):
    
    a2_hat = torch.mm(torch.clamp(torch.mm(a0,w1), min=0),w2) #forward pass

    loss = torch.pow((a2 - a2_hat),2) #Cost function
    loss = loss.sum()
    if(i%100 == 99):
        print(i, loss)

    loss.backward() #Computes gradient of all tensors with requires_grad=True wrt scalar tensor 'loss'

    with torch.no_grad(): #Temporary disables gradient calculation
        w1 -= learning_rate*w1.grad #As a result only the value will change
        w2 -= learning_rate*w2.grad #We don't want pytorch to calculate the new gradients
        
        w1.grad.zero_() #If not used can cause weird results
        w2.grad.zero_() #Pytorch accumulates the gradients during backprop, otherwise gradient will point in another direction
    
    

99 tensor(395.5395, device='cuda:0', grad_fn=<SumBackward0>)
199 tensor(1.1924, device='cuda:0', grad_fn=<SumBackward0>)
299 tensor(0.0069, device='cuda:0', grad_fn=<SumBackward0>)
399 tensor(0.0002, device='cuda:0', grad_fn=<SumBackward0>)
499 tensor(3.7465e-05, device='cuda:0', grad_fn=<SumBackward0>)


In [54]:
#Torch nn module
import torch

class TwoLayerNet(torch.nn.Module): #Inherited class of torch.nn.Module class
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__() #needed
        
        self.linear1 = torch.nn.Linear(D_in, H) #sort of like function pointers
        self.activation1 = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(H, D_out)
        
    def forward(self, a0):
        z1 = self.linear1(a0)
        a1 = self.activation1(z1)
        a2 = self.linear2(a1)
        return a2

#Define parameters of NN
N, D_in, H, D_out = 64, 1000, 100, 10

dtype = torch.float
device = torch.device('cpu')


#Input and output
a0 = torch.randn(N, D_in, dtype=dtype, device=device)
a2 = torch.randn(N, D_out, dtype=dtype, device=device)

print(a0.size()[-1])

#Learning Rate
learning_rate = 1e-4

#Instantiate NN
model = TwoLayerNet(D_in, H, D_out)

print(model.parameters())

#Define loss function
loss_fn = torch.nn.MSELoss(reduction='sum') #Indicates output will be summed. Output size is NxD_out. So it will be summed to get a single scalar

#Define optimizer
#optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

for i in range(0, 1):
    
    #Forward pass
    a2_pred = model(a0)
    
    #Compute Loss
    loss = loss_fn(a2_pred, a2)
    
    print(i, loss)
        
    #Set gradients to zero
    #optimizer.zero_grad()
    
        
    #Backward pass
    loss.backward()
    
    #Update weights
    #optimizer.step()
    
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate*param.grad
    
    model.zero_grad()
            


1000
<generator object Module.parameters at 0x7f6d745c5138>
0 tensor(690.2531, grad_fn=<MseLossBackward>)


In [54]:
#Residual_Stack_sim
import torch


inp = torch.randn(128,2,1024,1)
print(inp.size())

m = torch.nn.Conv2d(in_channels=2,out_channels=32,kernel_size=(1,1)) #equivalent to padding = 'SAME'
out1 = m(inp)
print(out1.size())

print(sum([param.nelement() for param in m.parameters()]))

ru1 = torch.nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3,3), padding=(1,1))
out11 = ru1(out1)
print(out11.size())

print(sum([param.nelement() for param in ru1.parameters()]))

ru2 = torch.nn.BatchNorm2d(num_features=32)
out12 = ru1(out11)
print(out12.size())

print(sum([param.nelement() for param in ru2.parameters()]))

n = torch.nn.MaxPool2d(kernel_size=(2,1), stride=(2,1))
out2 = n(out12)
print(out2.size())

print(sum([param.nelement() for param in n.parameters()]))

torch.Size([128, 2, 1024, 1])
torch.Size([128, 32, 1024, 1])
96
torch.Size([128, 32, 1024, 1])
9248
torch.Size([128, 32, 1024, 1])
64
torch.Size([128, 32, 512, 1])
0


In [10]:
#Resnet
# y <-> cnn
#need to check affine in BatchNorm1d

import torch

class residual_unit(torch.nn.Module):
    def __init__(self, N, C, L, W, training=False):
        super(residual_unit, self).__init__()
        self.ru_conv1 = torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1, bias=True)
        self.ru_bn1 = torch.nn.BatchNorm1d(32, affine=training)
        self.ru_act1 = torch.nn.ReLU()
        self.ru_conv2 = torch.nn.Conv1d(in_channels=32, out_channels=32, kernel_size=3, padding=1, bias=True)
        self.ru_bn2 = torch.nn.BatchNorm1d(32, affine=training)
        self.ru_act2 = torch.nn.ReLU()
        
    def forward(self, x):
        y = self.ru_conv1(x)
        y = self.ru_bn1(y)
        y = self.ru_act1(y)
        y = self.ru_conv2(y)
        y = self.ru_bn2(y)
        y = y + x
        y = self.ru_act2(y)
        return y

class residual_stack(torch.nn.Module):
    def __init__(self, N, C, L, W, training=False):
        super(residual_stack, self).__init__()
        self.rs_conv1 = torch.nn.Conv1d(in_channels=C, out_channels=32, kernel_size=1, bias=False)
        self.rs_bn1 = torch.nn.BatchNorm1d(32, affine=training)
        self.rs_ru1 = residual_unit(N, C, L, W, training) #Create an object of the custom nn model
        self.rs_ru2 = residual_unit(N, C, L, W, training)
        self.rs_mp1 = torch.nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
        
    def forward(self, x):
        y = self.rs_conv1(x)
        y = self.rs_bn1(y)
        y = self.rs_ru1(y)
        y = self.rs_ru2(y)
        y = self.rs_mp1(y)
        return y
    
class resnet33(torch.nn.Module):
    def __init__(self, N, C, L, W, training=False):
        super(resnet33, self).__init__()
        self.rn33_rs1 = residual_stack(N, 2, 1024, training) #output is N*32*512
        self.rn33_rs2 = residual_stack(N, 32, 512, training) #output is N*32*256
        self.rn33_rs3 = residual_stack(N, 32, 256, training) #output is N*32*128
        self.rn33_rs4 = residual_stack(N, 32, 128, training) #output is N*32*64
        self.rn33_rs5 = residual_stack(N, 32, 64, training) #output is N*32*32
        self.rn33_rs6 = residual_stack(N, 32, 32, training) #output is N*32*16
        self.flat = torch.nn.Flatten() #output is N*512
        self.fc1 = torch.nn.Linear(512, 128) #output is N*128
        self.selu1 = torch.nn.SELU()
        self.alphadrop1 = torch.nn.AlphaDropout(p=0.95)
        self.fc2 = torch.nn.Linear(128, 128) #output is N*128
        self.selu2 = torch.nn.SELU()
        self.alphadrop2 = torch.nn.AlphaDropout(p=0.95)
        self.fc3 = torch.nn.Linear(128, 24) #output is N*24
        self.smx1 = torch.nn.Softmax()#dimension
        
    def forward(self, x):
        print('input' + str(x.size()))
        y = self.rn33_rs1(x)
        print(y.size())
        y = self.rn33_rs2(y)
        print(y.size())
        y = self.rn33_rs3(y)
        print(y.size())
        y = self.rn33_rs4(y)
        print(y.size())
        y = self.rn33_rs5(y)
        print(y.size())
        y = self.rn33_rs6(y)
        print(y.size())
        #85272 parameters
        y = self.flat(y)
        print(y.size())
        y = self.fc1(y)
        y = self.selu1(y)
        y = self.alphadrop1(y)
        print(y.size())
        y = self.fc2(y)
        y = self.selu2(y)
        y = self.alphadrop2(y)
        print(y.size())
        y = self.fc3(y)
        y = self.smx1(y)
        print(y.size())
        return y
        

#Initialization
N, C, L, W, modulation_classes = 128,2,1024,1,24
x = torch.randn(N,C,L)
y = torch.randn(N, modulation_classes)
training = True
learning_rate =  1e-4

#Instantiate Model
model = resnet33(N, C, L, W, training)

#Print Model for reference
print(model)

y_pred = model(x)

#Print number of parameters
print(sum([param.nelement() for param in model.parameters()]))
    
for name, param in model.named_parameters():
    if param.requires_grad:
        print('{:s}\t{:s}\t{:s}'.format(name.ljust(40), str(param.size()).ljust(30), str(param.nelement()).rjust(10)))

#Define Loss function
criterion = torch.nn.MSELoss(reduction='sum')

#Define optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

if(False):
    for i in range(0, 500):
        #Calculate predicted values
        y_pred = model(x)

        #Calculate loss
        loss = criterion(y_pred, y)

        #print(i, loss)
        if(i%100 == 99):
            print(i, loss)

        #Set gradients to zero
        optimizer.zero_grad()

        #Backprop
        loss.backward()

        #Update parameters
        optimizer.step()
    

resnet33(
  (rn33_rs1): residual_stack(
    (rs_conv1): Conv1d(2, 32, kernel_size=(1,), stride=(1,), bias=False)
    (rs_bn1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
    (rs_ru1): residual_unit(
      (ru_conv1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
      (ru_bn1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      (ru_act1): ReLU()
      (ru_conv2): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
      (ru_bn2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      (ru_act2): ReLU()
    )
    (rs_ru2): residual_unit(
      (ru_conv1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
      (ru_bn1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      (ru_act1): ReLU()
      (ru_conv2): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
      (ru_bn2): BatchNorm1d(32, eps=1e-05, momentum=0

