In [1]:
 import torch
 import torch.nn as nn 
 import torchvision
 import torchvision.transforms as transforms
 import matplotlib.pyplot as plt

**Setting the Parameters**

In [11]:
 input_size = 784 # 28x28
 hidden_size = 128 
 num_classes = 10
 num_epochs = 100
 batch_size = 1000
 learning_rate = 0.001 

**Fetching the Dataset**

In [3]:
train_dataset = torchvision.datasets.MNIST(root='./data',train=True, transform=transforms.ToTensor(),download=True)
test_dataset = torchvision.datasets.MNIST(root='./data',train=False, transform=transforms.ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



Loading the Data

In [12]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=32,shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=10000, shuffle=False)

**DNN with Relu**

In [13]:
#DNN with Relu for pretraining

class NeuralNet_Relu(nn.Module):
     def __init__(self, input_size, hidden_size, num_classes):
         super(NeuralNet_Relu, self).__init__()
         self.input_size = input_size

         #Input Layer
         self.l=nn.Linear(input_size, hidden_size)

         #Hidden Layer
         self.l1 = nn.Linear(hidden_size, hidden_size) 
         self.relu = nn.ReLU()

         #Output Layer
         self.l2 = nn.Linear(hidden_size, num_classes)
         self.softmax=nn.Softmax()


     #Network(): Creating and appending Network Layer          
     def Network(self):
       self.Layers=[]
       self.Layers.append(self.l)
       for i in range(0,4):
         self.Layers.append(self.l1) 
       self.Layers.append(self.l2)
       return self.Layers


     #Forward():Feed Forward Logic
     def forward(self,x,Layers):
         out=x
         for i,Layer in enumerate(Layers):
           if(i==5):
             out=Layer(out)
           else: 
             out = Layer(out)
             out = self.relu(out)

         return out

**DNN for FLNPF**

In [14]:
#DNN Class For FLNPF
class NeuralNet_NPF(nn.Module):
     def __init__(self, input_size, hidden_size, num_classes):
         super(NeuralNet_NPF, self).__init__()
         self.input_size = input_size
         #Input Layer
         self.l=nn.Linear(input_size, hidden_size)

         #Hidden Layer
         self.l1 = nn.Linear(hidden_size, hidden_size) 
         self.relu = nn.ReLU()

         #Output Layer
         self.l2 = nn.Linear(hidden_size, num_classes)

     #Create_Layer() : Appending all the Network layers together 
     def Create_Network_Layer(self):
       self.Layers=[]
       self.Layers.append(self.l)
       for i in range(0,4):
         self.Layers.append(self.l1) 
       self.Layers.append(self.l2)
       return self.Layers

     #forward(): Feed Forward the images 
     def forward(self,images,Layer_FLNPF):
      XP=images
      XV=images

      #Feed Forward Logic
      for j,layer in enumerate((Layer_FLNPF)):
        if j==5:
          outputs=layer(XV)
        else:
          XP=torch.relu(layer(XP))
          XV=layer(XV)
          G=torch.sign(XP)
          XV=XV*G.detach()
      
      return outputs


***Model Generation and Training***

In [19]:
#Creating a DNN with Relu model

Model_Relu = NeuralNet_Relu(input_size, hidden_size, num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(Model_Relu.parameters(),lr=learning_rate)

In [20]:
#Training the DNN with Relu

Layers=Model_Relu.Network()

n_total_steps = len(train_loader)

for epoch in range(num_epochs):
     for i, (images,labels) in enumerate(train_loader):  

         images = images.reshape(-1, 28*28)
         labels = labels

         outputs = Model_Relu.forward(images,Layers)

         loss = criterion(outputs, labels)
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()

         if (i+1) % 100 == 0:
             print (f'Epoch [{epoch+1}/{num_epochs}], Step[{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

Epoch [1/100], Step[100/1875], Loss: 0.4224
Epoch [1/100], Step[200/1875], Loss: 0.2973
Epoch [1/100], Step[300/1875], Loss: 0.2528
Epoch [1/100], Step[400/1875], Loss: 0.2740
Epoch [1/100], Step[500/1875], Loss: 0.3645
Epoch [1/100], Step[600/1875], Loss: 0.2965
Epoch [1/100], Step[700/1875], Loss: 0.2421
Epoch [1/100], Step[800/1875], Loss: 0.2408
Epoch [1/100], Step[900/1875], Loss: 0.1462
Epoch [1/100], Step[1000/1875], Loss: 0.1641
Epoch [1/100], Step[1100/1875], Loss: 0.4514
Epoch [1/100], Step[1200/1875], Loss: 0.2391
Epoch [1/100], Step[1300/1875], Loss: 0.1006
Epoch [1/100], Step[1400/1875], Loss: 0.1558
Epoch [1/100], Step[1500/1875], Loss: 0.5691
Epoch [1/100], Step[1600/1875], Loss: 0.0610
Epoch [1/100], Step[1700/1875], Loss: 0.0273
Epoch [1/100], Step[1800/1875], Loss: 0.1539
Epoch [2/100], Step[100/1875], Loss: 0.0886
Epoch [2/100], Step[200/1875], Loss: 0.0155
Epoch [2/100], Step[300/1875], Loss: 0.1948
Epoch [2/100], Step[400/1875], Loss: 0.0450
Epoch [2/100], Step[500

**Creating and Training FLNPF Model**

In [21]:
#Loading the above Trained Relu Layer 

Layer_Relu=Model_Relu.Network()

#Creating New FLNPF Model
FLNPF_Model=NeuralNet_NPF(input_size, hidden_size, num_classes)

Layer_FLNPF=FLNPF_Model.Create_Network_Layer()

optimizer = torch.optim.Adam(FLNPF_Model.parameters(), lr=learning_rate)


#Copying the Weights and biases from relu trained model to the new model
for layer_relu,layer_FLNPF in zip(Layer_Relu,Layer_FLNPF):
  layer_FLNPF.weight=layer_relu.weight
  layer_FLNPF.bias=layer_relu.bias
  layer_FLNPF.requires_grad=False


#Training the New Model over train data
final_train_acc=0
for epoch in range(num_epochs):
     n_samples=0
     n_correct=0
     for i, (images, labels) in enumerate(train_loader):  

         images = images.reshape(-1, 28*28)
         labels = labels

         outputs=FLNPF_Model(images,Layer_FLNPF)

         loss = criterion(outputs, labels)
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()

         _, predicted = torch.max(outputs.data, 1)
         n_samples += labels.size(0)
         n_correct += (predicted == labels).sum().item()  

         if (i+1) % 100 == 0:
             print (f'Epoch [{epoch+1}/{num_epochs}], Step[{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')


     acc = 100.0 * n_correct / n_samples
     print("------Epoch:",epoch)
     #Acuracy after every Epoch
     print("Accuracy",acc)
     final_train_acc=max(final_train_acc,acc)
     
#Max Accuracy over all epochs
print("Max Training Accuracy")
print(final_train_acc)

Epoch [1/100], Step[100/1875], Loss: 0.0000
Epoch [1/100], Step[200/1875], Loss: 0.0000
Epoch [1/100], Step[300/1875], Loss: 0.0000
Epoch [1/100], Step[400/1875], Loss: 0.0000
Epoch [1/100], Step[500/1875], Loss: 0.0000
Epoch [1/100], Step[600/1875], Loss: 0.0000
Epoch [1/100], Step[700/1875], Loss: 0.0001
Epoch [1/100], Step[800/1875], Loss: 0.0041
Epoch [1/100], Step[900/1875], Loss: 0.0000
Epoch [1/100], Step[1000/1875], Loss: 0.0000
Epoch [1/100], Step[1100/1875], Loss: 0.0001
Epoch [1/100], Step[1200/1875], Loss: 0.0000
Epoch [1/100], Step[1300/1875], Loss: 0.0002
Epoch [1/100], Step[1400/1875], Loss: 0.0386
Epoch [1/100], Step[1500/1875], Loss: 0.0174
Epoch [1/100], Step[1600/1875], Loss: 0.0000
Epoch [1/100], Step[1700/1875], Loss: 0.0000
Epoch [1/100], Step[1800/1875], Loss: 0.0000
------Epoch: 0
Accuracy 99.92833333333333
Epoch [2/100], Step[100/1875], Loss: 0.0000
Epoch [2/100], Step[200/1875], Loss: 0.0000
Epoch [2/100], Step[300/1875], Loss: 0.0000
Epoch [2/100], Step[400/1

**Testing**

> FLNPF Model




In [23]:
#Testing Over Test Data

n_correct = 0
n_samples = 0
for images, labels in test_loader:

  images = images.reshape(-1, 28*28)
  labels = labels

  outputs=FLNPF_Model(images,Layer_FLNPF)  

  _, predicted = torch.max(outputs.data, 1)
  n_samples += labels.size(0)
  n_correct += (predicted == labels).sum().item()  
  
acc = 100.0 * n_correct / n_samples
print(f'Accuracy of the network on the 10000 test images: {acc} %')

Accuracy of the network on the 10000 test images: 97.97 %


**DNN for Decoupled Learning**

In [None]:

class NeuralNet_DC(nn.Module):
     def __init__(self, input_size, hidden_size, num_classes):
         super(NeuralNet_DC, self).__init__()
         #Input Layer
         self.l=nn.Linear(input_size, hidden_size)

         #Hidden Layer
         self.l1 = nn.Linear(hidden_size, hidden_size) 
         self.relu = nn.ReLU()

         self.l2 = nn.Linear(hidden_size, num_classes)

     #Creating and appending networ Layers 
     def Network_Layer(self):
       self.Gate=[]
       self.input_size = input_size
       self.Gate.append(self.l)
       for i in range(0,4):
         self.Gate.append(self.l1) 
       self.Gate.append(self.l2)

     #Feed Forward Logic
     def forward(self,images):
      XP1=images
      XV=images
      beta=4
      Gate=torch.nn.Sigmoid()
      for j,layer in enumerate((self.Gate)):
        if j==5:
          outputs=layer(XV)
        else:
          XP=(layer(XP1))
          XP1=torch.relu(XP)
          XV=layer(XV)
          G=Gate(beta*XP)
          XV=XV*G.detach()
      return outputs

     def Network(self):
      return self.Gate

In [None]:
#Creating the Model
modelDC = NeuralNet_DC(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(modelDC.parameters(), lr=learning_rate)
modelDC.Network_Layer()

In [None]:
#Training the DNN with Relu

n_total_steps = len(train_loader)
for epoch in range(num_epochs):
     for i, (images, labels) in enumerate(train_loader):  

         images = images.reshape(-1, 28*28)
         labels = labels
         outputs = modelDC.forward(images)
         loss = criterion(outputs, labels)
        
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
         if (i+1) % 100 == 0:
             print (f'Epoch [{epoch+1}/{num_epochs}], Step[{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

Epoch [1/100], Step[100/1875], Loss: 0.4213
Epoch [1/100], Step[200/1875], Loss: 0.2812
Epoch [1/100], Step[300/1875], Loss: 0.2650
Epoch [1/100], Step[400/1875], Loss: 0.4357
Epoch [1/100], Step[500/1875], Loss: 0.4460
Epoch [1/100], Step[600/1875], Loss: 0.2926
Epoch [1/100], Step[700/1875], Loss: 0.2359
Epoch [1/100], Step[800/1875], Loss: 0.5858
Epoch [1/100], Step[900/1875], Loss: 0.3063
Epoch [1/100], Step[1000/1875], Loss: 0.0685
Epoch [1/100], Step[1100/1875], Loss: 0.2761
Epoch [1/100], Step[1200/1875], Loss: 0.4213
Epoch [1/100], Step[1300/1875], Loss: 0.0639
Epoch [1/100], Step[1400/1875], Loss: 0.2911
Epoch [1/100], Step[1500/1875], Loss: 0.0981
Epoch [1/100], Step[1600/1875], Loss: 0.6109
Epoch [1/100], Step[1700/1875], Loss: 0.1914
Epoch [1/100], Step[1800/1875], Loss: 0.1683
Epoch [2/100], Step[100/1875], Loss: 0.4830
Epoch [2/100], Step[200/1875], Loss: 0.3963
Epoch [2/100], Step[300/1875], Loss: 0.0303
Epoch [2/100], Step[400/1875], Loss: 0.0656
Epoch [2/100], Step[500

In [None]:
#Testing the model 
n_correct = 0
n_samples = 0
for images, labels in test_loader:
  print(images.shape)
  images = images.reshape(-1, 28*28)
  print(images.shape)
  labels = labels
  outputs = modelDC(images)
  # max returns (value ,index)
  _, predicted = torch.max(outputs.data, 1)
  n_samples += labels.size(0)
  n_correct += (predicted == labels).sum().item()  
acc = 100.0 * n_correct / n_samples
print(f'Accuracy of the network on the 10000 test images: {acc} %')

torch.Size([10000, 1, 28, 28])
torch.Size([10000, 784])
Accuracy of the network on the 10000 test images: 98.16 %
