<a href="https://colab.research.google.com/github/sachinkun21/CNN/blob/master/LeNet_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Introduction
LeNet-5, from the paper Gradient-Based Learning Applied to Document Recognition, is a very efficient convolutional neural network for handwritten character recognition.


<a href="http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf" target="_blank">Paper: <u>Gradient-Based Learning Applied to Document Recognition</u></a>

**Authors**: Yann LeCun, Léon Bottou, Yoshua Bengio, and Patrick Haffner

**Published in**: Proceedings of the IEEE (1998)




In [0]:
import torch 
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class LeNet(nn.Module):
  def __init__(self):
    super(LeNet, self).__init__()
    self.conv1 = nn.Conv2d(1,6,5)
    self.conv2 = nn.Conv2d(6,16,5)
    self.fc1 = nn.Linear(16*4*4,120)
    self.fc2 = nn.Linear(120,84)
    self.fc3 = nn.Linear(84,10)
  
  def forward(self,x):
    x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
    x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
    x = x.view(-1, self.num_flat_features(x))
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x

  def num_flat_features(self,x):
    size = x.size()[1:] 
    num_features = 1
    for s in size:
        num_features *= s
    return num_features

lenet = LeNet()
print(lenet)

LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [3]:
params = list(lenet.parameters())
print(len(params))
print(params[2].size())  # conv1's .weight

10
torch.Size([16, 6, 5, 5])


In [4]:
inp = torch.randn(1, 1, 28,28)
out = lenet(inp)
print(out)

tensor([[-0.0430,  0.0578, -0.0827, -0.0121, -0.0453, -0.0078,  0.0431,  0.1317,
          0.0858, -0.0289]], grad_fn=<AddmmBackward>)


In [5]:
inp.dtype

torch.float32

In [6]:
import keras
from keras.datasets import mnist
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Flatten
from keras.models import Sequential

# Loading the dataset and perform splitting
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print(x_train.shape)
# Peforming reshaping operation
x_train = x_train.reshape(x_train.shape[0], 1, 28, 28)
x_test = x_test.reshape(x_test.shape[0], 1, 28, 28)

# Normalization
x_train = x_train / 255
x_test = x_test / 255

# One Hot Encoding
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
# Building the Model Architecture

Using TensorFlow backend.


(60000, 28, 28)


In [0]:
import torch.optim as optim

# create your optimizer
optimizer = optim.SGD(lenet.parameters(), lr=0.01)
criterion = nn.MSELoss()

#torch.set_default_tensor_type(torch.DoubleTensor)

input = torch.from_numpy(x_train).type(torch.float32)
target = torch.from_numpy(y_train).type(torch.float32)

In [8]:
input.shape,target.shape

(torch.Size([60000, 1, 28, 28]), torch.Size([60000, 10]))

# Training

In [9]:
x_train.shape,y_train.shape

((60000, 1, 28, 28), (60000, 10))

In [0]:
x_train = torch.from_numpy(x_train).type(torch.float32)
y_train = torch.from_numpy(y_train).type(torch.float32)

x_test = torch.from_numpy(x_test).type(torch.float32)
y_test = torch.from_numpy(y_test).type(torch.float32)

### Stochastic Training: Upgrading gradients for each data-point

In [11]:
import numpy as np

criterion = nn.MSELoss()
optimizer = optim.SGD(lenet.parameters(), lr=0.01)


for epoch in range(1,5):
  start = time.time()
  train_loss, valid_loss = [], []

  # Training 
  lenet.train()
  for data , target in zip(x_train,y_train):
      
      data = data.view(-1,1,28,28)
      target = target.view(-1,10)
      optimizer.zero_grad()

      # 1. Forward Propagation
      output = lenet(data)

      # 2. Loss Calculation
      loss = criterion(output, target)

      # 3. Backward propagation
      loss.backward()

      # 4. Weight Optimization
      optimizer.step()

      train_loss.append(loss.item())

  stop = time.time()
  # Evaluation 
  lenet.eval()
  for data, target in zip(x_test,y_test):
    data = data.view(-1,1,28,28)
    target = target.view(-1,10)

    output = lenet(data)
    loss = criterion(output , target)
    valid_loss.append(loss.item())
  print("Epoch:" , epoch, "\tTraining Loss:", np.mean(train_loss) , "\tValidation Loss:", np.mean(valid_loss), "\tTime Taken:" ,stop-start)

NameError: ignored

### Mini Batch Training: Upgrading Gradients for Each set(Batch_size)

In [14]:
import numpy as np
import time

criterion = nn.MSELoss()
optimizer = optim.SGD(lenet.parameters(), lr=0.01)
batch_size = 256
step_size = len(x_train)//128


# To creates batches of Data
def return_mini_batch( x_train,y_train,batch_size = 128):
  indexes = np.random.randint(0,len(x_train),batch_size)
  return  x_train[indexes],y_train[indexes]

# x_train_b ,y_train_b = return_mini_batch(x_train,y_train)
# x_train_b.shape

# TRAINING: MGD
for epoch in range(1,5):
  start = time.time()
  train_loss, valid_loss = [], []

  # Training 
  lenet.train()
  for step in range(step_size):
      data,target = return_mini_batch(x_train,y_train,step_size)
      optimizer.zero_grad()

      # 1. Forward Propagation
      output = lenet(data)

      # 2. Loss Calculation
      loss = criterion(output, target)

      # 3. Backward propagation
      loss.backward()

      # 4. Weight Optimization
      optimizer.step()

      train_loss.append(loss.item())
  stop = time.time()
  
  # Evaluation 
  lenet.eval()
  output = lenet(x_test)
  loss = criterion(output , y_test)
  valid_loss.append(loss.item())

  print("Epoch:" , epoch, "\tTraining Loss:", np.mean(train_loss) , "\tValidation Loss:", np.mean(valid_loss), "\tTime Taken:" ,stop-start)

Epoch: 1 	Training Loss: 0.085113576032285 	Validation Loss: 0.08393959701061249 	Time Taken: 38.121838331222534
Epoch: 2 	Training Loss: 0.08279042894768919 	Validation Loss: 0.08100586384534836 	Time Taken: 37.94103026390076
Epoch: 3 	Training Loss: 0.0794776533690528 	Validation Loss: 0.07720340043306351 	Time Taken: 38.080740451812744
Epoch: 4 	Training Loss: 0.07531313927700886 	Validation Loss: 0.07262440025806427 	Time Taken: 38.30573225021362


In [0]:
# import numpy as np
# indexes = np.random.randint(0,len(x_train),128)
# print(indexes)
# minibatch = x_train[indexes],y_train[indexes]
# len(minibatch)

In [0]:
from torchvision.datasets import MNIST

# Loading the MNIST Dataset and applying transform function
mnist = MNIST("data" , download = True , train = True)
mnist[0]