In [1]:
### Importing Dependencies
# Imports
import glob
import time
import sklearn.decomposition
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torchvision.utils import make_grid
from torchvision.datasets import ImageFolder
from torchvision import datasets, transforms

from PIL import Image
from skimage.util import random_noise

In [3]:
##### Set device either GPU or CPU
# inform the user if the notebook uses GPU or CPU.
def set_device():
  device = "cuda" if torch.cuda.is_available() else "cpu"
  if device != "cuda":
    print("GPU is not enabled in this notebook. \n"
          "If you want to enable it, in the menu under `Runtime` -> \n"
          "`Hardware accelerator.` and select `GPU` from the dropdown menu")
  else:
    print("GPU is enabled in this notebook. \n"
          "If you want to disable it, in the menu under `Runtime` -> \n"
          "`Hardware accelerator.` and select `None` from the dropdown menu")

  return device

device = set_device()

GPU is not enabled in this notebook. 
If you want to enable it, in the menu under `Runtime` -> 
`Hardware accelerator.` and select `GPU` from the dropdown menu


In [7]:
train_data = glob.glob("/home/chhabilal/Desktop/treeleaf/OCR_project/CNN_alignment/dataset/train/images/*.jpg")
test_data = glob.glob("/home/chhabilal/Desktop/treeleaf/OCR_project/CNN_alignment/dataset/test/images/*.jpg")



### Data Loading

In [10]:


# Training dataset
train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=64,
                                           shuffle=True,
                                           num_workers=2)
# Test dataset
test_loader = torch.utils.data.DataLoader(test_data,
                                          batch_size=64,
                                          shuffle=True,
                                          num_workers=2)


# Spatial Transformer on Image


In [11]:
class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
    self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
    self.conv2_drop = nn.Dropout2d()
    self.fc1 = nn.Linear(320, 50)
    self.fc2 = nn.Linear(50, 10)

    # Spatial transformer localization-network
    self.localization = nn.Sequential(
        nn.Conv2d(1, 8, kernel_size=7),
        nn.MaxPool2d(2, stride=2),
        nn.ReLU(True),
        nn.Conv2d(8, 10, kernel_size=5),
        nn.MaxPool2d(2, stride=2),
        nn.ReLU(True)
    )

    # Regressor for the 3 * 2 affine matrix
    self.fc_loc = nn.Sequential(
        nn.Linear(10 * 3 * 3, 32),
        nn.ReLU(True),
        nn.Linear(32, 3 * 2)
    )

    # Initialize the weights/bias with identity transformation
    self.fc_loc[2].weight.data.zero_()
    self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

  # Spatial transformer network forward function
  def stn(self, x):
    xs = self.localization(x)
    xs = xs.view(-1, 10 * 3 * 3)
    theta = self.fc_loc(xs)
    theta = theta.view(-1, 2, 3)

    grid = F.affine_grid(theta, x.size())
    x = F.grid_sample(x, grid)

    return x

  def forward(self, x):
    # transform the input
    x = self.stn(x)

    # Perform the usual forward pass
    x = F.relu(F.max_pool2d(self.conv1(x), 2))
    x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
    x = x.view(-1, 320)
    x = F.relu(self.fc1(x))
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    return F.log_softmax(x, dim=1)



# Training the model

Now, let’s use the SGD algorithm to train the model. The network is learning the classification task in a supervised way. In the same time the model is learning STN automatically in an end-to-end fashion.


In [13]:
def train(train_loader, optimizer, epoch, device):

  model.train()
  for batch_idx, (*data, target) in enumerate(train_loader):
    target = np.asarray(target)
    target = torch.from_numpy(target.astype('long'))    
    data, target = data.to(device), target.to(device)
    optimizer.zero_grad()
    output = model(data)
    loss = F.nll_loss(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % 500 == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
          epoch, batch_idx * len(data), len(train_loader.dataset),
          100. * batch_idx / len(train_loader), loss.item()))

"""### Test Function"""

def test(test_loader, device):

  with torch.no_grad():
    model.eval()
    test_loss, correct = 0, 0

    for data, target in test_loader:
      data, target = data.to(device), target.to(device)
      output = model(data)

      # sum up batch loss
      test_loss += F.nll_loss(output, target, size_average=False).item()
      # get the index of the max log-probability
      pred = output.max(1, keepdim=True)[1]
      correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
          .format(test_loss, correct, len(test_loader.dataset),
                  100. * correct / len(test_loader.dataset)))



In [None]:
# Run Train and Test the data

In [14]:
num_epochs = 20
for epoch in range(1, num_epochs + 1):
  train(train_loader, optimizer, epoch, device)
  test(test_loader, device)


NameError: name 'optimizer' is not defined

### Data Preprocessing in pytorch


In [24]:
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as T
import torch
import torch.nn as nn
from torchvision.utils import make_grid
from torchvision.utils import save_image
from IPython.display import Image
import matplotlib.pyplot as plt
import numpy as np
import random
%matplotlib inline

In [30]:
####checking for device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [38]:
transformer = transforms.Compose([
    transforms.Resize([300,300]),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),   ##### 0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5],   # 0-1 to [-1,1], formula{x-mean}/std
                         [0.5,0.5,0.5])
])

In [40]:
##### DataLoader
###Path for training and testing directory
train_path = "/home/chhabilal/Desktop/treeleaf/OCR_project/CNN_alignment/dataset/train/images"
test_path = "/home/chhabilal/Desktop/treeleaf/OCR_project/CNN_alignment/dataset/test/images"

train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path, transform= transformer),
    batch_size = 20, suffle = True

)
test_loader = DataLoader(
    torchvision.datasets.ImageFolder(test_path, transform = transformer),
    batch_size= 20, suffle = True
)




FileNotFoundError: Couldn't find any class folder in /home/chhabilal/Desktop/treeleaf/OCR_project/CNN_alignment/dataset/train/images.

In [36]:
train_dataset_path = "./home/chhabilal/Desktop/treeleaf/OCR_project/CNN_alignment/dataset/train/images"
test_dataset_path = "/home/chhabilal/Desktop/treeleaf/OCR_project/CNN_alignment/dataset/test/images"

IsADirectoryError: [Errno 21] Is a directory: '/home/chhabilal/Desktop/treeleaf/OCR_project/CNN_alignment/dataset/train/images'

In [27]:
import torch
import torchvision

# read a png image
img = torchvision.io.read_image('/home/chhabilal/Desktop/treeleaf/OCR_project/CNN_alignment/dataset/train/images/4.jpg')

# display the properties of image
print("Image data:\n", img)
print(img.size())
print(type(img))

# display the png image
# convert the image tensor to PIL image
img = torchvision.transforms.ToPILImage()(img)

# display the PIL image
img.show()

Image data:
 tensor([[[255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         ...,
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255]],

        [[255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         ...,
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255]],

        [[255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         ...,
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255]]], dtype=torch.uint8)
torch.Size([3, 416, 416])
<class 'torch.Tensor'>


display display: Unable to load font (-*-helvetica-medium-r-normal--12-*-*-*-*-*-iso8859-1).


In [42]:
import keras
import tensorflow as tf
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

2022-04-29 12:23:06.860206: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-04-29 12:23:06.860270: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       ...,

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 