


# Data Preparation

This notebook focuses on the data preparation step for the following bounding-box regression deep learning models:

1) Plain CNN

2) VGG16
 
3) Inception ResNet

In [16]:
### We will import the raw data from kaggle
### Upload kaggle.json (the Kaggle API token) through the Colab file picker.
from google.colab import files
### The result is bound to a name on purpose: files.upload() returns the uploaded
### file contents, and leaving it as the last expression of the cell would print
### the API key into the saved notebook output.
uploaded = files.upload()



Saving kaggle.json to kaggle (1).json


{'kaggle.json': b'{"username":"<REDACTED>","key":"<REDACTED>"}'}

In [17]:
### The data is stored in the below url
### https://www.kaggle.com/datasets/andrewmvd/car-plate-detection
### To import the dataset from kaggle

def download_kaggle():

  !mkdir /root/.kaggle
  !cp kaggle.json /root/.kaggle/
  !chmod 600 /root/.kaggle/kaggle.json 
  !kaggle datasets download -d andrewmvd/car-plate-detection --force
  !unzip -o -qq car-plate-detection.zip 

In [18]:
### importing the necessary libraries

%matplotlib inline

import pandas as pd
import os
import glob
import cv2
import numpy as np
from lxml import etree
from sklearn.model_selection import train_test_split

In [19]:
### Side length in pixels of the square (224*224) input used for all our deep learning models.
### Images are resized to IMAGE_SIZE x IMAGE_SIZE and bounding-box co-ordinates are rescaled to match.
IMAGE_SIZE = 224

def image_extract():
  """Return the sorted paths of the PNG files in the ``images`` folder of the working directory.

  The paths are sorted so that they keep the same order as the sorted XML
  annotation files that hold the bounding boxes.
  """
  ### Wildcard pattern matching every file name ending in "png" inside <cwd>/images/
  png_pattern = os.path.join(os.getcwd() + '/images/', '*png')

  ### glob gives no ordering guarantee, so the matches are sorted before being returned
  return sorted(glob.glob(png_pattern))

In [20]:
### The following function preprocesses the image if necessary
### The image is then resized to 224*224 and stored in an array X 


def image_preprocessing_step(image_files,model):
  """Read every image, optionally sharpen it, and resize it to IMAGE_SIZE x IMAGE_SIZE.

  Args:
    image_files: iterable of image file paths; images are processed in this order.
    model: model identifier. Only the exact string "cnn" enables the sharpening
      step; any other value skips it.

  Returns:
    A list with one NumPy image array per input path, in input order.

  Raises:
    ValueError: if cv2.imread cannot read one of the files.
  """
  ### A common kernel sharpening technique is used only for the CNN model to have
  ### some preprocessing before model training. The kernel does not depend on the
  ### image, so it is built once instead of on every iteration.
  kernel_sharpening = None
  if model=="cnn":
    kernel_sharpening = np.array([[-1,-1,-1],
                                  [-1,9,-1],
                                  [-1,-1,-1]])

  X=[]
  for file in image_files:
      ### Each image is read
      img = cv2.imread(file)

      ### cv2.imread signals failure by returning None instead of raising. Skipping
      ### the file would misalign images and annotations, so fail loudly here.
      if img is None:
        raise ValueError("Could not read image file: {!r}".format(file))

      if kernel_sharpening is not None:
        img = cv2.filter2D(img, -1, kernel_sharpening)

      ### The images are consistently resized to 224*224
      img = cv2.resize(img, (IMAGE_SIZE,IMAGE_SIZE))

      ### The image arrays are appended to List X
      X.append(np.array(img))

  return X

In [22]:

def annotations_extract():
  """Return the sorted paths of the XML files in the ``annotations`` folder of the working directory.

  The paths are sorted so that they keep the same order as the sorted image files.
  """
  ### Wildcard pattern matching every file name ending in "xml" inside <cwd>/annotations/
  xml_pattern = os.path.join(os.getcwd() + '/annotations/', '*xml')

  ### glob gives no ordering guarantee, so the matches are sorted before being returned
  return sorted(glob.glob(xml_pattern))



In [23]:
### The following function picks up the boundaries of the license plate from the image annotations.
### xmin, xmax, ymin, ymax denote the 4 co-ordinates
### The co-ordinates are rescaled to the 224*224 co-ordinate system, accounting for the height and width of different images

def annotation_standardise(f):
    """Read one annotation XML file and rescale its bounding box to the IMAGE_SIZE grid.

    Args:
        f: annotation XML source, passed unchanged to ``etree.parse``.

    Returns:
        ``[xmax, ymax, xmin, ymin]`` as ints (truncated), expressed in the
        IMAGE_SIZE x IMAGE_SIZE co-ordinate system. Note the order: the max
        corner comes first.

    Raises:
        ValueError: if the file has no ``size`` or no ``object/bndbox`` element.
    """
    annotation_tree = etree.parse(f)

    size_nodes = annotation_tree.xpath("size")
    box_nodes = annotation_tree.xpath("object/bndbox")

    # Without this check a missing element only surfaced as an UnboundLocalError
    # on the return line, with no hint about which file was malformed.
    if not size_nodes:
        raise ValueError("No <size> element found in annotation: {!r}".format(f))
    if not box_nodes:
        raise ValueError("No <object>/<bndbox> element found in annotation: {!r}".format(f))

    # When an element occurs several times the last occurrence is the one used.
    size_node = size_nodes[-1]
    box_node = box_nodes[-1]

    # Number of original pixels per pixel of the resized image, along each axis.
    x_scale = int(size_node.xpath("width")[0].text) / IMAGE_SIZE
    y_scale = int(size_node.xpath("height")[0].text) / IMAGE_SIZE

    xmin = int(box_node.xpath("xmin")[0].text) / x_scale
    ymin = int(box_node.xpath("ymin")[0].text) / y_scale
    xmax = int(box_node.xpath("xmax")[0].text) / x_scale
    ymax = int(box_node.xpath("ymax")[0].text) / y_scale

    return [int(xmax), int(ymax), int(xmin), int(ymin)]



In [24]:
### The Boundaries are extracted from each XML File

def boundary_extract(annotations_files):
  """Return the rescaled bounding box of every annotation file, in input order.

  Each path is passed to ``annotation_standardise`` and the results are
  collected into a list.
  """
  return [annotation_standardise(xml_path) for xml_path in annotations_files]

In [26]:
### Train/test preparation for an 80/20 split
### Validation is 10% of the training set

def train_test(X,y):
  """Scale the inputs and split them into train, validation and test sets.

  Args:
    X: images, converted with ``np.array``.
    y: bounding boxes, converted with ``np.array``.

  Returns:
    The tuple ``(X_train, X_val, y_train, y_val, X_test, y_test)``. 20% of the
    samples go to the test set and 10% of the remainder to the validation set.
  """
  ### The RGB pixel values are normalised from 0 to 1
  images = np.array(X) / 255
  ### NOTE(review): the box co-ordinates are divided by 255 as well, not by IMAGE_SIZE;
  ### presumably predictions are scaled back by 255 downstream - confirm before changing.
  boxes = np.array(y) / 255

  # Hold out the test set first, then carve the validation set out of what is left
  X_rest, X_test, y_rest, y_test = train_test_split(images, boxes, test_size=0.2, random_state=42)
  X_train, X_val, y_train, y_val = train_test_split(X_rest, y_rest, test_size=0.1, random_state=1)

  return X_train, X_val, y_train, y_val, X_test, y_test