In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals
from google.colab import drive
drive.mount('/content/gdrive/')

import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
font = cv2.FONT_HERSHEY_COMPLEX
from google.colab.patches import cv2_imshow
from PIL import Image

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive/


# Step 1 - Identify bank notes in the image

## Problem description

Given an input image containing one or multiple bank notes of some currency,
we need to find the bounding polygons (here, simply rectangles) for each bank note. Individually identified bank notes are then passed to the next step
of our image processing pipeline.

## Solution description

The color image is passed to a Canny edge detector. We use color image here,
because edge detection works better on it, since variations in color form edges that are important to us and would not be captured in a grayscale image.

The edge map is then passed to `findContours`, which finds all the closed contours in the edge map. 

Finally, for each contour we find a bounding rectangle and only keep ones that
are larger than 150x50 pixels (the defaults of `extract_segments`, subject to change) and are non-intersecting.
If 2 contours are intersecting, we keep the one with a larger area.

The selected bounding rectangles are used to crop images of bank notes from
the input image.

## Implementation

In [0]:
#Change the path name here 
img_name = "CAD.png"
# img_name = "cad-10.jpg"
path_name="/content/gdrive/My Drive/Colab Notebooks/Project/" + img_name
# Load the same file twice: once as a 3-channel color image (used for edge
# detection and cropping) and once decoded directly as grayscale.
img_color = cv2.imread(path_name)
img_gray = cv2.imread(path_name, cv2.IMREAD_GRAYSCALE)
# cv2.imread signals failure by returning None instead of raising, so fail
# loudly here rather than letting cv2_imshow choke on a None image below.
if img_color is None or img_gray is None:
  raise FileNotFoundError("Could not load the image: " + path_name)

cv2_imshow(img_gray)
cv2_imshow(img_color)

### Edge detection

In [0]:
# Detect edges using Canny edge detector
# The hysteresis thresholds are passed as (100, 40). Per the OpenCV docs the
# smaller of the two is used for edge linking and the larger one for finding
# strong edges, so the order in which they are given does not matter.
# L2gradient=True selects the L2 norm for the gradient magnitude.
edges = cv2.Canny(img_color, 100, 40, L2gradient=True)
cv2_imshow(edges)

### Contour extraction

In [0]:
# Find contours in the image of edges
# cv2.findContours returns (image, contours, hierarchy) in OpenCV 3.x but
# (contours, hierarchy) in OpenCV 2.x and 4.x. The contour list is always the
# second-to-last element, so index from the end to work with every version.
# _, threshold = cv2.threshold(edges, 250, 255, cv2.THRESH_BINARY)
contours = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

In [0]:
#For cropping, saving and giving boundary limits to currency

def rectanglesIntersection(coords1, size1, coords2, size2):
  """Return the overlap area of two axis-aligned rectangles.

  Args:
    coords1: (x, y) of the top-left corner of the first rectangle.
    size1: (width, height) of the first rectangle.
    coords2: (x, y) of the top-left corner of the second rectangle.
    size2: (width, height) of the second rectangle.

  Returns:
    The area of the intersection, or 0 when the rectangles do not overlap
    (rectangles that merely touch along an edge count as non-overlapping).
  """
  left = max(coords1[0], coords2[0])
  top = max(coords1[1], coords2[1])
  right = min(coords1[0] + size1[0], coords2[0] + size2[0])
  bottom = min(coords1[1] + size1[1], coords2[1] + size2[1])

  overlap_w = right - left
  overlap_h = bottom - top
  if overlap_w > 0 and overlap_h > 0:
    return overlap_w * overlap_h
  return 0

def extract_segments(contours, min_width=150, min_height=50, max_width=None, max_height=None):
  """Select non-overlapping bounding rectangles for the given contours.

  Each contour is reduced to its bounding rectangle. Rectangles outside the
  size limits are dropped. A rectangle "overlaps" an already kept one when
  their intersection covers more than 10% of the new rectangle's area; among
  overlapping rectangles the one with the larger area is kept.

  Args:
    contours: iterable of contours accepted by cv2.boundingRect.
    min_width: exclusive lower bound on rectangle width.
    min_height: exclusive lower bound on rectangle height.
    max_width: exclusive upper bound on width, or None for no limit.
    max_height: exclusive upper bound on height, or None for no limit.

  Returns:
    A list of ((x, y), (w, h)) tuples.
  """
  segments = []
  for c in contours:
    x, y, w, h = cv2.boundingRect(c)
    # threshold on the size of the bank note.
    # since all images are resized to the same dimensions, should work fine.
    fits_limits = (w > min_width and
                   h > min_height and
                   ((max_width is None) or w < max_width) and
                   ((max_height is None) or h < max_height))
    if not fits_limits:
      continue

    candidate = ((x, y), (w, h))
    area = h * w
    # indices of kept rectangles sharing more than 10% of the candidate's area
    overlapping = [
        i for i, (coords, size) in enumerate(segments)
        if rectanglesIntersection((x, y), (w, h), coords, size) > area * 0.1
    ]
    if not overlapping:
      segments.append(candidate)
      continue

    # out of intersecting rectangles keep the one with the largest area
    smaller = [i for i in overlapping
               if segments[i][1][0] * segments[i][1][1] < area]
    if smaller:
      # The candidate takes the slot of the first smaller rectangle; the other
      # smaller ones are removed so the candidate is not stored more than once.
      segments[smaller[0]] = candidate
      for i in reversed(smaller[1:]):
        del segments[i]
  return segments

def display_segments(original_img, segments, output_dir='/content/individual_bills'):
  """Crop every segment from the image, save it as a PNG and display it.

  Args:
    original_img: image array indexed as [row, column, ...].
    segments: iterable of ((x, y), (w, h)) rectangles.
    output_dir: directory that receives the 'segmented-<index>.png' files.
      It is created when missing, because cv2.imwrite does not create
      directories and only reports failure through its return value.
  """
  import os

  os.makedirs(output_dir, exist_ok=True)
  for i, segment in enumerate(segments):
    (x, y), (w, h) = segment
    new_img = original_img[y : y+h, x : x+w]
    print(i)
    cv2.imwrite(os.path.join(output_dir, 'segmented-{0}.png'.format(i)), new_img)
    cv2_imshow(new_img)

# Pick candidate bank-note rectangles among the contours (default size limits),
# then crop each of them from the color image, save it and display it.
bank_notes = extract_segments(contours)
display_segments(img_color, bank_notes)

## Results discussion

This solution works fairly well, assuming the only big objects in the image are banknotes. Since at this stage we do not classify whether the found object
is a banknote or not, any object that's large enough would be selected.
That's fine, however, since our proposal does not suggest we can deal with a more complex case. And if needed, we could add it later.

The biggest downfall of this solution is that oftentimes it doesn't include
the full image of the bank note, because the contour breaks around the transparent strip on the right (for CAD at least). The good thing is that most of the important information is on the left of that strip (such as denomination, country of origin, maple leaf), so it shouldn't affect the rest of our pipeline that much.

# Step 2 - Segment bank notes

## Problem description

Image segmentation on the bank note should identify areas of interest.
For example, the segmentation mask would contain areas with denomination of the currency, the name of the country of origin, country-specific features, such as flags. This would allow us to apply classification of areas of interest in the next step.

## Implementation plans

We already know that the task of image segmentation is best accomplished by a convolutional neural network with encoder/decoder architecture, such as UNet.
So for this part of the project we can just reuse UNet from our A3. However, the main challenge here is that we have little to no data of images of currency.
We have found no dataset that contains images of currency and corresponding segmentation masks with areas of interest. Any training/testing data we would need to create ourselves. It is possible to manually create a small dataset,
but it clearly would not be good enough to get high accuracy on the test set.

One approach I want to try here is transfer learning, just like in A3. Train UNet on a large dataset of different data and then use the pretrained model
to improve performance on our small currency data set. The difference from A3
is that there the transfer dataset was very similar to the original one (i.e. it also contained images of cats), which is not the case here. So we probably need a different approach to transfer learning. For example, use the output of the last layer of that UNet as the feature input to our currency segmentator.

Well, in any case, our first order of business should be to obtain a currency dataset.

## UNet implementation

In [0]:
# Install TensorFlow
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf
import numpy as np 
import cv2 as cv
import glob
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
import tensorflow.keras.optimizers as optimizers
import matplotlib.pyplot as plt
import tensorflow.keras.backend as K

def diceCoef(mask, predicted_mask, smooth=10 ** (-7)):
    """Compute the per-sample Dice coefficient between two batches of masks.

    Sums are taken over axes 1, 2 and 3, so one value is produced per entry
    along axis 0. `smooth` is added to numerator and denominator, which keeps
    the ratio defined when both masks are empty.
    """
    per_sample_axes = [1, 2, 3]
    overlap = K.sum(K.abs(mask * predicted_mask), axis=per_sample_axes)
    mask_total = K.sum(K.abs(mask), axis=per_sample_axes)
    predicted_total = K.sum(K.abs(predicted_mask), axis=per_sample_axes)
    return (2. * overlap + smooth) / (mask_total + predicted_total + smooth)

class UNet:
    """Encoder/decoder segmentation network with its data and helper methods.

    The model takes 256x256 single-channel images and outputs a 256x256
    single-channel sigmoid mask. Data attributes stay None until one of the
    load methods has been called; `model` stays None until `buildModel`.
    """

    def __init__(self, loss="binary_crossentropy"):
        self.test_input = None
        self.train_input = None
        self.test_seg = None
        self.train_seg = None
        self.train_n = 0
        self.test_n = 0
        self.model = None
        self.loss = loss
    
    def buildModel(self):
        """Build and compile the network, storing it in `self.model`.

        Layer order and hyper-parameters must stay as they are: weight files
        saved from this architecture are loaded back by layer topology.
        """
        # contracting path
        inputs = keras.Input(shape=(256, 256, 1))
        conv64_1 = layers.Conv2D(64, 3, padding="same", activation="relu",
                                 kernel_initializer="he_normal")(inputs)
        conv64_2 = layers.Conv2D(64, 3, padding="same", activation="relu",
                                 kernel_initializer="he_normal")(conv64_1)
        dropout64 = layers.Dropout(0.2)(conv64_2)
        maxpool1 = layers.MaxPool2D(pool_size=(2, 2), strides=2,
                                    padding="same")(dropout64) # 128 x 128
        conv128_1 = layers.Conv2D(128, 3, padding="same", activation="relu",
                                  kernel_initializer="he_normal")(maxpool1)
        conv128_2 = layers.Conv2D(128, 3, padding="same", activation="relu",
                                  kernel_initializer="he_normal")(conv128_1)
        dropout128 = layers.Dropout(0.2)(conv128_2)
        maxpool2 = layers.MaxPool2D(pool_size=(2, 2), strides=2,
                                    padding="same")(dropout128) # 64 x 64
        conv256_1 = layers.Conv2D(256, 3, padding="same", activation="relu",
                                  kernel_initializer="he_normal")(maxpool2)
        conv256_2 = layers.Conv2D(256, 3, padding="same", activation="relu",
                                  kernel_initializer="he_normal")(conv256_1)
        dropout256 = layers.Dropout(0.2)(conv256_2)

        maxpool3 = layers.MaxPool2D(pool_size=(2, 2), strides=2,
                                    padding="same")(dropout256) # 32 x 32
        conv512_1 = layers.Conv2D(512, 3, padding="same", activation="relu",
                                  kernel_initializer="he_normal")(maxpool3)
        conv512_2 = layers.Conv2D(512, 3, padding="same", activation="relu",
                                  kernel_initializer="he_normal")(conv512_1)
        dropout512 = layers.Dropout(0.2)(conv512_2)

        # expanding path; each stage is concatenated with the encoder
        # activations of the same resolution (taken before dropout)
        upsample2 = layers.UpSampling2D(size=(2, 2))(dropout512) # 64 x 64
        upconv2 = layers.Conv2DTranspose(256, 2, padding="same",
                                         activation="relu",
                                         kernel_initializer="he_normal")(upsample2)
        concat2 = layers.Concatenate()([conv256_2, upconv2])
        # NOTE: despite the "256" in their names these two layers use 128 filters.
        conv256up_1 = layers.Conv2DTranspose(128, 3, padding="same",
                                             activation="relu",
                                             kernel_initializer="he_normal")(concat2)
        conv256up_2 = layers.Conv2DTranspose(128, 3, padding="same",
                                             activation="relu",
                                             kernel_initializer="he_normal")(conv256up_1)
        dropout256up = layers.Dropout(0.2)(conv256up_2)

        upsample3 = layers.UpSampling2D(size=(2, 2))(dropout256up) # 128 x 128
        upconv3 = layers.Conv2DTranspose(128, 2, padding="same",
                                         activation="relu",
                                         kernel_initializer="he_normal")(upsample3)
        concat3 = layers.Concatenate()([conv128_2, upconv3])
        conv128up_1 = layers.Conv2DTranspose(128, 3, padding="same",
                                             activation="relu",
                                             kernel_initializer="he_normal")(concat3)
        conv128up_2 = layers.Conv2DTranspose(128, 3, padding="same",
                                             activation="relu",
                                             kernel_initializer="he_normal")(conv128up_1)
        dropout128up = layers.Dropout(0.2)(conv128up_2)
        upsample4 = layers.UpSampling2D(size=(2, 2))(dropout128up) # 256 x 256
        upconv4 = layers.Conv2DTranspose(64, 2, padding="same",
                                         activation="relu",
                                         kernel_initializer="he_normal")(upsample4)
        concat4 = layers.Concatenate()([conv64_2, upconv4])
        conv64up_1 = layers.Conv2DTranspose(64, 3, padding="same",
                                            activation="relu",
                                            kernel_initializer="he_normal")(concat4)
        conv64up_2 = layers.Conv2DTranspose(64, 3, padding="same",
                                            activation="relu",
                                            kernel_initializer="he_normal")(conv64up_1)
        dropout64up = layers.Dropout(0.2)(conv64up_2)
        conv64up_3 = layers.Conv2DTranspose(2, 3, padding="same",
                                            kernel_initializer="he_normal")(dropout64up)
        
        # 1x1 layer with sigmoid turns the features into a per-pixel probability
        outputs = layers.Conv2DTranspose(
            filters=1,
            kernel_size=1,
            padding="same",
            activation="sigmoid",
            kernel_initializer="he_normal"
       )(conv64up_3)


        self.model = keras.Model(inputs=inputs, outputs=outputs)
        
        self.model.compile(optimizer=optimizers.Adam(),
              loss=self.loss,
              metrics=['accuracy', diceCoef])
        print("UNet model is complete")

    def train(self, epochs, batch_size=10, validation_split=0.15):
        """Fit the model on the loaded training set.

        Returns:
            Whatever `self.model.fit` returns, so callers can inspect the
            training history.
        """
        print("Starting training...")
        return self.model.fit(
            self.train_input,
            self.train_seg,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split)

    
    def evaluate(self):
        """Evaluate the model on the loaded test set and return the result."""
        print("Evaluating segmentation model...")
        return self.model.evaluate(self.test_input, self.test_seg, verbose=2)

    def predict(self, imgs):
        """Return the model's predictions for a batch of images."""
        predictions = self.model.predict(imgs)
        return predictions
    
    def saveWeights(self, name):
        """Save the model weights to the file `name`."""
        self.model.save_weights(name)
    
    def loadWeights(self, name):
        """Load model weights from the file `name`."""
        self.model.load_weights(name)
    
    def plotPrediction(self, prediction, truth, img):
        """Show predicted mask, ground-truth mask and input image side by side.

        Channel 0 of each argument is displayed in grayscale.
        """
        fig = plt.figure(figsize=(10, 10))
        fig.add_subplot(1, 3, 1)
        plt.imshow(prediction[...,0], cmap="gray")
        fig.add_subplot(1, 3, 2)
        plt.imshow(truth[..., 0], cmap="gray")

        fig.add_subplot(1, 3, 3)
        plt.imshow(img[..., 0], cmap="gray")

        plt.show()

    def loadData(self, trainInputDirPath, trainSegDirPath, testInputDirPath, testSegDirPath, trainAugInput=None, trainAugSeg=None):
        """Load train/test images and masks from four directories.

        When both `trainAugInput` and `trainAugSeg` are given, the images in
        those directories are appended to the training set. Images and masks
        are paired by their position in the sorted file listing.
        """
        self.train_input = self.__loadDataFromDir(trainInputDirPath)
        self.test_input = self.__loadDataFromDir(testInputDirPath)
        self.train_seg = self.__loadDataFromDir(trainSegDirPath)
        self.test_seg = self.__loadDataFromDir(testSegDirPath)
        if trainAugInput and trainAugSeg:
            self.train_input = np.vstack((self.train_input, self.__loadDataFromDir(trainAugInput)))
            self.train_seg = np.vstack((self.train_seg, self.__loadDataFromDir(trainAugSeg)))
        self.train_n = self.train_input.shape[0]
        self.test_n = self.test_input.shape[0]

        print("Data is loaded:")
        print("Number of training images: {0}".format(self.train_n))
        print("Number of testing images: {0}".format(self.test_n))
        print("Training input set shape: {0}".format(self.train_input.shape))
        print("Training segmentation set shape: {0}".format(self.train_seg.shape))
        print("Testing input set shape: {0}".format(self.test_input.shape))
        print("Testing segmentation set shape: {0}".format(self.test_seg.shape))

    def loadDataAndSplit(self, inputDirPath, segDirPath):
        """Load images and masks and split them 80/20 into train and test sets.

        The split uses a fixed random_state, so it is reproducible.
        """
        # load data and split it into train and test sets
        inputs = self.__loadDataFromDir(inputDirPath)
        masks = self.__loadDataFromDir(segDirPath)
        from sklearn.model_selection import train_test_split
        self.train_input, self.test_input, \
        self.train_seg, self.test_seg = \
        train_test_split(inputs, masks, test_size=0.2, random_state=42)
        # keep the counters in sync, as loadData does
        self.train_n = self.train_input.shape[0]
        self.test_n = self.test_input.shape[0]
    
    def __loadDataFromDir(self, dirPath):
        """Load every file in `dirPath` as a (256, 256, 1) float32 image in [0, 1].

        Files are read in sorted name order. Returns an array stacking all
        images along axis 0 (an empty array when the directory has no files).

        Raises:
            ValueError: if a file cannot be decoded as an image.
        """
        imgs = []
        for imgName in sorted(glob.glob(dirPath + "/*")):
            img = cv.imread(imgName)
            if img is None:
                # cv.imread returns None instead of raising on unreadable files
                raise ValueError("Could not read image file: {0}".format(imgName))
            # NOTE: cv.imread yields BGR channel order, so RGB2GRAY applies the
            # red/blue weights swapped. Left unchanged so the preprocessing
            # matches what previously saved weights were trained on.
            img = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
            img = cv.resize(img, (256, 256)) / 255.0
            img = np.expand_dims(img, axis=2).astype(np.float32)
            imgs.append(img)
        
        imgsMatrix = np.array(imgs)
        return imgsMatrix

## Training UNet

In [0]:
!cp -r "/content/gdrive/My Drive/Colab Notebooks/Project/oxford_data" ./
# !unzip "/content/oxford_data-20191107T212728Z-001.zip" 

In [0]:
# Build the UNet that is pre-trained on the Oxford data copied into
# ./oxford_data by the previous cell.
# NOTE: the recorded output of this cell reports 0 training and 0 testing
# images, i.e. the directories were empty or missing when it was last run;
# check that the copy step succeeded before training.
transferUnet = UNet(loss="binary_crossentropy")
transferUnet.loadData(
        "./oxford_data/Train/input",
        "./oxford_data/Train/output",
        "./oxford_data/Test/input",
        "./oxford_data/Test/output")
transferUnet.buildModel()

Data is loaded:
Number of training images: 0
Number of testing images: 0
Training input set shape: (0,)
Training segmentation set shape: (0,)
Testing input set shape: (0,)
Testing segmentation set shape: (0,)
UNet model is complete


In [0]:
total_epochs = 0

In [0]:
epochs = 15
total_epochs += epochs
transferUnet.train(epochs, 32)
transferUnet.evaluate()

In [0]:
transferUnet.saveWeights("/content/gdrive/My Drive/Colab Notebooks/Project/oxford_unet_256_epochs_{0}.h5".format(total_epochs))

In [0]:
def plotTestPrediction(unet, i=0):
    """Predict the mask for the i-th test image and plot it.

    The prediction is shown next to the matching ground-truth mask and the
    input image through `unet.plotPrediction`.
    """
    sample = unet.test_input[i]
    # the model works on batches, so wrap the single image in a batch of one
    batch = np.array([sample])
    predicted_mask = unet.predict(batch)[0]
    unet.plotPrediction(predicted_mask, unet.test_seg[i], unet.test_input[i])

def _binaryCrossEntropy(predicted, truth, eps=1e-7):
    """Mean binary cross-entropy between predicted probabilities and truth."""
    # clip so that log() never sees exactly 0 or 1
    predicted = np.clip(predicted, eps, 1.0 - eps)
    return float(-np.mean(truth * np.log(predicted)
                          + (1.0 - truth) * np.log(1.0 - predicted)))

def plotTrainPrediction(unet, i=0):
    """Predict the mask for the i-th training image, print its loss and plot it.

    The printed value is the mean binary cross-entropy between the predicted
    mask and the ground-truth mask. It is computed with NumPy because no
    `crossEntropy` function is defined in this notebook.
    """
    img = unet.train_input[i]
    predictions = unet.predict(np.array([img]))
    print(_binaryCrossEntropy(predictions, np.array([unet.train_seg[i]])))
    unet.plotPrediction(predictions[0], unet.train_seg[i], unet.train_input[i])

In [0]:
plotTestPrediction(transferUnet, 0)

## Refine UNet on currency data

We have a very small data set of bills and their segmentation masks,
so the results that we get are probably overfitted and suboptimal.
One thing that we've done to improve it, is to augment our data set. 
We've randomly rotated every image/mask pair in the dataset.
Other transformations do not really make sense, as translating/zooming in might
make the segment we want move out of the view entirely.

One possible transformation that we could do is a projective transform. 
However, I'm not sure how to implement that right now.

In [0]:
# !cp -r "/content/gdrive/My Drive/Colab Notebooks/Project/currency_data" ./
!cp "/content/gdrive/My Drive/Colab Notebooks/Project/data.zip" ./
!unzip "/content/data.zip" 

We have a very small dataset, so we augmented
it by randomly rotating every image a bunch of times.

In [0]:
import numpy as np
import cv2 as cv
import glob
import matplotlib.pyplot as plt

class DataAugmentor:
    """Create rotated copies of image/mask pairs to enlarge a small dataset.

    Images and masks are paired by their position in the sorted file listing
    of the two directories, so matching files must sort identically.
    """

    def __init__(self, inputDirPath, maskDirPath):
        """Read all images and masks as 128x128 grayscale arrays in [0, 1].

        Raises:
            ValueError: if a file cannot be decoded or the two directories
                hold a different number of files.
        """
        self.imgs = self._loadGrayImages(inputDirPath)
        self.masks = self._loadGrayImages(maskDirPath)
        if len(self.imgs) != len(self.masks):
            raise ValueError(
                "Found {0} images but {1} masks".format(len(self.imgs), len(self.masks)))
        
        self.augmentedImgs = []
        self.augmentedMasks = []

    @staticmethod
    def _loadGrayImages(dirPath):
        """Load every file in `dirPath`, in sorted name order, as a 2-D array."""
        loaded = []
        # glob returns files in arbitrary order; sort so that the i-th image
        # and the i-th mask belong together
        for imgName in sorted(glob.glob(dirPath + "/*")):
            img = cv.imread(imgName)
            if img is None:
                # cv.imread returns None instead of raising on unreadable files
                raise ValueError("Could not read image file: {0}".format(imgName))
            img = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
            img = cv.resize(img, (128, 128)) / 255.0
            loaded.append(img)
        return np.array(loaded)
    
    def augmentAll(self, n=1):
        """Append n randomly rotated copies of every image/mask pair.

        Results accumulate in `augmentedImgs` / `augmentedMasks` (stored as
        arrays); the method may be called repeatedly.
        """
        # start from lists so that repeated calls can keep appending
        augmentedImgs = list(self.augmentedImgs)
        augmentedMasks = list(self.augmentedMasks)
        for epoch in range(n):
            # randomly rotate every image n number of times
            for i in range(len(self.imgs)):
                img, mask = self.rotateRandom(self.imgs[i], self.masks[i])
                augmentedImgs.append(img)
                augmentedMasks.append(mask)
        self.augmentedImgs = np.array(augmentedImgs)
        self.augmentedMasks = np.array(augmentedMasks)
    
    def saveAugmented(self, inputDirPath, maskDirPath):
        """Write augmented images and masks as 'augmented-<index>.jpg' files.

        Pixel values are scaled back from [0, 1] to [0, 255] before saving.
        """
        print("Saving {0} augmented images".format(len(self.augmentedImgs)))
        for i in range(len(self.augmentedImgs)):
            fileName = "{0}/augmented-{1}.jpg".format(inputDirPath, i)
            cv.imwrite(fileName, self.augmentedImgs[i] * 255)
        for i in range(len(self.augmentedMasks)):
            fileName = "{0}/augmented-{1}.jpg".format(maskDirPath, i)
            cv.imwrite(fileName, self.augmentedMasks[i] * 255)

    def rotateRandom(self, img, mask):
        """Rotate image and mask about the center by the same random angle.

        The angle is a multiple of 10 degrees between 10 and 350.
        """
        rotation = np.random.randint(1, 36) * 10

        rows, cols = img.shape[:2]
        M = cv.getRotationMatrix2D((cols / 2,rows / 2), rotation ,1)
        img = cv.warpAffine(img, M, (cols,rows))
        mask = cv.warpAffine(mask, M, (cols, rows))
        return img, mask

# Generate 10 rotated copies of every image/mask pair.
# NOTE: the augmented files are written into the same ./inputs and ./masks
# directories the originals are read from, so re-running this cell also
# augments the previously generated files.
if __name__ == '__main__':
    augmentor = DataAugmentor("./inputs", "./masks")
    augmentor.augmentAll(10)
    augmentor.saveAugmented("./inputs", "./masks")

In [0]:
# Build a second UNet for the currency data and initialise it with the
# weights pre-trained on the Oxford data (transfer learning).
# NOTE(review): the weights file loaded here ("oxford_unet_epochs_35.h5") does
# not follow the "oxford_unet_256_epochs_<n>.h5" pattern used by the save cell
# above — confirm that it was produced by the same 256x256 architecture.
currencyUnet = UNet(loss="binary_crossentropy")
currencyUnet.loadDataAndSplit(
        "./currency_data/inputs",
        "./currency_data/masks")
currencyUnet.buildModel()
currencyUnet.loadWeights("/content/gdrive/My Drive/Colab Notebooks/Project/oxford_unet_epochs_35.h5")

UNet model is complete


In [0]:
currencyUnet.train(10, 8, 0)
currencyUnet.evaluate()

Starting training...
Train on 96 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Evaluating segmentation model...
25/1 - 0s - loss: 0.1649 - accuracy: 0.9354 - diceCoef: 0.5617


## Evaluate results

In [0]:
import cv2
import matplotlib.pyplot as plt

# Load every bill cropped in Step 1 and preprocess it the same way the UNet
# data loader does: grayscale, resized to 256x256, scaled to [0, 1] and given
# a trailing channel axis.
bills = []
for i, imgName in enumerate(sorted(glob.glob("/content/individual_bills/*"))):
  billImg = cv2.imread(imgName)
  billImg = cv2.cvtColor(billImg, cv2.COLOR_RGB2GRAY)
  billImg = np.expand_dims(cv2.resize(billImg, (256, 256)) / 255.0, 2)

  bills.append(billImg)

bills = np.array(bills)
print("Predicting masks for {0} bills".format(len(bills)))
predictions = currencyUnet.predict(bills)
# Show each predicted mask (left) next to the bill it was computed from (right).
for i, prediction in enumerate(predictions):
  prediction = prediction[..., 0]
  fig = plt.figure(figsize=(10, 10))
  fig.add_subplot(1, 2, 1)
  plt.imshow(prediction, cmap="gray")
  fig.add_subplot(1, 2, 2)
  plt.imshow(bills[i][..., 0], cmap="gray")

# Step 3 - Extract areas of interest

## Extract segment with denomination

Threshold the segmentation mask to obtain strong boundaries and then
find contours in the resulting mask. Extract bounding rectangles for values on bills.

In [0]:
import os

# cv2.imwrite does not create directories and only reports failure through its
# return value, so make sure the output directory exists first.
os.makedirs("./segmented_bills", exist_ok=True)

for i, prediction in enumerate(predictions):
  # binarise the predicted probability mask
  predicted_mask = (prediction[...,0] * 255).astype(np.uint8)
  _, thresholded = cv2.threshold(predicted_mask, 127, 255, cv2.THRESH_BINARY)
  # The contour list is the second-to-last element of the result in every
  # OpenCV version (3.x returns three values, 2.x/4.x return two).
  contours_bill = cv2.findContours(thresholded, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

  # two separate copies of the bill: one to draw on, one to crop from
  original_contour = bills[i][..., 0] * 255
  original = bills[i][..., 0] * 255
  for cnt in contours_bill:
      approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
      cv2.drawContours(original_contour, [approx], 0, (0), 1)

  fig = plt.figure(figsize=(10, 10))
  # fig.suptitle("Thresholded mask for bill {0}".format(i))
  fig.add_subplot(1, 3, 1)
  plt.imshow(thresholded, cmap="gray")
  fig.add_subplot(1, 3, 2)
  plt.imshow(bills[i][..., 0], cmap="gray")
  fig.add_subplot(1, 3, 3)
  plt.imshow(original_contour, cmap="gray") 

  # keep regions wider than 10 px, taller than 80 px and below 120 px per side
  areas = extract_segments(contours_bill, 10, 80, 120, 120)
  for j, area in enumerate(areas):
    (x, y), (w, h) = area
    segment = original[y : y+h, x : x+w]
    cv2.imwrite("./segmented_bills/segment-{0}-{1}.jpg".format(i, j), segment)




In [0]:
# Load every denomination crop as grayscale and binarise it relative to its
# own mean brightness (pixels above 80% of the mean become white).
value_images = []
for segmentName in sorted(glob.glob("/content/segmented_bills/*")):
  # cv2.IMREAD_GRAYSCALE is the proper imread flag; cv2.CV_8UC1 is a matrix
  # type constant that only worked because it has the same numeric value.
  value = cv2.imread(segmentName, cv2.IMREAD_GRAYSCALE)
  mean = np.mean(value)
  _, thresh_img = cv2.threshold(value, mean * 0.8, 255, cv2.THRESH_BINARY)
  cv2_imshow(thresh_img)
  value_images.append(thresh_img)


## Extract digits

Extract digits from the images of bill values,
and preprocess each digit into a 28x28 image,
which can be fed directly into the neural network
in the next step

In [0]:
from imutils.perspective import four_point_transform
from imutils import contours
import imutils

# Maps the index of a bill value image to a list of (x, digit_image) tuples,
# where digit_image is a 28x28 array ready for the digit classifier.
bills_to_digits = {}
for j, value_img in enumerate(value_images):
	# find contours in the image, then select the digit-sized bounding boxes
	edges = cv2.Canny(value_img, 100, 200)
	cnts = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	cnts = imutils.grab_contours(cnts)

	digitCnts = extract_segments(cnts, 10, 40)
	# display_segments(value_img, digitCnts)

	# loop over each of the digits
	bills_to_digits[j] = []
	for i, c in enumerate(digitCnts):
		# extract the digit ROI
		(x, y), (w, h) = c[0], c[1]
		roi = value_img[y:y + h, x:x + w]
		cv2_imshow(roi)
		# convert every digit image into a
		# 28x28 image, while retaining the original
		# ratios.
		# This is done to improve digit recognition,
		# as modifying ratios often leads to incorrect
		# predictions.
		img_square = np.full((28, 28), 255.0)
		# make digit slightly wider than original
		ratio = 1.5 * roi.shape[1] / roi.shape[0]
		# clamp to the canvas: a width of 0 breaks cv2.resize and a width
		# above 28 does not fit into the 28x28 square
		scaled_width = min(max(int(ratio * 28), 1), 28)
		# center horizontally; the right margin gets the extra pixel when
		# the remaining space is odd
		leftOffset = (28 - scaled_width) // 2
		rightOffset = 28 - scaled_width - leftOffset
		roi = cv2.resize(roi, (scaled_width, 20), interpolation=cv2.INTER_LINEAR)
		# an explicit end index also works when rightOffset is 0
		img_square[3: 23, leftOffset : 28 - rightOffset] = roi
		# invert, so the digit is white on a black background
		_, img_square = cv2.threshold(img_square, 150, 255, cv2.THRESH_BINARY_INV)

		# NOTE: the file name only contains the digit index, so the digits of
		# later bills overwrite those of earlier ones on disk.
		cv2.imwrite("./digit-{0}.png".format(i), img_square)
		cv2_imshow(img_square)
		# add x coordinate to digit, so that we can
		# tell which digit comes first
		bills_to_digits[j].append((x, img_square))
	

## Digit classification

In [0]:
# save the final model to file
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Reference: https://machinelearningmastery.com/how-to-develop-a-convolutional-neural-network-from-scratch-for-mnist-handwritten-digit-classification/

# load train and test dataset
def load_dataset():
  """Load MNIST with a channel axis on the images and one-hot labels.

  Returns:
    A tuple (trainX, trainY, testX, testY). Images are reshaped to
    (samples, 28, 28, 1); labels are one-hot encoded over 10 classes.
  """
  (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

  # append the single channel axis
  train_images = train_images.reshape((train_images.shape[0], 28, 28, 1))
  test_images = test_images.reshape((test_images.shape[0], 28, 28, 1))

  # one-hot encode the digit labels
  train_labels = to_categorical(train_labels, 10)
  test_labels = to_categorical(test_labels, 10)
  return train_images, train_labels, test_images, test_labels

# scale pixels
def pre_process(train, test):
	"""Convert both image arrays to float32 and scale them from [0, 255] to [0, 1].

	Returns:
		A tuple (train_norm, test_norm) with the normalized arrays.
	"""
	max_pixel = 255.0
	train_norm = train.astype('float32') / max_pixel
	test_norm = test.astype('float32') / max_pixel
	return train_norm, test_norm

# define cnn model
def define_model():
  """Build and compile the CNN used to classify 28x28 single-channel digits.

  Returns:
    A compiled Sequential model with a 10-way softmax output, using the Adam
    optimizer and categorical cross-entropy loss.
  """
  model = Sequential()
  # the input shape is declared once, on the first layer
  model.add(layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
  model.add(layers.Conv2D(64, kernel_size=(3, 3), activation='relu'))
  model.add(layers.MaxPooling2D(pool_size=(2, 2)))
  model.add(layers.Dropout(0.25))
  # Sequential.add() takes only the layer; passing input_shape to add()
  # itself is not part of its signature
  model.add(layers.Flatten())
  model.add(layers.Dense(128, activation='relu'))
  model.add(layers.Dropout(0.5))
  model.add(layers.Dense(10, activation='softmax'))
  model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
  return model

# run the test to evaluate the model
def test():
    """Train the digit classifier on MNIST and save it to 'final_model.h5'.

    Displays the second test image as a sanity check before training. The
    test split is loaded and normalized but not evaluated here.
    """
    train_images, train_labels, test_images, test_labels = load_dataset()
    cv2_imshow(test_images[1][..., 0])

    # scale pixel values to [0, 1]
    train_images, test_images = pre_process(train_images, test_images)

    classifier = define_model()
    classifier.fit(train_images, train_labels, epochs=10, batch_size=128, verbose=1)
    classifier.save('final_model.h5')
    # To evaluate a previously saved model instead:
    # model = load_model('final_model.h5')
    # _, acc = model.evaluate(testX, testY, verbose=0)
    # print('> %.3f' % (acc * 100.0))
			
# entry point, run the test harness
test()

In [0]:
# load and prepare the image
def load_image(filename):
	"""Load an image file as a normalized single-sample batch for the classifier.

	Args:
		filename: path of the image to read.

	Returns:
		A float32 array of shape (1, height, width, 1) with values in [0, 1].

	Raises:
		FileNotFoundError: if the file cannot be read as an image.
	"""
	# IMREAD_GRAYSCALE is the imread flag that forces a single channel;
	# cv2.COLOR_BGR2GRAY is a cvtColor code and does not do that here.
	img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
	if img is None:
		# cv2.imread returns None instead of raising on failure
		raise FileNotFoundError("Could not load the image: {0}".format(filename))
	cv2_imshow(img)
	# reshape into a single sample with 1 channel
	img_square = np.expand_dims(img, 0)
	img_square = np.expand_dims(img_square, 3)
	# prepare pixel data
	img_square = img_square.astype('float32')
	img_square = img_square / 255.0
	return img_square

def predict_value(image, model=None):
	"""Classify a single digit image and return the digit as a string.

	Args:
		image: 2-D array with pixel values in [0, 255].
		model: optional classifier exposing predict(); when omitted the model
			is loaded from 'final_model.h5'. Pass a loaded model to avoid
			re-reading the file for every digit.

	Returns:
		The predicted digit as a string, e.g. "7".
	"""
	# reshape into a single sample with 1 channel
	img = np.expand_dims(image, 0)
	img = np.expand_dims(img, 3)
	# prepare pixel data
	img = img.astype('float32')
	img = img / 255.0
	if model is None:
		model = load_model('final_model.h5')
	# The arg-max over the class scores is what Sequential.predict_classes
	# returned for a softmax output; predict_classes no longer exists in
	# newer Keras releases.
	digit = np.argmax(model.predict(img), axis=-1)
	print(digit[0])
	return str(digit[0])

# entry point, run the example
# Recognise the digits of every bill from left to right and combine them into
# the bill's numeric value.
bill_values = []
for index, digitImgs in bills_to_digits.items():
	value = ""
	print("Predicting value of bill", index)
	# each entry is (x coordinate, digit image); sorting by x gives reading order
	for digitOrder, digitImg in sorted(digitImgs, key = lambda pair: pair[0]):
		cv2_imshow(digitImg)
		value += predict_value(digitImg)
	if not value:
		# int("") would raise; a bill without detected digits has no value to add
		print("No digits found for bill", index)
		continue
	value = int(value)
	print("Predicted value is", value)
	bill_values.append(value)
print("Bill values "+ str(bill_values))

# Step 4 - Currency type classification

Determine whether the bill is CAD or USD by doing template matching
against maple leaf and federal reserve stamp. If maple leaf results in a better
match, classify as CAD, if federal reserve stamp results in a better match,
classify as USD. 

In [0]:
import imutils

# Load the first bill image and convert to grayscale
# NOTE: only 'segmented-0.png' is classified, so the currency reported at the
# end is based on this single bill.
image = cv2.imread('/content/individual_bills/segmented-0.png')
# image = cv2.imread('/content/gdrive/My Drive/Colab Notebooks/Project/usd-5-front.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2_imshow(gray)

# Templates for the two currencies. The USD template is resized to the exact
# dimensions of the CAD template so that both are matched at the same size.
template_cad = cv2.imread('/content/gdrive/My Drive/Colab Notebooks/Project/tcad-20-front-2.jpg')
template_usd = cv2.imread('/content/gdrive/My Drive/Colab Notebooks/Project/tusd-100-front-2.jpg')
template_usd = cv2.resize(template_usd, (template_cad.shape[1], template_cad.shape[0]))

def gradient(img):
  """Return a binary edge map: the 8-bit Laplacian of img thresholded at 100."""
  laplacian = cv2.Laplacian(img, cv2.CV_8U)
  # cv2.threshold returns (threshold used, thresholded image); keep the image
  return cv2.threshold(laplacian, 100, 255, cv2.THRESH_BINARY)[1]

def matchTemplateAtScale(image, template):
  """Find the best match of a template's edge map in an image over many scales.

  The template is converted to grayscale, halved in size and reduced to Canny
  edges. The image is resized to 50 scales between 2.0 and 0.2 of its width;
  at every scale its Canny edges are matched against the template with
  cv2.TM_CCOEFF. The best location is outlined and displayed.

  Args:
    image: image to search in, either BGR (3 channels) or grayscale.
    template: BGR template image.

  Returns:
    The highest matching score over all scales, or -inf when the template
    does not fit into the image at any scale.
  """
  # Search in the image that was passed in, not in a notebook-level global.
  if image.ndim == 3:
    search_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  else:
    search_img = image

  template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
  template = imutils.resize(template, width = int(template.shape[1] * 0.5))
  template = cv2.Canny(template, 100, 200)
  # template = gradient(template)

  (tH, tW) = template.shape[:2]
  cv2_imshow(template)

  print("Template size", template.shape)
  print("Image size", image.shape)

  bestVal, bestLoc, bestResized = np.float32("-inf"), None, None
  # loop over the scales of the image, from largest to smallest
  for scale in np.linspace(0.2, 2, 50)[::-1]:
    resized = imutils.resize(search_img, width = int(search_img.shape[1] * scale))
    if resized.shape[1] < tW or resized.shape[0] < tH:
      # scale is too small for the template to fit
      continue
    # detect edges in the resized, grayscale image and apply template
    # matching to find the template in the image
    edges = cv2.Canny(resized, 100, 200)
    result = cv2.matchTemplate(edges, template, cv2.TM_CCOEFF)
    # for TM_CCOEFF a higher score is a better match, so track the maximum
    (_, maxVal, _, maxLoc) = cv2.minMaxLoc(result)
    if maxVal > bestVal:
      bestVal, bestLoc, bestResized = maxVal, maxLoc, resized

  print("Highest score:", bestVal)
  if bestLoc is None:
    # the template was larger than the image at every scale; nothing to draw
    return bestVal

  (startX, startY) = (int(bestLoc[0]), int(bestLoc[1]))
  (endX, endY) = (startX + tW, startY + tH)

  # draw a bounding box around the detected result and display the image
  cv2.rectangle(bestResized, (startX, startY), (endX, endY), (0, 0, 255), 2)
  cv2_imshow(bestResized)
  return bestVal

cad_score = matchTemplateAtScale(image, template_cad)
usd_score = matchTemplateAtScale(image, template_usd)

# Final aggregation

In [0]:
# Add up the recognised bill values and report them in whichever currency's
# template matched better.
total_value = sum(bill_values)
denom = "CAD" if cad_score > usd_score else "USD"

print("The image contains ${0} {1}".format(total_value, denom))