# Visualize the data

Before we start any training, we need to get a feel for the data we have and ask some questions about it.

## Setup

In [None]:
# import the necessary libraries
from matplotlib import pyplot as plt
import imutils
import numpy as np
import argparse
import cv2
import os
from PIL import Image

# The scratch directory is a part of the .gitignore to ensure it is not committed to git.
# Point the SCRATCH environment variable at it (IPython %env magic).
%env SCRATCH=../scratch
# Create the directory from the shell only if it does not already exist.
! [ -e "${SCRATCH}" ] || mkdir -p "${SCRATCH}"

# Read the location back into Python; falls back to 'scratch' if SCRATCH is unset.
scratch_path = os.environ.get('SCRATCH', 'scratch')

# View the data

## Let's make sure we have an even (unbiased) number of examples for each type

Otherwise, we might teach our model that right fingerprints are more common than left ones.

In [None]:
# The two class folders whose example counts we want to compare.
directories = [scratch_path + "/train/left/", scratch_path + "/train/right/"]


def _count_files(root):
    """Return the number of files found anywhere beneath *root*."""
    total = 0
    for _dirpath, _dirnames, files in os.walk(root):
        total += len(files)
    return total


# Map each folder name (e.g. "left") to the number of files it contains.
file_data = {}
for directory in directories:
    dir_name = os.path.basename(directory.rstrip('/'))
    file_count = _count_files(directory)
    file_data[dir_name] = file_count

# Split the mapping into parallel label / count lists for plotting.
filenames = list(file_data)
file_counts = [file_data[name] for name in filenames]

# Draw one bar per folder; tilt the tick labels so they stay readable.
plt.bar(filenames, file_counts)
plt.xticks(rotation=45)
plt.xlabel('Directory')
plt.ylabel('File Count')
plt.title('Number of Examples to train')

# Render the chart.
plt.show()


In [None]:
def plt_imshow(title, image):
    """Display *image* with Matplotlib under the heading *title*.

    The image is converted from BGR to RGB channel order before it is
    handed to Matplotlib, and the grid overlay is switched off.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    plt.imshow(rgb)
    plt.title(title)
    plt.grid(False)
    plt.show()

## Implement OpenCV image loading script

In [None]:
# path of the sample fingerprint image that the next cell loads and displays
args = {"image": scratch_path + "/train/left/1__M_Left_index_finger_CR.png"}

## Load the image from disk and grab spatial dimensions

In [None]:
# load the image selected above from disk; cv2.imread reports a missing or
# unreadable file by returning None rather than raising, so fail early with
# a clear message instead of an AttributeError on .shape
image = cv2.imread(args["image"])
if image is None:
    raise FileNotFoundError("could not read image: {}".format(args["image"]))

# grab the spatial dimensions: rows (height), columns (width), channels
(h, w, c) = image.shape[:3]

# display the image dimensions
print("width: {} pixels".format(w))
print("height: {} pixels".format(h))
print("channels: {}".format(c))

# show the image
plt_imshow("Original", image)

# optionally save the image back to disk
#cv2.imwrite(scratch_path + "/train_lr/left/1__M_Left_index_finger_CR.png", image)

## What does this image look like

We use Pillow to open an image (with PIL.Image.open), and immediately convert the PIL.Image.Image object into an 8-bit (dtype=uint8) numpy array. Each inner list represents a pixel. Since it's a black and white image, R, G, and B are all similar. For RGB and RGBA images, Matplotlib supports float32 and uint8 data types. For grayscale, Matplotlib supports only float32. If your array data does not meet one of these descriptions, you need to rescale it.

In [None]:
# open the sample fingerprint with Pillow and view its pixels as a NumPy array
sample_path = scratch_path + "/train/left/1__M_Left_index_finger_CR.png"
img = np.asarray(Image.open(sample_path))

# print the full array representation (values and dtype)
print(repr(img))

Sometimes you want to enhance the contrast in your image, or expand the contrast in a particular region while sacrificing the detail in colors that don't vary much, or don't matter. A good tool to find interesting regions is the histogram. To create a histogram of our image data, we use the hist() function.

In [None]:
# use 257 bin edges (0..256) so each of the 256 intensity levels gets its own
# bin; with edges 0..255 the closed last bin would lump 254 and 255 together
plt.hist(image.ravel(), bins=range(257), fc='k', ec='k')

## Crop the border

There is a border around the image that would make our model think every fingerprint has a border around it. Since real fingerprints don't, we want to crop it out. One thing to be aware of: if the source ever changes and removes the border, we will want to drop the cropping we are about to do.

In [None]:
# cropping an image with OpenCV is accomplished via simple NumPy
# array slices in startY:endY, startX:endX order
cropped = image[5:99,2:92]

# display the dimensions of the cropped result (not of the original image)
print("width: {} pixels".format(cropped.shape[1]))
print("height: {} pixels".format(cropped.shape[0]))
print("channels: {}".format(cropped.shape[2]))

# show the image
plt_imshow("crop out fingerprint border", cropped)

# save the image; make sure the output folder exists first, because
# cv2.imwrite does not create missing directories
cropped_path = scratch_path + "/train_lr/left/1__M_Left_index_finger_CR.png"
os.makedirs(os.path.dirname(cropped_path), exist_ok=True)
cv2.imwrite(cropped_path, cropped)

In [None]:
# use 257 bin edges (0..256) so each of the 256 intensity levels gets its own
# bin; with edges 0..255 the closed last bin would lump 254 and 255 together
plt.hist(cropped.ravel(), bins=range(257), fc='k', ec='k')

## Image Arithmetic 

Since we do not want our model to expect that fingerprint images always have the same dark contrast, we should add brightness variability to each image.

In [None]:
# images are NumPy arrays of unsigned 8-bit integers (uint8), so every
# value lies in the range [0, 255]; OpenCV's add/subtract functions
# *clip* a result to that range whenever the operation would otherwise
# push it outside [0, 255]
added = cv2.add(np.uint8([200]), np.uint8([100]))
subtracted = cv2.subtract(np.uint8([50]), np.uint8([100]))

# report both results with their labels
for template, value in (("max of 255: {}", added), ("min of 0: {}", subtracted)):
    print(template.format(value))

In [None]:
# plain NumPy arithmetic (as opposed to the OpenCV functions) does not
# clip to [0, 255]: out-of-range results are reduced modulo 256, i.e.
# they "wrap around"
offset = np.uint8([100])
added = np.uint8([200]) + offset
subtracted = np.uint8([50]) - offset

# report both wrapped results
for wrapped in (added, subtracted):
    print("wrap around: {}".format(wrapped))

In [None]:
# brighten the cropped image by 100 intensity levels: build a constant
# uint8 matrix with the *same dimensions* as the image, every entry set
# to 100, and add the two together with OpenCV
M = np.full(cropped.shape, 100, dtype="uint8")
added = cv2.add(cropped, M)
plt_imshow("Lighter", added)

# write the brightened variant to disk
cv2.imwrite(scratch_path + "/train_lr/left/1__M_Left_index_finger_CR_lighter.png", added)

In [None]:
# use 257 bin edges (0..256) so each of the 256 intensity levels gets its own
# bin; with edges 0..255 the closed last bin would lump 254 and 255 together
plt.hist(added.ravel(), bins=range(257), fc='k', ec='k')

In [None]:
# darken the cropped image the same way: build a constant uint8 matrix
# of 50s matching the image dimensions and subtract it with OpenCV
M = np.full(cropped.shape, 50, dtype="uint8")
subtracted = cv2.subtract(cropped, M)
plt_imshow("Darker", subtracted)

# write the darkened variant to disk
cv2.imwrite(scratch_path + "/train_lr/left/1__M_Left_index_finger_CR_darker.png", subtracted)

In [None]:
# use 257 bin edges (0..256) so each of the 256 intensity levels gets its own
# bin; with edges 0..255 the closed last bin would lump 254 and 255 together
plt.hist(subtracted.ravel(), bins=range(257), fc='k', ec='k')

## Rotate the image

Because we do not want our model to identify a fingerprint only when it is perfectly upright, we should add some rotation to the training images.

In [None]:
# rotate our image by 10 degrees counterclockwise (an angle of -10),
# ensuring the entire rotated image stays within the viewing area
rotated = imutils.rotate_bound(image, angle=-10)
plt_imshow("Rotated Without Cropping", rotated)

In [None]:
# same bounded rotation, this time with an angle of -20 degrees
rotated = imutils.rotate_bound(image, angle=-20)
plt_imshow("Rotated Without Cropping", rotated)

In [None]:
# same bounded rotation, this time with an angle of -30 degrees
rotated = imutils.rotate_bound(image, angle=-30)
plt_imshow("Rotated Without Cropping", rotated)

In [None]:
# same bounded rotation, this time with an angle of -40 degrees
rotated = imutils.rotate_bound(image, angle=-40)
plt_imshow("Rotated Without Cropping", rotated)

In [None]:
# same bounded rotation, this time with an angle of -50 degrees
rotated = imutils.rotate_bound(image, angle=-50)
plt_imshow("Rotated Without Cropping", rotated)