In [5]:
import math
import numpy as np
import re
import os

from collections import Counter
from matplotlib import pyplot as plt
from PIL import Image
import cv2 

from ipywidgets import interact
import ipywidgets as ipywidgets

from keras import layers
from keras import models
from keras import optimizers

from keras import backend
from keras.utils import *


# Constants

# Folders that hold the resized character images (one image per character)
LETTER_PATH = '/home/fizzer/enph353_git/beep-boop/labs/lab5/enph353_cnn_lab/resized_letters'
NUMBER_PATH = '/home/fizzer/enph353_git/beep-boop/labs/lab5/enph353_cnn_lab/resized_numbers'

# Presumably an image height and width in pixels; not referenced in this
# part of the file -- TODO confirm against the code that uses them.
H = 298
W = 600

# Number of different labels: 10 digits + 26 letters
NUMBER_OF_LABELS = 10 + 26

# Confidence threshold; its consumer is not visible in this part of the file
CONFIDENCE_THRESHOLD = 0.05

# Portion of dataset to be used in validation
VALIDATION_SPLIT = 0.2


def files_in_folder(folder_path):
    '''
    Returns a sorted list of strings where each entry is a file in the
    folder_path.

    The listing is produced with os.listdir instead of the IPython-only
    `!ls` shell magic, so the function is valid Python outside a notebook
    and file names containing spaces or tabs are returned intact instead of
    being split into several entries.

    Parameters
    ----------

    folder_path : str
       A string to folder for which the file listing is returned.

    Returns
    -------

    list of str
       The names (not full paths) of the entries in folder_path, sorted.
       Entries whose name starts with '.' are skipped, as a plain `ls` would.

    Raises
    ------

    OSError
       If folder_path does not exist or cannot be read (for example
       FileNotFoundError or NotADirectoryError).

    '''
    # Plain `ls` hides dot-files, so keep the listing consistent with it.
    visible_entries = [name for name in os.listdir(folder_path)
                       if not name.startswith('.')]
    visible_entries.sort()

    return visible_entries


def convert_to_one_hot(Y,C):
    '''
    Returns the one hot encoding of the integer labels in Y.

    Parameters
    ----------

    Y : array
        Array of integer labels; it is flattened before encoding, so any
        shape is accepted. Each label is used as a row index into a C x C
        identity matrix.

    C : int
        The number of classes, i.e. the length of each one hot vector.

    Returns
    -------

    array
        Array of shape (C, number of labels); column i is the one hot
        vector for the i-th label of the flattened Y.

    '''
    identity = np.eye(C)
    flat_labels = Y.reshape(-1)

    # Row k of the identity matrix is the one hot vector for label k.
    one_hot_rows = identity[flat_labels]
    return one_hot_rows.T


def displayImage(index):
    '''
    Shows the image stored at position `index` of the module-level
    X_charset, with the matching Y_charset entry drawn on top as a caption.

    Parameters
    ----------

    index : int
        The index of image to be displayed.

    '''
    picture = X_charset[index]
    label = Y_charset[index]

    plt.imshow(picture)

    # Caption is placed at data coordinates (0.5, 0.5) of the current axes
    caption = "y = " + str(label)
    text_style = dict(color = 'orange', fontsize = 20,
                      horizontalalignment = 'left', verticalalignment = 'top')
    plt.text(0.5, 0.5, caption, **text_style)
    
def reset_weights(model):
    '''
    Reinitialize model parameters.

    Runs the initializer of `layer.kernel` in the current backend session
    for every layer of the model that exposes a 'kernel_initializer'
    attribute. Only the kernel initializer is run here; layers without
    that attribute are left untouched.

    Parameters
    ----------

    model : object
        A model whose `layers` attribute can be iterated.

    '''
    active_session = backend.get_session()

    for current_layer in model.layers:
        # Skip layers that carry no kernel initializer
        if not hasattr(current_layer, 'kernel_initializer'):
            continue
        current_layer.kernel.initializer.run(session=active_session)

# Specifying paths to folder
letter_folder = LETTER_PATH
number_folder = NUMBER_PATH


def _load_labelled_images(folder, file_names, label_offset):
    '''
    Opens every image listed in file_names and pairs it with an integer label.

    The label of a file is ord(<file name without extension>) - label_offset,
    so the name without its extension must be a single character.

    Parameters
    ----------

    folder : str
        Folder that contains the image files.

    file_names : list of str
        Names of the image files inside folder.

    label_offset : int
        Value subtracted from the character code to obtain the label.

    Returns
    -------

    array
        Object array of shape (len(file_names), 2); column 0 holds the image
        pixel arrays and column 1 the integer labels.

    '''
    # Fill an explicit object array. Letting np.array() infer the shape of a
    # ragged [image, label] list is deprecated since NumPy 1.19 and raises a
    # ValueError since NumPy 1.24.
    labelled = np.empty((len(file_names), 2), dtype=object)
    for row, file_name in enumerate(file_names):
        # The context manager closes the file once the pixels are copied out
        with Image.open(f'{folder}/{file_name}') as image:
            labelled[row, 0] = np.array(image)
        labelled[row, 1] = ord(os.path.splitext(file_name)[0]) - label_offset
    return labelled


# Generating a list of files in the specified folder
letters = files_in_folder(letter_folder)
numbers = files_in_folder(number_folder)

# Open the images for numbers and label each number. NOTE: '0' corresponds to label 0, '1' corresponds
# to label 1, etc.
numberSet = _load_labelled_images(number_folder, numbers, ord('0'))
print("Loaded {:} images from folder:\n{}".format(numberSet.shape[0], number_folder))

# Open the images for letters and label each letter. NOTE: A corresponds to label 10, B to label 11, etc.
letterSet = _load_labelled_images(letter_folder, letters, ord('A') - 10)
print("Loaded {:} images from folder:\n{}".format(letterSet.shape[0], letter_folder))

# Concatenate the two data sets to form the character set. Default axis is 0,
# so the rows (one per image) of both sets are stacked.
charSet = np.concatenate((numberSet, letterSet))

# Generate the X (images) and Y (labels) datasets.
# Y_charset_orig is a single row holding one label per image.
X_charset_orig = np.array([char[0] for char in charSet[:]])
Y_charset_orig = np.array([[char[1]] for char in charSet]).T

# Normalize the X dataset by dividing by 255
X_charset = X_charset_orig/255.

# Generate one hot encoding for Y character set. convert_to_one_hot returns
# one column per example, so transpose to get one row per example.
Y_charset = convert_to_one_hot(Y_charset_orig, NUMBER_OF_LABELS).T

print("Total examples: {:d}\nTraining examples: {:d}\nTest examples: {:d}".
     format(X_charset.shape[0],
           math.ceil(X_charset.shape[0] * (1-VALIDATION_SPLIT)),             # rounding up
           math.floor(X_charset.shape[0] * VALIDATION_SPLIT)))               # rounding down
print("X shape:" + str(X_charset.shape))
print("Y shape:" + str(Y_charset.shape))

# Train CNN
# Model definition


Loaded 10 images from folder:
/home/fizzer/enph353_git/beep-boop/labs/lab5/enph353_cnn_lab/resized_numbers
Loaded 26 images from folder:
/home/fizzer/enph353_git/beep-boop/labs/lab5/enph353_cnn_lab/resized_letters
Total examples: 36
Training examples: 29
Test examples: 7
X shape:(36, 140, 100, 4)
Y shape:(36, 36)
