## OCR - Part 2 (Training Data and Feature Extraction)

### Feature Extraction


In [8]:
from os                import path, getcwd, listdir
from PIL               import Image, ImageChops, ImageDraw, ImageFilter, ImageOps
from glob              import glob
from matplotlib.pyplot import imshow

import numpy             as np
import matplotlib.pyplot as plt

In [6]:
# function for reading and showing images

# images = glob(path.join(getcwd(), 'sample', '*.jpg'))

def readImage(image):
    return Image.open(image)

def showImage(image):
    plt.imshow(image)
    plt.show()

In [5]:
# Getting the max width and height of training images generated in previous step
%store -r max_values
max_width, max_height = max_values

print(max_width)
print(max_height)

28
29


In [20]:
# Creating final training images and calling extractFeatures()

X_train = []
Y_train = []
X_test  = []

def generateData(task='train'):
    if task == 'train':
        global X_train, Y_train
        X_train = []
        Y_train = []
        
        input_dir  = path.join(getcwd(), 'train')
        output_dir = path.join(getcwd(), 'training_data')
    else:
        global X_test
        X_test = []
        input_dir  = path.join(getcwd(), 'test')
        output_dir = path.join(getcwd(), 'testing_data')
        
    images = listdir(input_dir)
    for image_name in images:
        
        # creating a blank image of size max_width * max_height
        background = Image.new('1', (max_width, max_height), 'white')
        
        image = Image.open(path.join(input_dir, image_name))
        width, height = image.size
        
        background.paste(image, ((max_width/2)-(width/2), (max_height/2)-(height/2)))
        background.save(path.join(output_dir, image_name))
        
        extractFeature(background, image, image_name, task)
        
    if task == 'train':
        print('Stats: Training...')
        print('Size of training data: ', len(X_train))
        print('Number of features: ', len(X_train[0]))
        
        print('A data point in final training data: ', X_train[0])
        print('A label in training data: ', Y_train[0])
        
        shape = list(X_train[0].shape)
        shape[:0] = [len(X_train)]
        X_train = np.concatenate(X_train).reshape(shape)
        
        # Saving training data on disk
        np.save('X_train', X_train)
        print('Saved X_train (features)')
        np.save('Y_train', Y_train)
        print('Saved Y_train (labels)')
        
    else:
        print('Stats: Testing...')
        print('Size of testing data: ', len(X_test))
        print('Number of features: ', len(X_test[0]))
        print('A data point in final training data: ', X_test[0])
        
        shape = list(X_test[0].shape)
        shape[:0] = [len(X_test)]
        X_test = np.concatenate(X_test).reshape(shape)
        
        np.save('X_test', X_test)
        print('Saved X_test (labels)')


TypeError: integer argument expected, got float

In [17]:
def extractFeature(background, image, imageName, task):
    #Extracts features from characters
    
    xtrain = []
    
    #feature 1: width
    xtrain.append(image.size[0])
    
    #feature 2: height
    xtrain.append(image.size[1])
    
    #feature : aspect ratio
#     xtrain.append(float(image.size[1])/image.size[0])
    
    #feature : number of pixels
    xtrain.append(image.size[1]*image.size[0])
    
    
    #feature 3: ratio of white to black pixels
    whitePixels = white_pixels(background)
    totalPixels = background.size[0]*background.size[1]
    blackPixels = totalPixels - whitePixels
#     xtrain.append(whitePixels/blackPixels)
    
    #feature 3: vertical_symmetry
    value = vertical_symmetry(background)
    xtrain.append(value)
    
    #feature 3: horizontal_symmetry
    value = horizontal_symmetry(background)
    xtrain.append(value)
    
    #featuer 3: x_histogram
    x_hist_values = x_histogram(background)
    xtrain = np.concatenate((xtrain, x_hist_values), axis = 0)
    
    #featuer 4: y_histogram
    y_hist_values = y_histogram(background)
    xtrain = np.concatenate((xtrain, y_hist_values), axis = 0)
    
    
    #Writing to final data
    if(task=="train"):
        X_train.append(xtrain)
        Y_train.append(imageName.split("_")[2].split(".")[0])
    else:
        X_test.append(xtrain)

In [16]:
def x_histogram(background):
    hist_values = []
    width,height = background.size
    pix = np.asarray(background).transpose()
    for x in range(width):
        hist_values.append( np.add.reduce(pix[x]))
    return hist_values

def y_histogram(background):
    hist_values = []
    width,height = background.size
    pix = np.asarray(background)
    for x in range(height):
        hist_values.append( np.add.reduce(pix[x]))
    return hist_values

def white_pixels(background):
    pixeldata = np.asarray(background)
    return sum(np.add.reduce(pixeldata))

def vertical_symmetry(background):
    width,height = background.size
    first_half = np.array(background.crop((0, 0, width/2, height)).getdata())
    second_half = np.array(ImageOps.mirror(background.crop((width/2, 0, width, height)).getdata()))
    second_half = second_half[:len(first_half)]
    return int(np.linalg.norm(first_half-second_half))

def horizontal_symmetry(background):
    width,height = background.size
    first_half = np.array(background.crop((0, 0, width, height/2)).getdata())
    second_half = np.array(ImageOps.mirror(background.crop((0, height/2, width, height)).getdata()))
    second_half = second_half[:len(first_half)]
    return  int(np.linalg.norm(first_half-second_half))