# Business Case

# Reading in the Data

In [59]:
from PIL import Image
import numpy as np
import os
import time
import pickle
import matplotlib.pyplot as plt

In [60]:
# When True, previously pickled data is loaded instead of re-reading the
# images, provided the pickle files exist.
LOAD_FROM_PICKLES = True

# When True, freshly loaded data is written to pickle files where applicable.
SAVE_TO_PICKLES = True

In [61]:
def _load_image(filename):
    """Load a single image file and return its pixel data as a flat array.

    Args:
        filename: Path of the image file to open.

    Returns:
        A 1-D numpy array obtained by converting the opened image to an
        array and flattening it.
    """
    # The context manager closes the image (and its file handle) even if
    # decoding the pixel data raises.
    with Image.open(filename) as img:
        # flatten() always returns a copy, so the returned array does not
        # depend on the image object once it has been closed.
        return np.asarray(img).flatten()

def _load_images_from_sub_directory(directory):
    """Load every '.jpg' file found directly inside ``directory``.

    Args:
        directory: Path of the folder to scan (not searched recursively).

    Returns:
        A numpy array built from the list of flattened images, one entry per
        file, in sorted filename order. An empty array of shape (0,) is
        returned when the folder holds no '.jpg' files.
        NOTE(review): assumes all images flatten to the same length so the
        entries stack into a 2-D array -- confirm against the dataset.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # resulting array order stable between runs and machines.
    jpg_names = sorted(
        name for name in os.listdir(directory) if name.endswith('.jpg')
    )
    # os.path.join avoids doubled separators when ``directory`` already ends
    # with a slash.
    images = [_load_image(os.path.join(directory, name)) for name in jpg_names]
    return np.asarray(images)

def load_images(directory):
    """Load all images from the subdirectories of ``directory``.

    Each non-hidden subdirectory is loaded with
    ``_load_images_from_sub_directory`` and its name is used as the label for
    every image it contains.

    Args:
        directory: Path of the folder whose subdirectories hold the images.

    Returns:
        A tuple ``(images, labels)``: the concatenation of the per-subdirectory
        image arrays, and a numpy array with one subdirectory name per image.

    Raises:
        ValueError: If no images were found in any subdirectory.
    """
    # Only real, non-hidden directories are class folders; stray files next to
    # them (e.g. a README) would otherwise be passed to os.listdir and crash.
    # Sorting makes the load order independent of os.listdir's arbitrary order.
    subdirs = sorted(
        name for name in os.listdir(directory)
        if not name.startswith(".") and os.path.isdir(os.path.join(directory, name))
    )

    images = []
    labels = []
    for count, subdir in enumerate(subdirs, start=1):
        subdir_images = _load_images_from_sub_directory(os.path.join(directory, subdir))
        # An empty folder yields an array of shape (0,), which cannot be
        # concatenated with the 2-D arrays of the other folders, so skip it.
        if len(subdir_images) > 0:
            labels.extend([subdir] * len(subdir_images))
            images.append(subdir_images)
        print("Loaded " + str(count) + " subdirectories")

    if not images:
        raise ValueError(
            "No .jpg images found in any subdirectory of " + repr(directory)
        )
    return np.concatenate(images), np.asarray(labels)

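Load the images and labels from the pickled files if both files exist and loading from pickles is enabled; otherwise read them from the image directories and, if saving is enabled, cache them as pickles.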

In [62]:
%%time
# Load the cached arrays from pickle files when that option is enabled and
# both files exist; otherwise read the images from disk and optionally cache
# them for the next run.
# NOTE: pickle.load can execute arbitrary code -- only load pickle files that
# were written by this notebook or come from a trusted source.
images_pickle_path = "pickles/images.pickle"
labels_pickle_path = "pickles/labels.pickle"

if LOAD_FROM_PICKLES and os.path.exists(images_pickle_path) and os.path.exists(labels_pickle_path):
    with open(images_pickle_path, "rb") as f:
        images = pickle.load(f)
    with open(labels_pickle_path, "rb") as f:
        labels = pickle.load(f)
else:
    images, labels = load_images("CNN_letter_Dataset")
    # Saving to pickle files if the option is specified
    if SAVE_TO_PICKLES:
        # open(..., "wb") does not create missing parent directories, so make
        # sure the cache folder exists before writing.
        os.makedirs("pickles", exist_ok=True)
        with open(images_pickle_path, "wb") as f:
            pickle.dump(images, f)
        with open(labels_pickle_path, "wb") as f:
            pickle.dump(labels, f)

CPU times: user 948 µs, sys: 257 ms, total: 258 ms
Wall time: 576 ms


In [63]:
# Shuffle images and labels with one shared permutation so every image keeps
# its label. A fixed seed makes the order reproducible across kernel restarts.
# Note: re-running this cell permutes the already-shuffled arrays again.
SHUFFLE_SEED = 42
assert len(images) == len(labels), "images and labels must have the same length"
indices = np.random.default_rng(SHUFFLE_SEED).permutation(len(images))
images = images[indices]
labels = labels[indices]