In [2]:
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from numpy.lib.stride_tricks import as_strided
from scipy.signal import convolve2d

from PIL import Image
%matplotlib inline

# Use 'viridis' as matplotlib's default colormap for images.
plt.rcParams['image.cmap'] = 'viridis'

In [None]:
# Root directory of the dataset; get_classes() derives the class labels from
# the directories found beneath it.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
DATA_PATH = '/Users/anjueappen/Downloads/png'
def get_classes(folder):
    """Return the class names stored under ``folder``.

    Every immediate subdirectory of ``folder`` is treated as one class and
    its directory name is used as the class label.

    Args:
        folder: Path of the dataset root directory.

    Returns:
        Alphabetically sorted list of subdirectory names. The list is empty
        when ``folder`` has no subdirectories or cannot be listed.
    """
    # The first tuple produced by os.walk describes ``folder`` itself; its
    # second element already holds the bare names of the direct
    # subdirectories, so no separator-specific path splitting is needed and
    # deeper nesting levels are never reported as classes.
    # os.walk yields nothing when the folder cannot be listed, hence the
    # default tuple.
    _, subdirs, _ = next(os.walk(folder), (folder, [], []))
    # Directory listing order is arbitrary; sort so that the index assigned
    # to each class is reproducible between runs and machines.
    return sorted(subdirs)

# Discover the class labels of the configured dataset, then print how many
# there are followed by the labels themselves.
classes = get_classes(DATA_PATH)
print(len(classes))
print(classes)

"""
Image Preprocessing: 
1. Get class labels (aka directory names)
2. Create a zero template vector with one entry per class --> ground truth/label vector 
3. Read each image from classes and flatten, append ground truth vector to image
    - Note down the index where the ground truth vector starts; resize foreign images to the same dimensions as the existing dataset
4. Aggregate all images and split into K groups (if not equally divisible, throw error)
5. def get_batch(batch_size): return a random batch from K-1 groups

Parameters from net: 
(1) batch size 
(2) data directory (therefore image dimensions/ground truth length etc.)
"""

def read_and_flatten(class_dir, ground_truth):
    """Load every image file in ``class_dir`` as one flat, labelled vector.

    Args:
        class_dir: Directory holding the image files of a single class.
            Entries that are not regular files (e.g. subdirectories) are
            skipped.
        ground_truth: Label vector appended to the end of every flattened
            image.

    Returns:
        List with one 1-D array per file: the flattened pixel values
        followed by the entries of ``ground_truth``. Files are processed in
        sorted filename order.
    """
    samples = []
    # os.listdir order is arbitrary; sorting keeps the sample order stable.
    for fname in sorted(os.listdir(class_dir)):
        path = os.path.join(class_dir, fname)
        if not os.path.isfile(path):
            continue
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the array, instead of leaving it to the
        # garbage collector.
        with Image.open(path) as img:
            pixels = np.array(img).flatten()
        # np.append builds a new array, so the caller's label vector is
        # never modified or shared between samples.
        samples.append(np.append(pixels, ground_truth))

    return samples

def preprocess(directory):
    """Load the whole dataset under ``directory`` as labelled flat vectors.

    Each class returned by ``get_classes(directory)`` is assigned a one-hot
    label vector whose length equals the number of classes; every image of
    that class is read through ``read_and_flatten`` with that label.

    Args:
        directory: Dataset root containing one subdirectory per class.

    Returns:
        List of the per-image arrays produced by ``read_and_flatten`` for
        all classes, in class order. Empty when no classes are found.
    """
    classes = get_classes(directory)
    num_classes = len(classes)

    imgs = []
    for i, class_name in enumerate(classes):
        # Build a fresh zero vector for every class. Reusing one shared
        # array would keep the ones set for earlier classes, so the labels
        # would no longer be one-hot.
        ground_truth = np.zeros(num_classes)
        ground_truth[i] = 1  # set label for this class

        class_dir = os.path.join(directory, class_name)
        imgs.extend(read_and_flatten(class_dir, ground_truth))
    return imgs

# Run the preprocessing pass for the configured dataset and print how many
# labelled vectors it produced.
print(len(preprocess(DATA_PATH)))
    

250
['airplane', 'alarm clock', 'angel', 'ant', 'apple', 'arm', 'armchair', 'ashtray', 'axe', 'backpack', 'banana', 'barn', 'baseball bat', 'basket', 'bathtub', 'bear (animal)', 'bed', 'bee', 'beer-mug', 'bell', 'bench', 'bicycle', 'binoculars', 'blimp', 'book', 'bookshelf', 'boomerang', 'bottle opener', 'bowl', 'brain', 'bread', 'bridge', 'bulldozer', 'bus', 'bush', 'butterfly', 'cabinet', 'cactus', 'cake', 'calculator', 'camel', 'camera', 'candle', 'cannon', 'canoe', 'car (sedan)', 'carrot', 'castle', 'cat', 'cell phone', 'chair', 'chandelier', 'church', 'cigarette', 'cloud', 'comb', 'computer monitor', 'computer-mouse', 'couch', 'cow', 'crab', 'crane (machine)', 'crocodile', 'crown', 'cup', 'diamond', 'dog', 'dolphin', 'donut', 'door', 'door handle', 'dragon', 'duck', 'ear', 'elephant', 'envelope', 'eye', 'eyeglasses', 'face', 'fan', 'feather', 'fire hydrant', 'fish', 'flashlight', 'floor lamp', 'flower with stem', 'flying bird', 'flying saucer', 'foot', 'fork', 'frog', 'frying-pan'

In [None]:
# for each image, we want to create some time for