In [1]:
import numpy as np
import os
import cv2
import pandas as pd
import pickle
import tensorflow as tf
from tqdm import tqdm_notebook
from scipy.spatial.distance import hamming, cosine

%matplotlib inline

## Step 1: Image Preprocessing
Load images one at a time and output tuples containing width and height of each image. Resize and convert color information from BGR to RGB using OpenCV (cv2) module. Open dataset and read the labels, training and testing images, and image size tuples that were created in image_loader function.

In [21]:
def image_loader(image_path, image_size):
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = np.array(image)
    image = cv2.resize(image, image_size, cv2.UMat(cv2.INTER_CUBIC))
    return image

In [22]:
images = []
for image_name in os.listdir('dataset/train/'):
    image_path = os.path.join('dataset/train/', image_name)
    print(image_loader(image_path, (32, 32)))
    print(image_path)

<UMat 0x13dff6130>
dataset/train/2_truck.png
<UMat 0x13dff6130>
dataset/train/1_truck.png
<UMat 0x13dff6130>
dataset/train/0_frog.png


In [40]:
def image_loader(image_path, image_size):
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, image_size, cv2.UMat(cv2.INTER_CUBIC))
    return image

In [41]:
def dataset_preprocessing(dataset_path, labels_path, image_size, image_paths_pickle):
    with open(labels_path, 'r') as f:
        classes = f.read().split('\n')[:-1]
        
    images = []
    labels = []
    image_paths = []
    
    for image_name in os.listdir(dataset_path):
        try:
            image_path = os.path.join(dataset_path, image_name)
            images.append(image_loader(image_path, image_size))
            image_paths.append(image_path)
            for idx in range(len(classes)):
                if classes[idx] in image_name: #Example: 0_frog.png
                    labels.append(idx)
        except:
            pass
    
    with open(image_paths_pickle + '.pickle', 'wb') as f:
        pickle.dump(image_paths, f)
    
    assert len(images) == len(labels)
    return np.array(images), np.array(labels)

In [42]:
images, labels = dataset_preprocessing('dataset/train/', 'dataset/labels.txt', (32, 32), 'training_images_pickle')

In [43]:
labels.shape

(3,)