In [8]:
"""From our images, make data loader into keras compatible format
   for transfer learning on densenet-121"""
import numpy as np
from sklearn.model_selection import train_test_split
import os
import matplotlib.pyplot as plt
import glob

In [53]:
def _label_from_path(path):
    """Return the name of the directory that directly contains `path`."""
    return os.path.basename(os.path.dirname(path))


def _to_rgb_resized(img, img_rows, img_cols):
    """Return `img` as an (img_rows, img_cols, 3) array via nearest-neighbour sampling."""
    img = np.asarray(img)
    if img.ndim == 2:
        # Grayscale input: add a channel axis so the logic below is uniform.
        img = img[..., None]
    if img.shape[-1] < 3:
        # Fewer than three channels: replicate the first one across R, G and B.
        img = np.repeat(img[..., :1], 3, axis=-1)
    else:
        # Keep only the first three channels (drops e.g. an alpha channel).
        img = img[..., :3]

    src_rows, src_cols = img.shape[:2]
    # Source index lying under the centre of each output pixel. Pure integer
    # arithmetic guarantees every index stays within [0, src - 1], and an
    # image that already has the target size maps onto itself unchanged.
    row_idx = (2 * np.arange(img_rows) + 1) * src_rows // (2 * img_rows)
    col_idx = (2 * np.arange(img_cols) + 1) * src_cols // (2 * img_cols)
    return img[row_idx[:, None], col_idx]


def load_data(train_dir, img_rows, img_cols):
    """Load a folder-per-class image set and split it into train/test arrays.

    train_dir: directory where training data is held (may be relative,
        absolute or nested, with or without a trailing separator).
    img_rows, img_cols: target resolution of every image (e.g. 64, 64).

    Example train_dir format:
    train_dir
       -- water_0
          -- img0.jpg
          -- img1.jpg
       -- forest_0
          -- img27.jpg
          -- img28.jpg

    Each image's label is the name of the folder that directly contains it;
    folder names are mapped to integers in sorted order. Only `*.jpg` files
    exactly one level below train_dir are read.

    Returns:
    x_train, x_test, y_train, y_test
    x_train, x_test: uint8 arrays of RGB image data with shape
        (num_samples, img_rows, img_cols, 3).
    y_train, y_test: uint8 arrays of category labels (integers in range
        0..num_classes-1) with shape (num_samples,). Because labels are
        stored as uint8, more than 256 classes cannot be represented.

    Raises:
    ValueError: if no matching .jpg file is found.
    """
    pattern = os.path.join(train_dir, '*', '*.jpg')
    # glob gives no ordering guarantee; sorting keeps the seeded split below
    # reproducible across machines and filesystems.
    file_list = sorted(glob.glob(pattern))
    if not file_list:
        raise ValueError('No .jpg files found matching %r' % pattern)

    # Use the parent folder name rather than a fixed path component so the
    # labels are right regardless of how deep train_dir itself is.
    class_names = [_label_from_path(path) for path in file_list]
    _, labels = np.unique(class_names, return_inverse=True)
    labels = labels.astype('uint8')

    # np.resize would only truncate/repeat the raw pixel buffer; resample
    # each image properly and force three colour channels instead.
    imgs = np.array(
        [_to_rgb_resized(plt.imread(path), img_rows, img_cols)
         for path in file_list],
        dtype='uint8')

    X_train, X_test, y_train, y_test = train_test_split(
        imgs, labels, test_size=0.25, random_state=42, shuffle=True)

    return X_train, X_test, y_train, y_test



In [47]:
# Build the train/test arrays from 'train_dir/' at 224x224 resolution.
x_tr, x_te, y_tr, y_te = load_data(
    train_dir='train_dir/',
    img_rows=224,
    img_cols=224,
)