In [11]:
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.layers import Dense, Flatten
from keras.layers import Conv2D, MaxPooling2D, Dropout
from keras.models import Sequential
from keras import optimizers
import matplotlib.pylab as plt
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
from keras.preprocessing import image
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
# training hyperparameters
batch_size, epochs = 128, 10
# size of the softmax output layer (one unit per class)
num_classes = 120

# input image dimensions (images are resized to a square of this side length)
img_x = img_y = 224

# the dog-breed images and their labels are loaded from disk in the cells below


In [12]:
def path_to_tensor(img_path):
    """Read one image file and return it as a single-sample 4D batch.

    The image is loaded with ``image.load_img`` resized to
    ``(img_x, img_y)``, converted to an array with ``image.img_to_array``
    and given a leading batch axis of length 1.

    Args:
        img_path: path of the image file to load.

    Returns:
        Array with a leading axis of size 1 followed by the image axes
        (``(1, 224, 224, 3)`` for an RGB image at the configured size).
    """
    pil_img = image.load_img(img_path, target_size=(img_x, img_y))
    pixels = image.img_to_array(pil_img)
    # prepend the batch axis so per-image results can be stacked along axis 0
    return pixels[np.newaxis, ...]

def paths_to_tensor(img_paths):
    """Load every image in ``img_paths`` into one stacked array.

    Each path is decoded with ``path_to_tensor`` and written directly into a
    single preallocated output array. Collecting all per-image arrays in a
    list and calling ``np.vstack`` afterwards would keep both the list and
    the stacked copy alive at the same time, roughly doubling peak memory;
    filling a preallocated array keeps only one copy of the data.

    Args:
        img_paths: iterable of image file paths.

    Returns:
        Array whose first axis has length ``len(img_paths)`` and whose
        remaining axes and dtype are those of a single decoded image;
        row ``i`` corresponds to ``img_paths[i]``.

    Raises:
        ValueError: if ``img_paths`` is empty, or if an image does not fit
            the shape of the first decoded image.
    """
    # Materialise so generators are accepted and len() is available.
    img_paths = list(img_paths)
    if not img_paths:
        raise ValueError("img_paths is empty; there are no images to load")

    batch = None
    for idx, img_path in enumerate(tqdm(img_paths)):
        tensor = path_to_tensor(img_path)
        if batch is None:
            # Allocate once, sized from the first decoded image.
            batch = np.empty((len(img_paths),) + tensor.shape[1:], dtype=tensor.dtype)
        # tensor has a leading batch axis of length 1; store the image itself.
        batch[idx] = tensor[0]
    return batch

In [13]:
pth = "/home/ubuntuuser/DogBreedClassification/Notebooks/train"
label_df = pd.read_csv("/home/ubuntuuser/DogBreedClassification/Notebooks/labels.csv")

# Index every file found under `pth` by its file name without extension.
path_by_id = {}
for root, dirs, files in os.walk(pth):
    for file in files:
        path_by_id[os.path.splitext(file)[0]] = os.path.join(root, file)

# Order the image paths by the rows of labels.csv so that train_paths[i] and
# labels[i] describe the same image. Directory listings come back in
# arbitrary order, so pairing walk order with CSV order mislabels the images.
# NOTE(review): assumes labels.csv has an `id` column holding each image's
# file name without extension -- confirm against the actual CSV.
image_ids = label_df["id"].astype(str)
missing_ids = [img_id for img_id in image_ids if img_id not in path_by_id]
if missing_ids:
    raise IOError(
        "%d ids from labels.csv have no image under %s (first missing: %s)"
        % (len(missing_ids), pth, missing_ids[0])
    )

train_paths = [path_by_id[img_id] for img_id in image_ids]
labels = list(label_df.breed)
train_data = paths_to_tensor(train_paths)


  0%|          | 0/10222 [00:00<?, ?it/s][A
  0%|          | 17/10222 [00:00<01:02, 164.36it/s][A
  0%|          | 40/10222 [00:00<00:52, 194.23it/s][A
  1%|          | 61/10222 [00:00<00:51, 197.34it/s][A
  1%|          | 81/10222 [00:00<00:51, 197.67it/s][A
  1%|          | 97/10222 [00:00<00:54, 185.46it/s][A
  1%|          | 115/10222 [00:00<00:55, 183.39it/s][A
  1%|▏         | 134/10222 [00:00<00:54, 183.48it/s][A
  1%|▏         | 151/10222 [00:00<00:55, 181.72it/s][A
  2%|▏         | 169/10222 [00:00<00:55, 180.53it/s][A
  2%|▏         | 188/10222 [00:01<00:58, 171.43it/s][A
  2%|▏         | 207/10222 [00:01<00:58, 172.21it/s][A
  2%|▏         | 226/10222 [00:01<00:57, 173.41it/s][A
  2%|▏         | 246/10222 [00:01<00:57, 174.90it/s][A
  3%|▎         | 266/10222 [00:01<00:56, 176.34it/s][A
  3%|▎         | 285/10222 [00:01<00:56, 175.03it/s][A
  3%|▎         | 303/10222 [00:01<00:56, 175.26it/s][A
  3%|▎         | 321/10222 [00:01<00:57, 171.48it/s][A
  3%|▎ 

 27%|██▋       | 2735/10222 [00:16<00:45, 166.09it/s][A
 27%|██▋       | 2752/10222 [00:16<00:44, 166.11it/s][A
 27%|██▋       | 2771/10222 [00:16<00:44, 166.25it/s][A
 27%|██▋       | 2792/10222 [00:16<00:44, 166.50it/s][A
 27%|██▋       | 2811/10222 [00:16<00:44, 166.50it/s][A
 28%|██▊       | 2830/10222 [00:16<00:44, 166.57it/s][A
 28%|██▊       | 2850/10222 [00:17<00:44, 166.74it/s][A
 28%|██▊       | 2869/10222 [00:17<00:44, 166.87it/s][A
 28%|██▊       | 2889/10222 [00:17<00:43, 167.03it/s][A
 28%|██▊       | 2909/10222 [00:17<00:43, 167.19it/s][A
 29%|██▊       | 2929/10222 [00:17<00:43, 167.33it/s][A
 29%|██▉       | 2949/10222 [00:17<00:43, 167.43it/s][A
 29%|██▉       | 2968/10222 [00:17<00:43, 167.55it/s][A
 29%|██▉       | 2987/10222 [00:17<00:43, 167.58it/s][A
 29%|██▉       | 3007/10222 [00:17<00:43, 167.71it/s][A
 30%|██▉       | 3026/10222 [00:18<00:42, 167.78it/s][A
 30%|██▉       | 3045/10222 [00:18<00:42, 167.87it/s][A
 30%|██▉       | 3064/10222 [00

 54%|█████▎    | 5485/10222 [00:32<00:27, 171.27it/s][A
 54%|█████▍    | 5503/10222 [00:32<00:27, 171.27it/s][A
 54%|█████▍    | 5521/10222 [00:32<00:27, 171.29it/s][A
 54%|█████▍    | 5539/10222 [00:32<00:27, 171.31it/s][A
 54%|█████▍    | 5558/10222 [00:32<00:27, 171.36it/s][A
 55%|█████▍    | 5578/10222 [00:32<00:27, 171.44it/s][A
 55%|█████▍    | 5597/10222 [00:32<00:26, 171.45it/s][A
 55%|█████▍    | 5616/10222 [00:32<00:26, 171.47it/s][A
 55%|█████▌    | 5635/10222 [00:32<00:26, 171.52it/s][A
 55%|█████▌    | 5655/10222 [00:32<00:26, 171.59it/s][A
 56%|█████▌    | 5676/10222 [00:33<00:26, 171.68it/s][A
 56%|█████▌    | 5697/10222 [00:33<00:26, 171.78it/s][A
 56%|█████▌    | 5717/10222 [00:33<00:26, 171.83it/s][A
 56%|█████▌    | 5738/10222 [00:33<00:26, 171.91it/s][A
 56%|█████▋    | 5758/10222 [00:33<00:25, 171.91it/s][A
 57%|█████▋    | 5778/10222 [00:33<00:25, 171.97it/s][A
 57%|█████▋    | 5798/10222 [00:33<00:25, 172.04it/s][A
 57%|█████▋    | 5818/10222 [00

 80%|████████  | 8189/10222 [00:47<00:11, 171.60it/s][A
 80%|████████  | 8207/10222 [00:47<00:11, 171.61it/s][A
 80%|████████  | 8225/10222 [00:47<00:11, 171.42it/s][A
 81%|████████  | 8242/10222 [00:48<00:11, 171.41it/s][A
 81%|████████  | 8262/10222 [00:48<00:11, 171.47it/s][A
 81%|████████  | 8280/10222 [00:48<00:11, 171.48it/s][A
 81%|████████  | 8299/10222 [00:48<00:11, 171.50it/s][A
 81%|████████▏ | 8317/10222 [00:48<00:11, 171.38it/s][A
 82%|████████▏ | 8337/10222 [00:48<00:10, 171.43it/s][A
 82%|████████▏ | 8358/10222 [00:48<00:10, 171.50it/s][A
 82%|████████▏ | 8377/10222 [00:48<00:10, 171.27it/s][A
 82%|████████▏ | 8399/10222 [00:49<00:10, 171.37it/s][A
 82%|████████▏ | 8421/10222 [00:49<00:10, 171.45it/s][A
 83%|████████▎ | 8442/10222 [00:49<00:10, 171.52it/s][A
 83%|████████▎ | 8462/10222 [00:49<00:10, 171.55it/s][A
 83%|████████▎ | 8482/10222 [00:49<00:10, 171.56it/s][A
 83%|████████▎ | 8502/10222 [00:49<00:10, 171.61it/s][A
 83%|████████▎ | 8521/10222 [00

MemoryError: 

Encode the labels

In [102]:
# Map each breed name in `labels` to an integer class id. The fitted encoder
# is kept so ids can be mapped back to breed names later.
# One-hot encoding of these ids is done after the train/test split.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(labels)

In [106]:
# Hold out 33% of the samples for validation; the fixed random_state makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    train_data,
    encoded_Y,
    random_state=42,
    test_size=0.33,
)

In [126]:
# One-hot encode the integer class ids. num_classes is passed explicitly:
# without it the width is inferred from the largest id present in each array,
# so a split that happens to lack the highest class would get fewer columns
# than the model's output layer (and Y_train / Y_test could disagree).
Y_train = np_utils.to_categorical(y_train, num_classes=num_classes)
Y_test = np_utils.to_categorical(y_test, num_classes=num_classes)

In [109]:
# per-sample shape: the two configured image dimensions plus 3 colour channels
input_shape = (img_x, img_y) + (3,)

In [110]:
# Convert the image data to float32 and scale it by 1/255 (pixel values are
# presumably in 0-255 as produced by the image loader -- confirm).
# copy=False skips the extra full-size copy when the array is already
# float32; a conversion from another dtype still allocates a new array.
X_train = X_train.astype('float32', copy=False)
X_test = X_test.astype('float32', copy=False)
# NOTE: in-place division -- re-running this cell would scale the data twice.
X_train /= 255
X_test /= 255
print('x_train shape:', X_train.shape)
print("input shape", input_shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

x_train shape: (6848, 224, 224, 3)
input shape (224, 224, 3)
6848 train samples
3374 test samples


In [121]:
# Baseline CNN: one convolution / max-pooling stage followed by a dense
# classifier head with dropout.
model = Sequential()
# Use the shared `input_shape` (built from img_x / img_y) rather than a
# hard-coded literal so the model stays in sync with the image-loading size.
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape, data_format="channels_last"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
# one softmax unit per class
model.add(Dense(num_classes, activation='softmax'))

# categorical cross-entropy expects one-hot encoded targets
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

In [129]:
# Train the model, evaluating on the held-out split after every epoch;
# the returned object is kept in `history`.
history = model.fit(
    x=X_train,
    y=Y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_test, Y_test),
    verbose=1,
)

Train on 6848 samples, validate on 3374 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10

KeyboardInterrupt: 