In [125]:
import theano
# Show which device Theano computes on and the default float precision.
for option in ('device', 'floatX'):
    print(getattr(theano.config, option))

cpu
float32


In [126]:
import numpy as np
import keras

In [127]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD, Adam
from keras.preprocessing import image


In [128]:
# Acquire the training, validation and prediction sets.
# Training and validation batches are labelled (class_mode='categorical') and
# shuffled; test batches carry no labels and are not shuffled, so predictions
# stay aligned with test_batches.filenames.
batches = image.ImageDataGenerator().flow_from_directory(
    'data/training',
    target_size=(256, 256),
    class_mode='categorical',
    shuffle=True,
    batch_size=16,
)

val_batches = image.ImageDataGenerator().flow_from_directory(
    'data/validation',
    target_size=(256, 256),
    class_mode='categorical',
    shuffle=True,
    batch_size=16,
)

test_batches = image.ImageDataGenerator().flow_from_directory(
    'data/test/',
    target_size=(256, 256),
    class_mode=None,
    shuffle=False,
    batch_size=32,
)

Found 1659 images belonging to 5 classes.
Found 550 images belonging to 5 classes.
Found 560 images belonging to 5 classes.


In [129]:
def ConvBlock(model, layers, filters):
    """
    Append a stack of zero-padded convolution layers to ``model``,
    followed by a single max-pooling layer.

    Args:
        model:          Object exposing ``add(layer)`` (e.g. a Keras
                        ``Sequential``); it is modified in place.
        layers (int):   The number of zero-padded convolution layers
                        to be added to the model.
        filters (int):  The number of convolution filters to be
                        created for each layer.

    Returns:
        None. The layers are appended to ``model`` as a side effect.
    """
    for _ in range(layers):
        # Pad one pixel on every side before each 3x3 convolution.
        model.add(ZeroPadding2D((1, 1)))
        model.add(Conv2D(filters, (3, 3), activation='relu'))
    # One 2x2 pooling step closes the block.
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

In [137]:
model = Sequential()
# Stage 1: 128 filters. The spatial part of input_shape must match the
# generators' target_size (256 x 256 here); a larger side gives the network
# more input features (side * side per channel).
# Disabled experiment: a second Conv2D(128, (2, 2), activation='relu') +
# MaxPooling2D(pool_size=(2, 2)) pair ahead of the dropout.
model.add(Conv2D(filters=128, kernel_size=(2, 2), activation='relu', input_shape=(256, 256, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(rate=0.25))

In [138]:
# Stage 2: 64 filters, then 2x2 pooling and 25% dropout.
# Disabled experiment: a second Conv2D(64, (2, 2), activation='relu') +
# MaxPooling2D(pool_size=(2, 2)) pair ahead of the dropout.
model.add(Conv2D(filters=64, kernel_size=(2, 2), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(rate=0.25))

In [139]:
# Stage 3: 32 filters, then 2x2 pooling and 25% dropout.
# Disabled experiment: a second Conv2D(32, (2, 2), activation='relu') +
# MaxPooling2D(pool_size=(2, 2)) pair ahead of the dropout.
model.add(Conv2D(filters=32, kernel_size=(2, 2), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(rate=0.25))

In [140]:
# Stage 4: 16 filters, then 2x2 pooling and 25% dropout.
# Disabled experiment: a second Conv2D(16, (2, 2), activation='relu') +
# MaxPooling2D(pool_size=(2, 2)) pair ahead of the dropout.
model.add(Conv2D(filters=16, kernel_size=(2, 2), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(rate=0.25))

In [141]:
# Classifier head: flatten the feature maps, one hidden dense layer with
# heavy dropout, then a 5-way softmax (one unit per image class).
model.add(Flatten())
model.add(Dense(units=128, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(Dense(units=5, activation='softmax'))

In [142]:
# Multi-class setup: softmax output paired with categorical cross-entropy.
model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" after the table.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_160 (Conv2D)          (None, 255, 255, 128)     1664      
_________________________________________________________________
max_pooling2d_149 (MaxPoolin (None, 127, 127, 128)     0         
_________________________________________________________________
dropout_85 (Dropout)         (None, 127, 127, 128)     0         
_________________________________________________________________
conv2d_161 (Conv2D)          (None, 126, 126, 64)      32832     
_________________________________________________________________
max_pooling2d_150 (MaxPoolin (None, 63, 63, 64)        0         
_________________________________________________________________
dropout_86 (Dropout)         (None, 63, 63, 64)        0         
_________________________________________________________________
conv2d_162 (Conv2D)          (None, 62, 62, 32)        8224      
__________

In [None]:
# Train for 10 epochs, reporting validation metrics from val_batches.
# To resume from saved weights instead of starting fresh, uncomment:
# model.load_weights('weights.h5')
model.fit_generator(
    batches,
    epochs=10,
    validation_data=val_batches,
    shuffle=True,
)

Epoch 1/10
  3/104 [..............................] - ETA: 12:38 - loss: 12.7515 - acc: 0.2083

In [None]:
# model.save_weights('weights.h5')

In [None]:
# predict_generator's second argument is the number of *batches* to draw, not
# the number of samples. Passing test_batches.samples made the generator cycle
# through the test set roughly batch_size times. Ceiling division keeps the
# final, possibly partial, batch.
test_steps = -(-test_batches.samples // test_batches.batch_size)
preds = model.predict_generator(test_batches, steps=test_steps)

In [None]:
# Write to CSV
import csv
import re
import numpy as np
from random import randint

def transform(arr):
    '''Return a 0/1 list marking the position(s) of the largest value.

    Args:
        arr: Iterable of comparable numbers (list, tuple, 1-D array, ...).

    Returns:
        A list of the same length with 1 wherever the element equals the
        maximum (ties all get 1) and 0 elsewhere. Empty input gives [].
    '''
    values = list(arr)
    if not values:
        return []
    # Use the real maximum: seeding the search with 0 would leave an
    # all-negative input without any 1 in the result.
    high = max(values)
    return [1 if n == high else 0 for n in values]


def getClassMap(csvPath='./data/train.csv'):
    """Load a two-column CSV into a dict mapping column 0 to column 1.

    Args:
        csvPath (str): Path of a comma-delimited file ('|' is the quote
            character). Every row is read, including any header row.

    Returns:
        dict: first field -> second field, both as strings. If a key is
        repeated, the last row wins.

    Raises:
        IOError/OSError: if the file cannot be opened.
    """
    classMap = {}

    with open(csvPath, 'r') as csvFile:
        for row in csv.reader(csvFile, delimiter=',', quotechar='|'):
            # Blank lines come back as [] and short rows have no value;
            # skip them instead of failing with IndexError.
            if len(row) < 2:
                continue
            classMap[row[0]] = row[1]

    return classMap

def getPredictions(i):
    """Return the top guesses for test sample ``i`` as one string.

    Reads the module-level ``classes`` (class names) and ``preds``
    (one row of scores per sample). Names are paired with the scores in
    row ``i``, ordered from highest to lowest score, and the first five
    are joined with single spaces.
    """
    ranked = sorted(zip(classes, preds[i]), key=lambda pair: pair[1], reverse=True)
    return ' '.join(label for label, _ in ranked[:5])

header = ['Image', 'Id']
classMap = getClassMap('./data/train.csv')
# Order class names by their index so position k lines up with column k of
# each prediction row, independent of dict iteration order.
# NOTE(review): assumes the test directory has the same class sub-folders as
# the training directory — confirm.
classes = sorted(test_batches.class_indices, key=test_batches.class_indices.get)

i = 0            # number of test images actually evaluated
skipped = 0      # images whose name could not be parsed or has no label
match = 0
noMatch = 0
randomMatch = 0
matchedClasses = {name: 0 for name in classes}

# Only walk files that have a prediction row.
for index, path in enumerate(test_batches.filenames[:len(preds)]):
    try:
        # '<class dir>/<name>.<ext>' -> '<name>'
        fileName = path.split('/')[1].split('.')[0]
        actual = classMap[fileName]
    except (IndexError, KeyError):
        # One unparseable or unlabeled file must not end the whole
        # evaluation; count it and move on.
        skipped += 1
        continue

    # getPredictions returns space-separated names, best first; take the
    # whole first name rather than just its first character.
    prediction = getPredictions(index).split(' ')[0]
    if prediction == actual:
        matchedClasses[prediction] += 1
        match += 1
    else:
        noMatch += 1
    i += 1

    # Chance baseline: a uniformly random class compared with the TRUE
    # label (comparing it with the model's prediction says nothing about
    # accuracy).
    guess = classes[randint(0, len(classes) - 1)]
    if guess == actual:
        randomMatch += 1

print('total', '\t', 'pred', '\t', 'wrong', '\t', 'guess')
print(i, '\t', match, '\t', noMatch, '\t', randomMatch)
if skipped:
    print('Skipped', skipped)
if i:
    print('Actual acc: ', match / i)
    print('Accidental acc: ', randomMatch / i)
else:
    # Avoid dividing by zero when nothing could be evaluated.
    print('No test images could be matched against the class map')
print(matchedClasses)


# for i in range(len(preds)):
#     try:
#         fileName = test_batches.filenames[i].split('/')[1].split('.')[0]
#         row = [getPredictions(i)[0], classMap[fileName]]
#         print('match' if row[0] == row[1] else None)
#     except (IndexError, KeyError):
#         print('Skipped')
#         pass

In [None]:
# Make predictions on Test data
# Consider the rate at which you're getting them right
# Look at examples of what you get right and what you get wrong

- Save the weights after each epoch
- Try out more optimizers and see if results change
- Try a larger image size
- Show someone your architecture for advice
- Figure out the meaning of each layer; specifically, read the Keras source code
- Run up to 50 epochs overnight to see if the results get better
- Make a ton of predictions to see how often the model gets them right. Make your own error metric