In [2]:
import numpy as np
import keras

In [3]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD, Adam
from keras.preprocessing import image


In [4]:
# Aquire training, validation & prediction sets
batches = image.ImageDataGenerator().flow_from_directory(
    'data/training',
    target_size=(512, 512), class_mode='categorical', shuffle=True, batch_size=16
)

val_batches = image.ImageDataGenerator().flow_from_directory(
    'data/validation',
    target_size=(512, 512), class_mode='categorical', shuffle=True, batch_size=16
)

test_batches = image.ImageDataGenerator().flow_from_directory(
    'data/test/',
    target_size=(512, 512), class_mode=None, shuffle=False, batch_size=32
)

Found 1659 images belonging to 5 classes.
Found 550 images belonging to 5 classes.
Found 560 images belonging to 5 classes.


In [5]:
model = Sequential()
# Increase the input shape and generator shape for more features. Number of features = side * side (224 * 224)
model.add(Conv2D(16, (3, 3), activation='relu', input_shape=(512, 512, 3)))
model.add(Conv2D(16, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.2))

In [6]:
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.2))

In [7]:
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.2))

In [8]:
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.2))

In [9]:
model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.2))

In [10]:
model.add(Conv2D(512, (3, 3), activation='relu'))
model.add(Conv2D(512, (3, 3), activation='relu'))
model.add(Conv2D(512, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.2))

In [11]:
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(5, activation='softmax'))

In [12]:
model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 510, 510, 16)      448       
                                                                 
 conv2d_1 (Conv2D)           (None, 508, 508, 16)      2320      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 254, 254, 16)     0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 254, 254, 16)      0         
                                                                 
 conv2d_2 (Conv2D)           (None, 252, 252, 32)      4640      
                                                                 
 conv2d_3 (Conv2D)           (None, 250, 250, 32)      9248      
                                                        

  super(Adam, self).__init__(name, **kwargs)


In [13]:
# model.load_weights('weights.h5')
model.fit(batches, validation_data=val_batches, shuffle=True, epochs=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x2363e196f40>

In [14]:
# model.save_weights('weights.h5')

In [15]:
preds = model.predict_generator(test_batches, test_batches.samples)

  preds = model.predict_generator(test_batches, test_batches.samples)




In [17]:
# Write to CSV
import csv
import re
import numpy as np
from random import randint

def transform(arr):
    '''Takes a list and turns the highest number into 1, else 0'''
    high = 0
    for n in arr:
        if n > high:
            high = n
    return [1 if n == high else 0 for n in arr]


def getClassMap(csvPath='./data/train.csv'):
    classMap = {}

    with open(csvPath, 'r') as classMapCSV:
        classMapCSV = csv.reader(classMapCSV, delimiter=',', quotechar='|')
        for row in classMapCSV:
            classMap[row[0]] = row[1]

    return classMap

def getPredictions(i):
    classPredictions = list(zip(classes, preds[i]))
    sortedByConfidence = sorted(classPredictions, key=lambda p: p[1], reverse=True)
    best5guesses = [guess[0] for guess in sortedByConfidence[:5]]

    return ' '.join(best5guesses)

header = ['Image', 'Id']
classMap = getClassMap('./data/train.csv')
classes = list(iter(test_batches.class_indices))

i = 0
match = 0
noMatch = 0
randomMatch = 0
matchedClasses = {
    '0': 0,
    '1': 0,
    '2': 0,
    '3': 0,
    '4': 0,
}
while 1:
    try:
        fileName = test_batches.filenames[i].split('/')[1].split('.')[0]
        prediction, actual = (getPredictions(i)[0], classMap[fileName])
        if prediction == actual:
            matchedClasses[prediction] += 1
            match += 1
        else:
            noMatch += 1
        i += 1
        
        guess = randint(0, 4)
        if str(guess) == prediction:
            randomMatch += 1
    except (IndexError, KeyError):
        break
        print('Skipped')
        pass

#print('total', '\t', 'pred', '\t', 'wrong', '\t', 'guess')
#print(i, '\t', match, '\t', noMatch, '\t', randomMatch)
#print('Actual acc: ', match / i)
#print('Accidental acc: ', randomMatch / i)
#print(matchedClasses)


# for i in range(len(preds)):
#     try:
#         fileName = test_batches.filenames[i].split('/')[1].split('.')[0]
#         row = [getPredictions(i)[0], classMap[fileName]]
#         print('match' if row[0] == row[1] else None)
#     except (IndexError, KeyError):
#         print('Skipped')
#         pass

In [18]:
# Make predictions on Test data
# Consider the rate at which you're getting them right
# Look at examples of what you get right and what you get wrong

- Save the weights after each epoch
- Try out more optimizers and see if results change
- Try a larger image size
- Show someone your architecture for advice
- Figure out the meaning of each layers, specifically read keras code
- Run up 50 epochs overnight to see if the results get better
- Make a ton of predictions to see how right it's getting it. Make your own error metric