In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import re
import numpy as np
import pandas as pd

from glob import glob

from src.dogFunctions import *
from src.imageTrans import plotGrid


from keras.preprocessing import image
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Dropout, ELU, BatchNormalization
from keras.optimizers import Adam
from keras.models import Model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.applications.vgg16 import VGG16
from keras.applications.inception_resnet_v2 import InceptionResNetV2

Using TensorFlow backend.


In [2]:
def plotLosses( history ):
    """Plot training and validation loss as a function of epoch.

    Parameters
    ----------
    history : mapping
        Must provide the per-epoch value sequences under the keys "loss"
        and "val_loss" (the notebook passes `losses.history`).

    Returns
    -------
    None
        The figure is displayed with `plt.show()`.
    """

    plt.figure( figsize = (18,10) )

    # Epochs are numbered from 1 on the x-axis. Raw strings keep the LaTeX
    # backslashes without relying on Python's invalid-escape fallback.
    plt.plot( range(1, len(history["loss"]) + 1), history["loss"], "b-",
              linewidth = 3, label = r"$\mathrm{training}$" )
    plt.plot( range(1, len(history["val_loss"]) + 1), history["val_loss"], "g-",
              linewidth = 3, label = r"$\mathrm{validation}$" )
    plt.ylabel( r"$\mathrm{Loss}$" )
    plt.xlabel( r"$\mathrm{Epoch}$" )
    plt.legend( loc = "best" )

    plt.show()

def plotAcc( history ):
    """Plot training and validation accuracy as a function of epoch.

    Parameters
    ----------
    history : mapping
        Must provide the per-epoch value sequences under the keys "acc"
        and "val_acc" (the notebook passes `losses.history`).

    Returns
    -------
    None
        The figure is displayed with `plt.show()`.
    """

    plt.figure( figsize = (18,10) )

    # Epochs are numbered from 1 on the x-axis. Raw strings keep the LaTeX
    # backslashes without relying on Python's invalid-escape fallback.
    plt.plot( range(1, len(history["acc"]) + 1), history["acc"], "b-",
              linewidth = 3, label = r"$\mathrm{training}$" )
    plt.plot( range(1, len(history["val_acc"]) + 1), history["val_acc"], "g-",
              linewidth = 3, label = r"$\mathrm{validation}$" )
    plt.ylabel( r"$\mathrm{Accuracy}$" )
    plt.xlabel( r"$\mathrm{Epoch}$" )
    plt.legend( loc = "best" )

    plt.show()

def accurracy(X, y, classifier = None):
    """Return the fraction of samples whose predicted class equals the label.

    Parameters
    ----------
    X : array-like
        Inputs, forwarded unchanged to `classifier.predict`.
    y : array-like, 2-D
        Per-sample label vectors; the true class of a sample is the argmax
        along axis 1.
    classifier : object with a `predict` method, optional
        Model used for the predictions. When omitted, the notebook-level
        global `model` is used, which is the original behaviour.

    Returns
    -------
    float
        Share of samples where argmax(prediction) == argmax(label).
    """

    if classifier is None:
        # Backward-compatible fallback on the global defined in a later cell.
        classifier = model

    predicted = np.argmax( classifier.predict( X ), axis = 1 )
    actual    = np.argmax( y, axis = 1 )

    return np.mean( predicted == actual )

def denseBlock( x, size, dropout ):
    """Stack Dense -> BatchNormalization -> ELU -> Dropout on top of `x`.

    `size` is the unit count of the bias-free Dense layer and `dropout` is
    the rate handed to the Dropout layer. Note that the Dense layer already
    carries an "elu" activation, so ELU is applied both before and after the
    batch normalisation. Returns the output of the Dropout layer.
    """

    hidden     = Dense( size, use_bias = False, activation = "elu" )( x )
    normalised = BatchNormalization()( hidden )
    activated  = ELU()( normalised )

    return Dropout( dropout )( activated )

def genModel( imgSize = 256, dropout = 0.5, numClasses = 120, denseSize = 1024, numBlocks = 6 ):
    """Build and compile the transfer-learning image classifier.

    A frozen, ImageNet-initialised InceptionResNetV2 (created without its
    top) feeds a Flatten layer, a stack of `denseBlock` units and a softmax
    output layer. Only the layers added here are trainable.

    Parameters
    ----------
    imgSize : int
        Side length of the square RGB input, i.e. input shape
        (imgSize, imgSize, 3).
    dropout : float
        Dropout rate forwarded to every dense block.
    numClasses : int
        Number of units of the softmax output layer.
    denseSize : int
        Number of units of each dense block.
    numBlocks : int
        How many dense blocks are stacked between Flatten and the output.

    Returns
    -------
    keras Model
        Compiled with the Adam optimizer (default settings), categorical
        cross-entropy loss and the 'acc' metric.
    """

    pretrained = InceptionResNetV2( input_shape = (imgSize, imgSize, 3), weights = 'imagenet',
                                    include_top = False )

    # Freeze the pretrained base before compiling so only the new head learns.
    for layer in pretrained.layers:
        layer.trainable = False

    head = Flatten()( pretrained.output )

    for _ in range( numBlocks ):
        head = denseBlock( head, denseSize, dropout )

    outputLayer = Dense( numClasses, activation = "softmax" )( head )

    model = Model( inputs = pretrained.input, outputs = outputLayer )
    model.compile( optimizer = Adam(), loss = "categorical_crossentropy", metrics = [ 'acc' ] )

    return model

In [3]:
# Set to True to draw a new train/validation split and overwrite the saved
# file lists; False re-uses the lists written by an earlier run.
trainFresh = False

breeds = readBreeds()
labels = pd.read_csv("labels.csv")

# Replace each breed name by its integer position in `breeds`.
labels['breed'] = pd.Categorical( labels['breed'], categories = breeds ).codes

if trainFresh:

    trainFiles = np.array( glob('./trainCrop/*.jpg') )

    # De-duplicate the sampled validation files. The set is kept so the
    # membership test below is O(1) per file instead of a scan over an array.
    # 0.147 is presumably the validation fraction -- confirm against sampleDogs.
    valSet   = set( sampleDogs( trainFiles, labels, 0.147 ) )
    valFiles = np.array( list(valSet) )

    # Everything that was not sampled for validation is used for training.
    trainFiles = np.array( [ f for f in trainFiles if f not in valSet ] )
    np.random.shuffle(trainFiles)

    writeFilesList( "trainFiles.txt", trainFiles )
    writeFilesList( "valFiles.txt", valFiles )

else:
    valFiles   = readSavedFiles( "valFiles.txt" )
    trainFiles = readSavedFiles( "trainFiles.txt" )

In [4]:
# Input resolution and dropout rate of the classifier built below.
imgSize, dropout = 256, 0.25

# Build the compiled network and print its layer table.
model = genModel( imgSize = imgSize, dropout = dropout )
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 256, 256, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 127, 127, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 127, 127, 32) 96          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 127, 127, 32) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (

In [None]:
batchSize = 64

# To continue from the best saved checkpoint instead of the freshly built
# model, re-enable the two lines below.
#if ( not trainFresh ):
#    model.load_weights( "./best/dogClass.hdf5" )

# Stop after 50 epochs without improvement of the monitored quantity and keep
# only the best weights on disk.
earlyStopper = EarlyStopping( patience = 50,
                              verbose = 1 )
checkPointer = ModelCheckpoint( filepath = "./best/dogClass.hdf5",
                                save_best_only = True,
                                verbose = 1 )

# Whole batches only: leftover files that do not fill a batch are not counted
# in the per-epoch step totals.
losses = model.fit_generator(
    genBatch( trainFiles, labels, batchSize, imgSize, True ),
    steps_per_epoch = len(trainFiles) // batchSize,
    validation_data = genBatch( valFiles, labels, batchSize, imgSize, False ),
    validation_steps = len(valFiles) // batchSize,
    epochs = 5000,
    callbacks = [ earlyStopper, checkPointer ],
)

Epoch 1/5000
Epoch 00001: val_loss improved from inf to 1.56125, saving model to ./best/dogClass.hdf5
Epoch 2/5000
Epoch 00002: val_loss improved from 1.56125 to 0.98268, saving model to ./best/dogClass.hdf5
Epoch 3/5000
Epoch 00003: val_loss improved from 0.98268 to 0.75135, saving model to ./best/dogClass.hdf5
Epoch 4/5000
Epoch 00004: val_loss did not improve
Epoch 5/5000
Epoch 00005: val_loss improved from 0.75135 to 0.73026, saving model to ./best/dogClass.hdf5
Epoch 6/5000
Epoch 00006: val_loss improved from 0.73026 to 0.69024, saving model to ./best/dogClass.hdf5
Epoch 7/5000
Epoch 00007: val_loss did not improve
Epoch 8/5000
Epoch 00008: val_loss improved from 0.69024 to 0.67514, saving model to ./best/dogClass.hdf5
Epoch 9/5000
Epoch 00009: val_loss improved from 0.67514 to 0.65080, saving model to ./best/dogClass.hdf5
Epoch 10/5000
Epoch 00010: val_loss improved from 0.65080 to 0.64017, saving model to ./best/dogClass.hdf5
Epoch 11/5000
Epoch 00011: val_loss did not improve
E

In [None]:
# Show the per-epoch loss and accuracy curves recorded by the training run
# (`losses` is the object returned by `model.fit_generator`).
plotLosses( losses.history )
plotAcc( losses.history )

In [None]:
# Same architecture settings as used for training, so the checkpoint fits.
imgSize, dropout = 256, 0.25

# Rebuild the network and restore the best weights saved by the checkpointer.
model = genModel( imgSize = imgSize, dropout = dropout )
model.load_weights( "./best/dogClass.hdf5" )

In [None]:
# glob returns paths in arbitrary order; sorting makes the row order of the
# predictions reproducible between runs and machines.
testFiles  = np.array( sorted( glob('./testCroped/*.jpg') ) )

def getImage( file, size = 256 ):
    """Read one image file and return it together with its identifier.

    The image is loaded resized to `size` x `size`, converted to an array and
    divided by 255. Returns the tuple (id from `getImageId( file )`, scaled
    array).
    """

    identifier = getImageId( file )
    pixels = image.img_to_array( image.load_img( file, target_size = (size, size) ) )

    return identifier, pixels / 255

# Number of test images sent through the network per `model.predict` call.
# Predicting in chunks avoids one framework call per image while keeping
# memory use bounded.
predictBatchSize = 64

ids = []
predictions = []

for start in range( 0, len(testFiles), predictBatchSize ):
    # Load with the same resolution the model was built for.
    loaded = [ getImage( f, imgSize ) for f in testFiles[start:start + predictBatchSize] ]

    ids.extend( imgID for imgID, _ in loaded )
    # One row of class probabilities per image in the chunk.
    predictions.extend( model.predict( np.array( [ img for _, img in loaded ] ) ) )

ids = np.array(ids)
predictions = np.array( predictions )

print( predictions.shape )

In [None]:
# One row per test image (indexed by its id), one probability column per breed.
testData = pd.DataFrame( predictions,
                         columns = breeds,
                         index = pd.Index( ids, name = "id" ) )

In [None]:
# Write the prediction table to disk.
testData.to_csv( "testRes.csv" )