A program that reads and processes images for a Convolutional Neural Network (CNN) to classify as images as good or bad.

In [7]:
import numpy as np
import pandas as pd
import os
from glob import glob
import matplotlib.pyplot as plt
%matplotlib inline
import cv2

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.utils import np_utils
from keras import optimizers
from keras import callbacks
from keras.models import load_model
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from scipy.misc import imresize  


In [2]:
def read_images(paths): 
    """
    Reads in all images and returns list of picture id numbers based on the image name
    
    Parameters
    ----------
    paths : string
    
    Returns
    ----------
    images and list of id numbers
    """
    # Get list of images
    images = (glob(paths + '*.jpg'))
    # Read images from list
    data = [cv2.imread(file) for file in images]

    return data

In [8]:
def process_images(images, size = 60):
    """
    Import image at 'paths', center and crop to size
    Code from https://github.com/jameslawlor/kaggle_galaxy_zoo/blob/master/galaxy_zoo_keras.ipynb
    """

    count = len(images)
    arr = np.zeros(shape=(count,size,size,3))
    for i in range(count):
        img = images[i]
        img = img.T[:,106:106*3,106:106*3] # Crop 424x424x3 to 212x212x3
        img = imresize(img,size=(size,size,3),interp="cubic") # Shrink size to make easier to compute
        arr[i] = img

    return arr.astype(int)

In [9]:
def cnn_layers(X_train, y_train, X_test, y_test, batch_size = 4, nb_classes = 4, nb_epoch = 20, input_size = (60,60, 3)):
    """
    Builds layers of Convolutional Neural Net
    Fits model to the data
    
    Parameters
    ------------
    X_train = array
    X_test = array
    y_train = data frame or array
    y_test = data frame or array
    batch_size = integer
    nb_classes = integer
    nb_epoch = integer
    input_size = list
    
    Returns
    ------------
    model metrics evaluation
    """
    
    model = Sequential()

    # First convolutional layer and pooling
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(32, 3, 3, border_mode='valid', input_shape=(input_size), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    # Second convolutional layer and pooling
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(32, 3, 3, border_mode='valid', input_shape=(input_size), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    # Flatten images
    model.add(Flatten())
    
    # First dense layer
    model.add(Dense(32, init='glorot_normal'))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    
    # Second dense layer
    model.add(Dense(32, init='glorot_normal'))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    
    # Output layer
    model.add(Dense(nb_classes, init='glorot_normal'))
    model.add(Activation('softmax'))
    
    # Initializes optimizer SGD
    # Need to see which learning rate (lr) achieves best results
    sgd = optimizers.SGD(lr=0.005, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=["accuracy"])
    
    # Early stopping batch = X_train, y_train, X_test, y_test
    # Need to experiment with patience 
    early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=7, verbose=1, mode='auto')
    
    checkpointer = callbacks.ModelCheckpoint(filepath=('checkpoint.hdf5'), verbose=1, save_best_only=True)
    
    # Hist = callbacks.History()
    
    model.fit(X_train, y_train, verbose=2, callbacks = [early_stopping, checkpointer], batch_size=batch_size, nb_epoch=nb_epoch, validation_data=(X_test, y_test))
    
    return model, model.evaluate(X_test, y_test, verbose=1)

In [10]:
def convert_targets(targets):
    return pd.get_dummies(targets).values

In [11]:
df_train = pd.read_csv('/Users/micha/ea-applications/data/test-images.csv')
print(df_train.head())
paths = '/Users/micha/ea-applications/data/training-test-images/Thermal/mytest/*MEDIA/'

train_images = read_images(paths)
train_arr = process_images(train_images)

y = np.array(df_train['Label'])

y = convert_targets(y)
X_train, X_test, y_train, y_test = train_test_split(train_arr, y, random_state=42, test_size=0.2)

  Image Date     MEDIA Image_Name  Label Unnamed: 4
0  2/16/2019  100MEDIA   DJI_0001      2        NaN
1  2/16/2019  100MEDIA   DJI_0002      2        NaN
2  2/16/2019  100MEDIA   DJI_0003      2        NaN
3  2/16/2019  100MEDIA   DJI_0004      2        NaN
4  2/16/2019  100MEDIA   DJI_0005      2        NaN


`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  if sys.path[0] == '':


In [12]:
model, metrics = cnn_layers(X_train, y_train, X_test, y_test, batch_size = 4, nb_classes = 4, nb_epoch = 10)
print(metrics)



Instructions for updating:
Colocations handled automatically by placer.




Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Train on 339 samples, validate on 85 samples
Epoch 1/10
 - 4s - loss: 4.0094 - acc: 0.7493 - val_loss: 3.6029 - val_acc: 0.7765

Epoch 00001: val_loss improved from inf to 3.60287, saving model to checkpoint.hdf5
Epoch 2/10
 - 2s - loss: 3.5184 - acc: 0.7817 - val_loss: 3.6029 - val_acc: 0.7765

Epoch 00002: val_loss did not improve from 3.60287
Epoch 3/10
 - 2s - loss: 3.5184 - acc: 0.7817 - val_loss: 3.6029 - val_acc: 0.7765

Epoch 00003: val_loss did not improve from 3.60287
Epoch 4/10
 - 2s - loss: 3.4709 - acc: 0.7847 - val_loss: 3.6029 - val_acc: 0.7765

Epoch 00004: val_loss did not improve from 3.60287
Epoch 5/10
 - 2s - loss: 3.3282 - acc: 0.7935 - val_loss: 3.6029 - val_acc: 0.7765

Epoch 00005: val_loss did not improve from 3.60287
Epoch 6/10
 - 2s - loss: 3.6135 - acc: 0.7758 - val_loss: 3.6029 - val_acc: 0.7765



In [15]:
# Put path to unlabeled images
paths = '/Users/micha/ea-applications/thermal/02-15-2019/100MEDIA/'

test_images = read_images(paths)
Xnew = process_images(train_images)

ynew = model.predict_classes(Xnew)
print(ynew)

`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  if sys.path[0] == '':


[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [None]:
# save the model to disk
filename = 'finalized_model.sav'
pickle.dump(model_boosting, open(filename, 'wb'))