A program that reads and processes images for a Convolutional Neural Network (CNN) that classifies images as good or bad.

In [None]:
import numpy as np
import pandas as pd
import os
from glob import glob
import matplotlib.pyplot as plt
%matplotlib inline
import cv2

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.utils import np_utils
from keras import optimizers
from keras import callbacks
from keras.models import load_model
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from scipy.misc import imresize
from sklearn.preprocessing import OneHotEncoder

In [None]:
def read_images(paths):
    """
    Reads every ``*.jpg`` matched by ``paths`` as a flattened grayscale row

    Parameters
    ----------
    paths : string
        Prefix that ``'*.jpg'`` is appended to before globbing; when it names
        a directory it must end with a path separator.

    Returns
    ----------
    numpy array with one row per image (rows follow the sorted file names),
    each row being the grayscale pixels of that image flattened

    Raises
    ----------
    ValueError
        If no file matches, a file cannot be decoded, or the images do not
        all have the same dimensions.
    """
    pattern = paths + '*.jpg'
    # glob returns matches in arbitrary order; sort so that the row order is
    # reproducible between runs and machines.
    files = sorted(glob(pattern))
    if not files:
        raise ValueError("No .jpg images found for pattern: {}".format(pattern))

    data = []
    for file in files:
        image = cv2.imread(file)
        # cv2.imread does not raise on an unreadable file, it returns None;
        # fail here with the file name instead of deep inside cvtColor.
        if image is None:
            raise ValueError("Could not read image: {}".format(file))
        data.append(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))

    # Rows can only be stacked into one 2-D array if every image has the
    # same height and width.
    shapes = {gray.shape for gray in data}
    if len(shapes) > 1:
        raise ValueError(
            "Images do not share one size, found: {}".format(sorted(shapes))
        )

    data = np.array(data)
    # Flatten each image into a single feature row
    data = data.reshape((len(data), -1))
    print("+++++++++++++++++++++++")
    print(data.shape)
    return data

In [None]:
def process_images(images, size = 60):
    """
    Load each image file in 'images' and convert it to grayscale
    Code from https://github.com/jameslawlor/kaggle_galaxy_zoo/blob/master/galaxy_zoo_keras.ipynb

    Parameters
    ----------
    images : iterable of strings
        Paths of the image files to load.
    size : integer
        Currently unused: no centering, cropping or resizing is performed.
        Kept so existing calls that pass it keep working.

    Returns
    ----------
    list of 2-D grayscale arrays, one per input path, in input order

    Raises
    ----------
    ValueError
        If a file cannot be read as an image.
    """
    y = []

    for img in images:
        print(img)
        image = cv2.imread(img)
        # cv2.imread returns None instead of raising when a file is missing
        # or not decodable; report which path was the problem.
        if image is None:
            raise ValueError("Could not read image: {}".format(img))
        y.append(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))

    return y

In [None]:
def scale_features(X):
    """
    Convert X to float32 and divide every value by 255

    Parameters
    ----------
    X : np array of any dimensions
        Presumably 8-bit pixel values (0-255), in which case the result
        lies between 0 and 1.

    Returns
    ----------
    new float32 array with the same dimensions as X; X itself is not modified
    """
    # astype makes a copy, so the division never touches the caller's array
    as_float = X.astype("float32")
    return as_float / 255

In [None]:
def cnn_layers(x_train, y_train, x_test, y_test, batch_size = 4, nb_classes = 2, nb_epoch = 20, input_size = (60,60, 3)):
    """
    Fits a support vector classifier and evaluates it on the test split

    Despite the name, no neural network layers are built here: the model is
    an sklearn SVC, and the Keras-style options below are accepted but ignored.

    Parameters
    ------------
    x_train = array of training features
    x_test = array of test features
    y_train = training labels (data frame or array)
    y_test = test labels (data frame or array)
    batch_size = integer (unused)
    nb_classes = integer (unused)
    nb_epoch = integer (unused)
    input_size = tuple (unused)

    Returns
    ------------
    tuple of (fitted classifier, accuracy score on the test split)
    """
    from sklearn.metrics import accuracy_score, classification_report
    from sklearn.svm import SVC

    classifier = SVC(gamma=0.001)
    classifier.fit(x_train, y_train)
    predictions = classifier.predict(x_test)

    # Per-class precision / recall / f1 summary for the test split
    report = classification_report(y_test, predictions)
    print(report)

    accuracy = accuracy_score(y_test, predictions)
    return classifier, accuracy

In [None]:
def convert_targets(targets):
    """
    One-hot encodes the target labels

    Parameters
    ----------
    targets : labels accepted by pandas.get_dummies

    Returns
    ----------
    array with one row per label and one indicator column per distinct value
    """
    indicator_frame = pd.get_dummies(targets)
    return indicator_frame.values

In [None]:
# Label table for the images; the 'Label' column is used as the target below.
df_train = pd.read_csv('/Users/micha/ea-applications/data/test-images.csv')
print(df_train.head())

# The trailing slash matters: read_images appends '*.jpg' directly to this string.
paths = '/Users/micha/ea-applications/data/training-test-images/'
train_images = read_images(paths)

# NOTE(review): labels are paired with images purely by position, so this
# assumes the CSV rows are in the same order as the files returned by
# read_images (which come from glob) -- verify before trusting the scores.
y = df_train['Label'].values

# Hold out 20% of the samples for evaluation; fixed seed for a repeatable split.
x_train, x_test, y_train, y_test = train_test_split(train_images, y, random_state=42, test_size=0.2)
print("------------------")
print(x_train.shape)
print(y_train)

In [None]:
# Fit the classifier on the training split and show its accuracy on the test split.
model, metrics = cnn_layers(
    x_train,
    y_train,
    x_test,
    y_test,
    batch_size=4,
    nb_classes=2,
    nb_epoch=10,
)
print(metrics)