In [31]:
# All imports

%matplotlib inline

import cv2
import glob, os
import imageio
import matplotlib.pyplot as plt
import numpy

from keras import backend as K
from keras import regularizers
from keras.callbacks import EarlyStopping
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dense, Dropout, Flatten
from keras.models import Sequential

from skimage import feature
from sklearn.metrics import roc_curve, auc
from sklearn.svm import SVC

from keras.preprocessing.image import img_to_array, array_to_img
from keras.utils.vis_utils import model_to_dot

from IPython.display import SVG, display


In [32]:
# Shared image-size constants (pixels) used when building feature vectors.
IMG_WIDTH = 32
IMG_HEIGHT = 32

# Shape of one sample fed to the convolutional model. The first spatial
# dimension is doubled; the position of the 3-channel axis follows the Keras
# backend's image data format.
INPUT_SHAPE = (
    (3, IMG_WIDTH * 2, IMG_HEIGHT)
    if K.image_data_format() == 'channels_first'
    else (IMG_WIDTH * 2, IMG_HEIGHT, 3)
)

In [33]:
# All image processing and plot related

def plotImage(image):
    """Display an OpenCV image inline with matplotlib.

    The image is converted with cv2.COLOR_BGR2RGB before being handed to
    plt.imshow, then the figure is shown.
    """
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.show()

def plotImageFromPath(imagePath):
    """Read the file at imagePath with cv2.imread and display it via plotImage."""
    loadedImage = cv2.imread(imagePath)
    plotImage(loadedImage)

def shape_cannyEdgesOld(cv2Image, cannyImagePath, lowThreshold=10, highThreshold=255):
    """Write the Canny edge map of a BGR image to cannyImagePath.

    Args:
        cv2Image: image array as returned by cv2.imread (converted with
            cv2.COLOR_BGR2GRAY before edge detection).
        cannyImagePath: destination file path passed to cv2.imwrite.
        lowThreshold: lower hysteresis threshold for cv2.Canny (default 10).
        highThreshold: upper hysteresis threshold for cv2.Canny (default 255).

    The previous median-based "automatic" threshold computation was never
    used (the thresholds were always the fixed 10/255 pair), so it has been
    removed; the fixed values are now overridable defaults.
    """
    grayScaleImage = cv2.cvtColor(cv2Image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(grayScaleImage, lowThreshold, highThreshold)
    # Kept for backward compatibility: pushes the edge map to an OpenCV window.
    cv2.imshow('Canny edges', edges)
    cv2.imwrite(cannyImagePath, edges)

def shape_cannyEdges(cv2Image, cannyImagePath, lowThreshold=10, highThreshold=255):
    """Write the Canny edge map of a BGR image to cannyImagePath.

    Three marker pixels (pure blue, green and red in BGR order) are stamped
    into the top-left corner before grayscale conversion, exactly as before.
    NOTE(review): the purpose of these marker pixels is not evident from this
    file - confirm before removing them.

    Args:
        cv2Image: image array as returned by cv2.imread. It is no longer
            modified; the markers are written to a private copy.
        cannyImagePath: destination file path passed to cv2.imwrite.
        lowThreshold: lower hysteresis threshold for cv2.Canny (default 10).
        highThreshold: upper hysteresis threshold for cv2.Canny (default 255).

    The unused median-based threshold computation has been removed; the
    thresholds actually applied were always the fixed 10/255 pair.
    """
    # Work on a copy so the caller's array is not silently altered.
    markedImage = cv2Image.copy()
    markedImage[0, 0] = (255, 0, 0)
    markedImage[0, 1] = (0, 255, 0)
    markedImage[0, 2] = (0, 0, 255)

    grayScaleImage = cv2.cvtColor(markedImage, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(grayScaleImage, lowThreshold, highThreshold)
    # Kept for backward compatibility: pushes the edge map to an OpenCV window.
    cv2.imshow('Canny edges', edges)
    cv2.imwrite(cannyImagePath, edges)
    
def shape_hog(cv2Image, fileName):
    """Compute a HOG visualisation of a BGR image and write it to fileName.

    Args:
        cv2Image: image array as returned by cv2.imread.
        fileName: destination path passed to cv2.imwrite.
    """
    grayScaleImage = cv2.cvtColor(cv2Image, cv2.COLOR_BGR2GRAY)
    hogOptions = dict(orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1))
    try:
        # Current scikit-image spelling of the keyword.
        _, hog_image = feature.hog(grayScaleImage, visualize=True, **hogOptions)
    except TypeError:
        # Older scikit-image releases only accept the 'visualise' spelling.
        _, hog_image = feature.hog(grayScaleImage, visualise=True, **hogOptions)

    cv2.imshow('HOG', hog_image)
    cv2.imwrite(fileName, hog_image)
    
def create_canny_edge_image(origImageDir, origImageName):
    """Create the "canny_"-prefixed edge image next to an original image.

    Prints the original image path, reads it with cv2.imread, delegates to
    shape_cannyEdges, and returns the path of the edge image.
    """
    sourcePath = '/'.join((origImageDir, origImageName))
    print(sourcePath)

    targetPath = '/'.join((origImageDir, "canny_" + origImageName))
    shape_cannyEdges(cv2.imread(sourcePath), targetPath)
    return targetPath
    
def create_canny_for_all_img(origImageDir):
    """Ensure every non-canny .jpg in origImageDir has a "canny_" companion.

    Files whose name starts with "canny" or does not end in ".jpg" are
    ignored; an edge image is only generated when its target path does not
    exist yet.
    """
    for candidate in os.listdir(origImageDir):
        if candidate.startswith("canny") or not candidate.endswith(".jpg"):
            continue
        if os.path.exists(origImageDir + '/' + "canny_" + candidate):
            continue
        create_canny_edge_image(origImageDir, candidate)
            
def create_color_histogram (origImageDir, origImageName):
    """Print the flattened 8x8x8 colour histogram of an image.

    The image is read with cv2.imread and the histogram is computed over all
    three channels with 8 bins each across the range [0, 256).
    """
    loadedImage = cv2.imread(origImageDir + '/' + origImageName)
    channels = [0, 1, 2]
    binsPerChannel = [8, 8, 8]
    valueRanges = [0, 256, 0, 256, 0, 256]
    histogram = cv2.calcHist([loadedImage], channels, None, binsPerChannel, valueRanges)
    print(histogram.flatten())

In [34]:
# All image feature vector related

def create_features_for_all_raw_img(origImageDir, isSpam):
    """Build labelled feature vectors for every original (non-canny) .jpg.

    Returns a numpy array with one create_feature_vector row per file in
    origImageDir whose name ends in ".jpg" and does not start with "canny".
    """
    vectors = []
    for candidate in os.listdir(origImageDir):
        if candidate.endswith(".jpg") and not candidate.startswith("canny"):
            vectors.append(create_feature_vector(origImageDir, candidate, isSpam))
    return numpy.array(vectors)
            
def create_features_for_all_canny_img(origImageDir, isSpam):
    """Build labelled feature vectors for every canny edge .jpg.

    Returns a numpy array with one create_feature_vector row per file in
    origImageDir whose name starts with "canny" and ends in ".jpg".
    """
    vectors = []
    for candidate in os.listdir(origImageDir):
        if candidate.startswith("canny") and candidate.endswith(".jpg"):
            vectors.append(create_feature_vector(origImageDir, candidate, isSpam))
    return numpy.array(vectors)
    
def create_feature_vector(origImageDir, origImageName, isSpam, numRow=IMG_HEIGHT, numCol=IMG_WIDTH):
    """Turn one image file into a flat, labelled feature vector.

    Args:
        origImageDir: directory containing the image.
        origImageName: file name of the image inside origImageDir.
        isSpam: label value appended as the last element of the vector.
        numRow: target image height in pixels.
        numCol: target image width in pixels.

    Returns:
        1-D float array of length 3 * numRow * numCol + 1: the three colour
        planes (in the channel order cv2.imread returns), each flattened row
        by row and scaled to [0, 1], followed by isSpam.

    Raises:
        IOError: if cv2.imread cannot read the file.
    """
    origImagePath = origImageDir + '/' + origImageName
    plainImage = cv2.imread(origImagePath)
    if plainImage is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("Could not read image: " + origImagePath)

    # cv2.resize expects dsize as (width, height), i.e. (columns, rows).
    resizedImage = cv2.resize(plainImage, (numCol, numRow))

    # (rows, cols, channel) -> (channel, rows, cols), scaled to [0, 1].
    # Same values and ordering as filling a (3, h, w) array pixel by pixel.
    res = numpy.transpose(resizedImage, (2, 0, 1)).astype(float) / 255.0
    return numpy.append(res.flatten(), isSpam)
        
def get_feature_with_labels(howMany=None, nonSpamDir="NaturalImages", spamDir="SpamImages"):
    """Build the shuffled, labelled feature matrix for both image classes.

    Each row is: raw-image features, canny-image features, label (0 for
    nonSpamDir, 1 for spamDir). Missing canny images are generated first.

    Args:
        howMany: if not None, keep only the first howMany shuffled rows.
        nonSpamDir: directory of non-spam images.
        spamDir: directory of spam images.

    Returns:
        2-D numpy array of shuffled rows.

    Raises:
        ValueError: if either directory contains no usable .jpg image.
    """
    data_nonSpam = _paired_image_features(nonSpamDir, 0)
    data_Spam = _paired_image_features(spamDir, 1)

    data = numpy.concatenate((data_nonSpam, data_Spam), axis=0)
    numpy.random.shuffle(data)

    print("data_nonSpam, using directory:", nonSpamDir)
    print(data_nonSpam)
    print("data_Spam, using directory:", spamDir)
    print(data_Spam)
    print("data")
    print(data)

    if howMany is not None:
        data = data[0:howMany]

    return data


def _paired_image_features(imageDir, isSpam):
    """Return one row per original image: raw features + canny features + label."""
    create_canny_for_all_img(imageDir)
    rawNames = sorted(name for name in os.listdir(imageDir)
                      if (not name.startswith("canny")) and name.endswith(".jpg"))
    if not rawNames:
        raise ValueError("No .jpg images found in directory: " + imageDir)

    rows = []
    for rawName in rawNames:
        # Pair each image with its own edge map by file name. Two independent
        # os.listdir scans give no guarantee that rows line up by position.
        rawVector = create_feature_vector(imageDir, rawName, isSpam)
        cannyVector = create_feature_vector(imageDir, "canny_" + rawName, isSpam)
        # Drop the label from the raw half; the canny half carries it last.
        rows.append(numpy.concatenate((rawVector[:-1], cannyVector)))
    return numpy.array(rows)

In [35]:
# Build the labelled feature matrix once so that later cells can reuse `data`.
data = get_feature_with_labels(
    nonSpamDir="NaturalImages",
    spamDir="SpamImages",
)
# Alternative dataset (uncomment to use instead):
#data = get_feature_with_labels(nonSpamDir="ChallengeHam1", spamDir="ChallengeSpam1")

data_nonSpam, using directory: NaturalImages
[[0.         0.         0.00392157 ... 0.         0.17254902 0.        ]
 [0.18039216 0.56078431 0.27843137 ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.25490196 0.27058824 0.28627451 ... 0.         0.         0.        ]
 [0.1372549  0.14509804 0.14509804 ... 0.         0.         0.        ]
 [0.37647059 0.83529412 0.89411765 ... 0.         0.         0.        ]]
data_Spam, using directory: SpamImages
[[0.07058824 0.78431373 0.0745098  ... 0.         0.         1.        ]
 [0.95686275 0.94509804 0.94509804 ... 0.         0.         1.        ]
 [1.         1.         1.         ... 0.         0.         1.        ]
 ...
 [0.76470588 0.76078431 0.78431373 ... 0.         0.         1.        ]
 [1.         1.         1.         ... 0.         0.         1.        ]
 [0.83921569 0.98823529 0.95294118 ... 0.         0.         1.        ]]
data
[[0.96862745 0.96078431

In [36]:
# Plot related
def draw_accuracy_and_loss(history):
    """Plot training vs. validation accuracy and loss per epoch.

    Args:
        history: object returned by model.fit; its .history dict must hold
            'loss'/'val_loss' and either 'acc'/'val_acc' or
            'accuracy'/'val_accuracy'.
    """
    recorded = history.history
    # Older Keras records the metric as 'acc'; newer releases use 'accuracy'.
    accuracyKey = 'acc' if 'acc' in recorded else 'accuracy'

    for label, key in (('accuracy', accuracyKey), ('loss', 'loss')):
        plt.plot(recorded[key])
        plt.plot(recorded['val_' + key])
        plt.title('model ' + label)
        plt.ylabel(label)
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()
    
def draw_roc(y_predicted, y_actual):
    """Plot the ROC curve of predictions against the true labels.

    Args:
        y_predicted: predicted labels or scores for each sample.
        y_actual: true binary labels for the same samples.
    """
    # roc_curve's signature is (y_true, y_score): the ground truth goes first.
    fpr, tpr, thresholds = roc_curve(y_actual, y_predicted)
    roc_auc = auc(fpr, tpr)
    plt.figure()
    plt.plot(fpr, tpr, color='darkorange', lw=1, label='ROC curve (area = %0.2f)' % roc_auc)
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.show()

def plot_keras_model(model, show_shapes=True, show_layer_names=True):
    """Render a Keras model graph as an inline SVG.

    The model is converted with model_to_dot, rendered to SVG through the
    'dot' program, and displayed in the notebook.
    """
    graph = model_to_dot(model,
                         show_shapes=show_shapes,
                         show_layer_names=show_layer_names)
    display(SVG(graph.create(prog='dot', format='svg')))

In [39]:
# Machine learning algorithm related

# Feed forward
def run_feed_forward(data, do_plot=False):
    """Train and evaluate a two-hidden-layer dense network.

    Args:
        data: 2-D array whose last column is the label and whose remaining
            columns are features. The first 70% of rows are used for
            training, the rest for evaluation.
        do_plot: when True, draw the model graph and the training curves.

    Returns:
        The value returned by model.evaluate on the held-out rows.
    """
    feature_count = len(data[0]) - 1
    hidden_units = 300
    max_epochs = 100

    split_at = int(len(data) * .7)
    train_rows = data[:split_at]
    test_rows = data[split_at:]

    label_col = len(train_rows[0]) - 1

    x_train = train_rows[:, :label_col]
    y_train = train_rows[:, label_col:].flatten()

    x_test = test_rows[:, :label_col]
    y_test = test_rows[:, label_col:].flatten()

    # Stop once the validation loss has not improved for 5 epochs.
    early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=1, mode='auto')

    # Regularised variant of the hidden layers, kept for experimentation:
    #   Dense(..., activation='relu',
    #         kernel_regularizer=regularizers.l2(0.01),
    #         activity_regularizer=regularizers.l1(0.01))
    model = Sequential([
        Dense(hidden_units, input_dim=feature_count, activation='relu'),
        Dropout(0.5),
        Dense(hidden_units, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])

    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    history = model.fit(x_train,
                        y_train,
                        epochs=max_epochs,
                        batch_size=64,
                        validation_split=0.33,
                        callbacks=[early_stop])

    score = model.evaluate(x_test, y_test, batch_size=64)
    print(score)

    if do_plot:
        plot_keras_model(model)
        draw_accuracy_and_loss(history)

    return score

# CNN
def run_cnn(data, do_plot=False):
    """Train and evaluate a small convolutional network.

    Args:
        data: 2-D array whose last column is the label and whose remaining
            columns are the flattened raw-image planes followed by the
            flattened canny-image planes. The first 70% of rows are used for
            training, the rest for evaluation.
        do_plot: when True, draw the model graph and the training curves.

    Returns:
        The value returned by model.evaluate on the held-out rows.
    """
    EPOCH = 100

    train_len = int(len(data) * .7)
    data_train = data[0:train_len]
    data_test = data[train_len:]

    num_col = len(data_train[0]) - 1

    # Convert 1d arrays back to images for convolution
    x_train = _rows_to_cnn_images(data_train[:, 0:num_col])
    y_train = data_train[:, num_col:].flatten()

    x_test = _rows_to_cnn_images(data_test[:, 0:num_col])
    y_test = data_test[:, num_col:].flatten()

    # Stop once the validation loss has not improved for 5 epochs.
    early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=1, mode='auto')

    model = Sequential()
    model.add(Conv2D(32, (3, 3), input_shape=INPUT_SHAPE))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    history = model.fit(x_train,
                        y_train,
                        epochs=EPOCH,
                        batch_size=64,
                        validation_split=0.33,
                        callbacks=[early_stop])

    score = model.evaluate(x_test, y_test, batch_size=64)
    print (score)

    if do_plot:
        plot_keras_model(model)
        draw_accuracy_and_loss(history)

    return score


def _rows_to_cnn_images(rows):
    """Rebuild stacked raw+canny images of shape INPUT_SHAPE from feature rows.

    Each row holds two images back to back, each stored as three colour
    planes flattened row by row (the layout create_feature_vector produces).
    A plain reshape to INPUT_SHAPE would read those values as if they were
    pixel-interleaved and scramble the picture, so the planes are moved into
    place explicitly: the raw image ends up above its canny image.
    """
    channels_first = K.image_data_format() == 'channels_first'
    if channels_first:
        channels, stacked_height, width = INPUT_SHAPE
    else:
        stacked_height, width, channels = INPUT_SHAPE
    height = stacked_height // 2

    count = rows.shape[0]
    # Axes: (sample, raw-or-canny, channel, y, x)
    planes = rows.reshape(count, 2, channels, height, width)
    if channels_first:
        # -> (sample, channel, raw-or-canny, y, x) -> (sample, channel, 2*h, w)
        ordered = planes.transpose(0, 2, 1, 3, 4)
    else:
        # -> (sample, raw-or-canny, y, x, channel) -> (sample, 2*h, w, channel)
        ordered = planes.transpose(0, 1, 3, 4, 2)
    return ordered.reshape((count,) + tuple(INPUT_SHAPE))

# Svm
def run_svm(data, do_plot=False):
    """Train and evaluate an RBF-kernel support vector classifier.

    Args:
        data: 2-D array whose last column is the label and whose remaining
            columns are features. The first 70% of rows are used for
            training, the rest for evaluation.
        do_plot: when True, draw a ROC curve for the held-out predictions.

    Returns:
        The classifier's score on the held-out rows.
    """
    split_at = int(len(data) * .7)
    train_rows = data[:split_at]
    test_rows = data[split_at:]

    label_col = len(train_rows[0]) - 1

    x_train = train_rows[:, :label_col]
    y_train = train_rows[:, label_col:].flatten()

    x_test = test_rows[:, :label_col]
    y_test = test_rows[:, label_col:].flatten()

    classifier = SVC(C=1.0, kernel='rbf')
    classifier.fit(x_train, y_train)
    y_predicted = classifier.predict(x_test)
    score = classifier.score(x_test, y_test)
    print(score)

    if do_plot:
        draw_roc(y_predicted, y_test)

    return score

In [None]:
# Main
    
def main():
    """Run the selected model numIter times and print the mean score.

    Uses the notebook-level `data` array built in an earlier cell.
    """
    # Set to e.g. 100 to limit the number of rows when regenerating data.
    numRowsToConsider = None
    numIter = 1
    total = 0
    for _ in range(numIter):
        # Alternatives kept for experimentation:
        #data = get_feature_with_labels(numRowsToConsider, nonSpamDir="NaturalImages", spamDir="SpamImages")
        #total += run_svm(data, True)
        #total += run_feed_forward(data, True)[1]
        total += run_cnn(data, True)[1]
    total /= numIter
    print(total)

main()

Train on 781 samples, validate on 386 samples
Epoch 1/100
Epoch 2/100

In [None]:
# This section and the rest of the notebook are just tests
def extract_features(imagePath, vector_size=32):
    """Return a flat KAZE descriptor vector for the image at imagePath.

    The vector_size keypoints with the strongest response are kept, their
    descriptors are flattened, and the result is zero-padded up to
    vector_size * 32 values when shorter. Longer results are returned as is.

    Args:
        imagePath: path of the image, read with imageio.imread.
        vector_size: maximum number of keypoints to keep.

    Returns:
        1-D numpy array of descriptor values (all zeros if no descriptors
        could be computed).
    """
    image = imageio.imread(imagePath)
    alg = cv2.KAZE_create()
    kps = alg.detect(image)
    # Strongest keypoints first.
    kps = sorted(kps, key=lambda x: -x.response)[:vector_size]
    print (len(kps))
    kps, dsc = alg.compute(image, kps)

    # NOTE(review): padding assumes 32 values per keypoint - confirm this
    # matches the KAZE descriptor length in use.
    needed_size = (vector_size * 32)
    if dsc is None or len(dsc) == 0:
        # No keypoints found: nothing to flatten, return an all-zero vector.
        return numpy.zeros(needed_size)

    print (len(dsc))
    print (len(dsc[0]))
    dsc = dsc.flatten()
    print (len(dsc))
    if dsc.size < needed_size:
        # numpy is imported under its full name in this notebook, not as np.
        dsc = numpy.concatenate([dsc, numpy.zeros(needed_size - dsc.size)])
    return dsc

def main2():
    """Scratch demo of the feature/label column split used by the run_* functions.

    Builds a small deterministic 10x3 array, then prints the whole array,
    all columns except the last (the "features"), and the last column
    flattened (the "labels").
    """
    numpy.set_printoptions(threshold=100)

    # Deterministic stand-in data: numpy.ndarray((10, 3)) would expose
    # uninitialised memory and print different garbage on every run.
    x = numpy.arange(30, dtype=float).reshape(10, 3)
    print(x)

    num_col = len(x[0]) - 1

    print(x[:, 0:num_col])

    print(x[:, num_col:].flatten())