# SuAVE LeNet CNN Model Generator
For a simple Python implementation see https://www.pyimagesearch.com/2016/08/01/lenet-convolutional-neural-network-in-python/

The model was originally described in http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf

## 1. Disable autoscroll and retrieve survey parameters from the URL

In [2]:
%%javascript
// Disable output auto-scrolling: the output area is always told not to scroll.
IPython.OutputArea.prototype._should_scroll = (lines) => false;

<IPython.core.display.Javascript object>

In [3]:
%%javascript
// Return the value of the query-string parameter `key` from the page URL.
// The parameter name is matched case-insensitively (regex "i" flag). When the
// parameter is absent, or present without a value, an empty string is returned.
function getQueryStringValue (key)
{  
    // The whole pattern is optional, so replace() always consumes the entire
    // query string and leaves only capture group 1 (the parameter value).
    // The characters ".", "+" and "*" in the key are backslash-escaped so they
    // are matched literally inside the regular expression.
    return unescape(window.location.search.replace(new RegExp("^(?:.*[&\\?]" + escape(key).replace(/[\.\+\*]/g, "\\$&") + "(?:\\=([^&]*))?)?.*$", "i"), "$1"));
}
// Assign a string to a Python variable in the notebook kernel.
// The value comes from the page URL and is therefore untrusted. Splicing it
// between single quotes lets a quote or backslash in the URL terminate the
// literal and run arbitrary Python. JSON.stringify emits a double-quoted,
// fully escaped literal that Python parses as an ordinary string.
function setKernelString(name, value) {
    IPython.notebook.kernel.execute(name + "=" + JSON.stringify(String(value)));
}

// Copy the survey parameters from the notebook URL into kernel variables.
setKernelString("survey_url", getQueryStringValue("surveyurl"));
setKernelString("views", getQueryStringValue("views"));
setKernelString("view", getQueryStringValue("view"));
setKernelString("user", getQueryStringValue("user"));
setKernelString("csv_file", getQueryStringValue("csv"));
setKernelString("dzc_file", getQueryStringValue("dzc"));
setKernelString("params", getQueryStringValue("params"));
setKernelString("active_object", getQueryStringValue("activeobject"));
// String(window.location) is the full URL of the notebook page.
setKernelString("full_notebook_url", window.location);

<IPython.core.display.Javascript object>

## 2. Import all packages (this might take a few seconds)

In [4]:
import warnings
warnings.filterwarnings('ignore')  # "error", "ignore", "always", "default", "module" or "once"
# import the necessary packages
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import img_to_array
from keras.utils import to_categorical
from imutils import paths

from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.layers.core import Dropout
from keras import backend as K
# import local lenet.py file describing the LeNet implementation with RELU activation functions
from lenet import LeNet

# More imports
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import csv
import pandas as pd
import re
import cv2
import os

# import the necessary packages for SVM predictor
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import imutils

# Import widget functionality
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import Markdown, display
def printmd(string):
    """Render *string* as Markdown in the notebook output."""
    rendered = Markdown(string)
    display(rendered)


Using TensorFlow backend.


## 3. Choose the number of training epochs and the batch size

In [11]:
from collections import OrderedDict

# Dropdown entries for the number of training epochs (menu text -> value)
epochCount = OrderedDict([
    ('25 Iterations', 25),
    ('50 Iterations', 50),
    ('75 Iterations', 75),
])


def f(epoch_count):
    """Return the selected dropdown value unchanged."""
    return epoch_count


# The selection is read later through epochNum.widget.result
epochNum = interact(f, epoch_count=epochCount)


interactive(children=(Dropdown(description='epoch_count', options=OrderedDict([('25 Iterations', 25), ('50 Ite…

In [14]:
# Dropdown entries for the training batch size (menu text -> value)
batchS = OrderedDict([
    ('32 Batch Size', 32),
    ('64 Batch Size', 64),
    ('128 Batch Size', 128),
])


def f(batch_size):
    """Return the selected dropdown value unchanged."""
    return batch_size


# The selection is read later through batchNum.widget.result
batchNum = interact(f, batch_size=batchS)

interactive(children=(Dropdown(description='batch_size', options=OrderedDict([('32 Batch Size', 32), ('64 Batc…

In [45]:
# Training settings: epoch count and batch size come from the two dropdown
# widgets, the initial learning rate is fixed
EPOCHS = epochNum.widget.result
BS = batchNum.widget.result
INIT_LR = 1e-3

# File extension appended to the image names taken from the CSV
suffix = '.jpg'

# Containers for the loaded images, the numeric targets and the raw labels
img_list, yset = [], []
labels = []

# Two dictionaries give a two-way mapping between label and class number
label_yval = {}   # label  -> number
yval_label = {}   # number -> label

## Run and choose the column name that corresponds to the label column to be predicted

In [46]:
# Read the survey CSV that lists the image names and their labels
csv_path = "../../temp_csvs/" + csv_file
df = pd.read_csv(csv_path)

# Image directory: "<second-to-last dzc path part>_<dzc file name without extension>"
dzc_file_array = dzc_file.split("/")
dzc_stem = dzc_file_array[-1].split(".")[0]
img_path = "../../images/" + dzc_file_array[-2] + "_" + dzc_stem

# Every CSV column is offered as a candidate label column
toPredict = list(df.columns.values)
pred_menu = OrderedDict((column, column) for column in toPredict)


def f(predictions_menu):
    """Return the selected column name unchanged."""
    return predictions_menu


# choose which label for predictions; read later via out2.widget.result
out2 = interact(f, predictions_menu=pred_menu)

interactive(children=(Dropdown(description='predictions_menu', options=OrderedDict([('#img', '#img'), ('#name'…

In [47]:
# Slider for the image size in pixels (20 to 300 in steps of 10, default 60)
a = widgets.IntSlider(
    value=60,
    min=20,
    max=300,
    step=10,
    description='Size, pixels:',
)
display(a)

IntSlider(value=60, description='Size, pixels:', max=300, min=20, step=10)

## Grab the images and configure them for predicting

### This might take a little while depending on the size of the dataset

In [48]:
# Columns used for training: '#img' and the column picked in the
# predictions menu
predCol = df[out2.widget.result]
nameCol = df['#img']
# Image size selected with the slider
im_dimension = a.value

def matchImage(curr_image_array, image_list):
    """Return the index of the first entry of image_list equal to curr_image_array.

    Entries are compared with np.array_equal. None is returned when no entry
    of image_list matches.
    """
    return next(
        (position for position, candidate in enumerate(image_list)
         if np.array_equal(curr_image_array, candidate)),
        None,
    )
    
    
# Label of every CSV row, taken from the column chosen for prediction
labels = [predCol[i] for i in range(len(predCol))]

# grab all unique labels
uni_labels = list(set(labels))

# assign each label a class number and keep the mapping in both directions
for class_number, unique_label in enumerate(uni_labels):
    yval_label[class_number] = unique_label
    label_yval[unique_label] = class_number

# class number of every row, in CSV row order
yset = [label_yval[row_label] for row_label in labels]

# gather images from path created from file names in csv file
img_list = []
for i in range(len(nameCol)):
    base_filename = nameCol[i]
    fileName = os.path.join(img_path, base_filename + suffix)
    im = cv2.imread(fileName)
    if im is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # instead of raising. Stop here and name the offending path rather
        # than failing later inside cv2.resize with an unrelated message.
        raise IOError("Could not read image file: " + fileName)
    im = cv2.resize(im, (im_dimension, im_dimension))
    im = img_to_array(im)
    img_list.append(im)

# Shuffle the data: p[k] is the original row index of the sample that ends up
# at shuffled position k
p = np.random.permutation(len(yset))

# Reorder labels and images with the same permutation so pairs stay aligned
Y = [yset[row] for row in p]
X = [img_list[row] for row in p]

# split the test and training set 75:25
split = int(len(X) * .75)

xtrain = X[:split]
xtest = X[split:]
ytrain = Y[:split]
ytest = Y[split:]

# Record for every CSV row whether it went to the training or the test set.
# The permutation already gives the original row of each shuffled sample, so
# there is no need to search for it by comparing pixel arrays. That search was
# quadratic and mislabelled rows (or raised KeyError) whenever two rows had
# identical images or identical names.
test_train_dict = {}
test_train_list = [None] * len(nameCol)
for position, row in enumerate(p):
    role = "train" if position < split else "test"
    test_train_list[int(row)] = role
    test_train_dict[nameCol[int(row)]] = role


# Convert the image lists to float arrays and divide every value by 255
trainX = np.array(xtrain, dtype="float") / 255.0
testX = np.array(xtest, dtype="float") / 255.0

ytrain = np.array(ytrain)
ytest = np.array(ytest)

# convert labels from int to vectors, one position per unique label
class_count = len(uni_labels)
trainY = np_utils.to_categorical(ytrain, class_count)
testY = np_utils.to_categorical(ytest, class_count)

# construct the image generator for data augmentation
aug = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

# initialize the model for square, 3-channel inputs and compile it
model = LeNet.build(
    width=im_dimension,
    height=im_dimension,
    depth=3,
    classes=class_count,
)
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(
    loss="categorical_crossentropy",
    optimizer=opt,
    metrics=["accuracy"],
)

## Train the predictive model

### Training time depends on the size of the data set and may take a few minutes

In [49]:
# train the network
# With fewer training images than one batch, the integer division yields 0
# steps per epoch; always run at least one step.
steps_per_epoch = max(1, len(trainX) // BS)
H = model.fit_generator(aug.flow(trainX, trainY, batch_size=BS),
    validation_data=(testX, testY), steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS, verbose=1)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


## Take the original data and predict based on the model

In [39]:
# Rescale the original input images the same way as the training data
img_check = np.array(img_list, dtype="float") / 255.0

# Run all data through the prediction model that was created;
# preds holds one row of class scores per image
preds = model.predict(img_check)

# Highest score of every row, reported as the prediction confidence
prediction_confidence = [np.amax(scores) for scores in preds]

# Class number with the highest score. np.argmax picks the first maximum, like
# the previous np.where lookup, but does not raise IndexError when a row
# contains NaN.
predictionsMade = [int(np.argmax(scores)) for scores in preds]

print(prediction_confidence)

# Count how many correct predictions were made
correct = sum(
    1 for predicted, actual in zip(predictionsMade, yset)
    if predicted == actual
)

# float() keeps the ratio fractional even under Python 2 integer division
print("Accuracy: " + str(float(correct) / len(yset)))

[0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422, 0.3643422

In [21]:
#Generate model file and save
# File name: <user>_cnn_<label column>_<epochs>_<batch size>.h5
modelName = (user + "_cnn_" + out2.widget.result
             + "_" + str(epochNum.widget.result)
             + "_" + str(batchNum.widget.result) + ".h5")
modelPath = "models/"

# Make sure the target directory exists before the model file is written
if not os.path.isdir(modelPath):
    os.makedirs(modelPath)

model.save(os.path.join(modelPath, modelName))

OSError: Driver write request failed (file write failed: time = Fri Feb 15 20:31:07 2019
, filename = 'models/ilyaj_cnn_Common Name_25_32.h5', file descriptor = 59, errno = 28, error message = 'No space left on device', buf = 0x7fffcf24aa60, total write size = 96, bytes this sub-write = 96, bytes actually written = 18446744073709551615, offset = 0)

In [35]:
#Load model
from keras.models import load_model

# Read the model back from the path used when saving
saved_model_file = os.path.join(modelPath, modelName)
model2 = load_model(saved_model_file)

## Enter a new header for the prediction column

In [23]:
# Translate back to original csv label names
finalPred = [yval_label[class_number] for class_number in predictionsMade]

from IPython.display import display

# Both text boxes start with the same suggested column header
default_header = "predicted " + out2.widget.result
input_text = widgets.Text(
    value=default_header,
    placeholder='Type label here',
    disabled=False,
)
output_text = widgets.Text(
    value=default_header,
    placeholder='New Header will be displayed here',
    disabled=False,
)


def bind_input_to_output(sender):
    """Copy the current text of input_text into output_text."""
    output_text.value = input_text.value


# Keep the second box in step with the first one
input_text.observe(bind_input_to_output)

print("Input new column Header Label: ")

display(input_text)
display(output_text)

Input new column Header Label: 


Text(value='predicted Gender', placeholder='Type label here')

Text(value='predicted Gender', placeholder='New Header will be displayed here')

## Write the predictions back to the original CSV

In [25]:
# Append the new columns: predicted label, prediction confidence and the
# train/test assignment, all named after the header typed by the user
pred_conf_col = "pred_conf " + input_text.value
test_train_col = "test_train " + input_text.value

df[input_text.value] = finalPred
df[pred_conf_col] = prediction_confidence
df[test_train_col] = test_train_list


print(input_text.value)

#Get file path
path = "../../temp_csvs"

# new file name: strip the actual extension instead of assuming that it is
# exactly four characters long
new_file = os.path.splitext(csv_file)[0] + '_v1.csv'
new_file_path = os.path.join(path, new_file)
# write the rows without the DataFrame index column
df.to_csv(new_file_path, index=False)

pred_gender


In [26]:
# Allow up to 1000 rows to be shown, then display the updated table
pd.options.display.max_rows = 1000
df

Unnamed: 0,Plate,Filename,#img,#name,Common Name,Taxonomic Name,Gender,pred_gender,pred_conf pred_gender,test_train pred_gender
0,3372,IMG_3372_1.jpg,IMG_3372_1,IMG_3372_1,Queen,Danaus gilippus,Male,Male,0.603116,train
1,3372,IMG_3372_2.jpg,IMG_3372_2,IMG_3372_2,Queen,Danaus gilippus,Female,Male,0.604195,test
2,3372,IMG_3372_3.jpg,IMG_3372_3,IMG_3372_3,Queen,Danaus gilippus,Female,Male,0.604154,test
3,3374,IMG_3374_1.jpg,IMG_3374_1,IMG_3374_1,Queen,Danaus gilippus,Male,Male,0.609368,train
4,3374,IMG_3374_2.jpg,IMG_3374_2,IMG_3374_2,Queen,Danaus gilippus,Male,Male,0.60959,train
5,3374,IMG_3374_3.jpg,IMG_3374_3,IMG_3374_3,Queen,Danaus gilippus,Male,Male,0.595818,train
6,3374,IMG_3374_4.jpg,IMG_3374_4,IMG_3374_4,Queen,Danaus gilippus,Male,Male,0.595102,train
7,3374,IMG_3374_5.jpg,IMG_3374_5,IMG_3374_5,Queen,Danaus gilippus,Male,Male,0.602597,train
8,3374,IMG_3374_6.jpg,IMG_3374_6,IMG_3374_6,Queen,Danaus gilippus,Male,Male,0.588665,train
9,3375,IMG_3375_1.jpg,IMG_3375_1,IMG_3375_1,Queen,Danaus gilippus,Male,Male,0.583372,test


In [19]:
# Display the CSV file name that was taken from the "csv" URL parameter
csv_file

'ilyaj_Queen_by_Gender_clone_.csv'

In [20]:
#Input survey name

# Suggested name: CSV base name followed by epochs, batch size and image size
name_parts = [csv_file.split(".")[0], str(EPOCHS), str(BS), str(im_dimension)]
default_name = "_".join(name_parts)
print(default_name)

from IPython.display import display

input_text = widgets.Text(value=default_name)
output_text = widgets.Text(value=default_name)


def bind_input_to_output(sender):
    """Copy the current text of input_text into output_text."""
    output_text.value = input_text.value


# Call bind_input_to_output() whenever the input widget reports a change
input_text.observe(bind_input_to_output)

print("Input survey name here:")
# Display the input box followed by the box that mirrors it
display(input_text)
display(output_text)

survey_name = output_text.value

ilyaj_Queen_by_Gender_clone__25_32_100
Input survey name here:


Text(value='ilyaj_Queen_by_Gender_clone__25_32_100')

Text(value='ilyaj_Queen_by_Gender_clone__25_32_100')

In [21]:
#Parse url
# Keep only the part of the survey URL in front of "/main"
upload_url = survey_url.split("/main")[0]

# For an https survey URL the upload goes to plain http on port 3001.
# Only the scheme prefix is tested and rewritten: looking for "https" anywhere
# in the URL and dropping the first "s" could alter the host name instead.
https_prefix = "https://"
if upload_url.startswith(https_prefix):
    upload_url = "http://" + upload_url[len(https_prefix):] + ":3001"

upload_url = upload_url + "/uploadCSV"

# Everything in front of the user name is reused to build the new survey link
new_survey_url_base = survey_url.split(user)[0]

In [22]:
# Display the upload endpoint that was derived from the survey URL
upload_url

'http://suave-dev.sdsc.edu/uploadCSV'

In [23]:
import requests

# Form fields sent with the upload: survey name, dzc file and user
upload_data = {'name': input_text.value, 'dzc': dzc_file, 'user': user}

# Open the CSV inside a with-block so the file handle is closed once the
# request has been sent instead of staying open for the rest of the session
with open(new_file_path, "rb") as csv_handle:
    files = {"file": csv_handle}
    r = requests.post(upload_url, files=files, data=upload_data)
print(r.status_code, r.reason)

# Replace every character that is not a letter, digit or underscore
regex = re.compile('[^0-9a-zA-Z_]')
# NOTE(review): this overwrites the survey_url read from the page URL, and the
# sanitised value is not used in the link built below - confirm the intent
survey_url = survey_name
survey_url = regex.sub('_', survey_url)

# Link to the new survey, keeping the views/view settings of the original
url = new_survey_url_base + user + "_" + input_text.value + ".csv" + "&views=" + views + "&view=" + view
print(url)
print ("Click the URL to open the new survey")


200 OK
http://suave-dev.sdsc.edu/main/file=ilyaj_ilyaj_Queen_by_Gender_25_32_100.csv&views=1110001&view=grid
Click the URL to open the new survey
