<h1><span style="color:red">SuAVE LeNet CNN Model Generator</span></h1>


For a simple Python implementation see https://www.pyimagesearch.com/2016/08/01/lenet-convolutional-neural-network-in-python/

The model was originally described in http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf

## 1. Disable autoscroll and retrieve survey parameters from the URL

In [14]:
%%javascript
// Disable automatic scrolling of long cell outputs: the notebook asks
// _should_scroll() before collapsing an output, and this override always
// answers "no".
var neverScroll = function (lines) {
    return false;
};
IPython.OutputArea.prototype._should_scroll = neverScroll;

<IPython.core.display.Javascript object>

In [15]:
%%javascript
/**
 * Return the value of the query-string parameter `key` taken from
 * window.location.search, or "" when the parameter is absent or has no value.
 *
 * The parameter name is matched case-insensitively. It must be followed by
 * "=", "&" or the end of the query string; without that check a request for
 * "view" latched onto a later "views=..." parameter and returned "".
 *
 * NOTE: escape()/unescape() are deprecated but kept here so that the decoding
 * of existing URLs does not change.
 *
 * @param {string} key  Name of the query-string parameter.
 * @returns {string}    The decoded value, or "".
 */
function getQueryStringValue (key)
{
    // Backslash-escape ".", "+" and "*" in the escaped key so the regular
    // expression matches them literally.
    var keyPattern = escape(key).replace(/[\.\+\*]/g, "\\$&");
    // The whole query string is replaced by capture group 1 (the value); the
    // look-ahead (?=&|$) rejects parameter names that merely start with `key`.
    var matcher = new RegExp("^(?:.*[&\\?]" + keyPattern + "(?:\\=([^&]*))?(?=&|$))?.*$", "i");
    return unescape(window.location.search.replace(matcher, "$1"));
}
// Copy the survey parameters from the page URL into Python variables in the
// kernel. Every value is serialized with JSON.stringify, which produces a
// double-quoted literal with quotes, backslashes and control characters
// escaped; those escapes are also valid in a Python string literal. Building
// the statement as name='value' by hand broke on (or executed code from) any
// URL value that contained a quote or a backslash.
function setKernelString(name, value) {
    IPython.notebook.kernel.execute(name + " = " + JSON.stringify(String(value)));
}

// [Python variable name, query-string parameter name], in assignment order.
var kernelParams = [
    ["survey_url", "surveyurl"],
    ["views", "views"],
    ["view", "view"],
    ["user", "user"],
    ["csv_file", "csv"],
    ["dzc_file", "dzc"],
    ["params", "params"],
    ["active_object", "activeobject"]
];
kernelParams.forEach(function (pair) {
    setKernelString(pair[0], getQueryStringValue(pair[1]));
});

// The full address of the notebook page itself.
setKernelString("full_notebook_url", window.location);

<IPython.core.display.Javascript object>

## 2. Import all packages (this might take a few seconds)

In [16]:
# Import widget functionality
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import Markdown, display
import warnings
warnings.filterwarnings('ignore')  # "error", "ignore", "always", "default", "module" or "once"
# import the necessary packages
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import img_to_array
from keras.utils import to_categorical
from imutils import paths

from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.layers.core import Dropout
from keras import backend as K
# import local lenet.py file describing the LeNet implementation with RELU activation functions
from lenet import LeNet

# More imports
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import csv
import pandas as pd
import re
import cv2
import os

# import the necessary packages for SVM predictor
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import imutils


def printmd(string):
    """Render `string` as Markdown in the output area of the current cell."""
    rendered = Markdown(string)
    display(rendered)


## 3. Choose the number of training epochs and the batch size

In [17]:
from collections import OrderedDict

# Dropdown choices for the number of training epochs (menu text -> value)
epochCount = OrderedDict(
    ('{} Iterations'.format(count), count) for count in (25, 50, 75)
)


def f(epoch_count):
    """Identity callback: hand the selected epoch count back to `interact`."""
    return epoch_count


# `interact` renders the dropdown; the current selection is available as
# epochNum.widget.result
epochNum = interact(f, epoch_count=epochCount)


In [18]:
# Dropdown choices for the training batch size (menu text -> value)
batchS = OrderedDict(
    ('{} Batch Size'.format(size), size) for size in (32, 64, 128)
)


def f(batch_size):
    """Identity callback: hand the selected batch size back to `interact`."""
    return batch_size


# `interact` renders the dropdown; the current selection is available as
# batchNum.widget.result
batchNum = interact(f, batch_size=batchS)

In [19]:
# Training hyper-parameters: the initial learning rate is fixed, while the
# epoch count and batch size are whatever the two dropdowns currently show
INIT_LR = 1e-3
EPOCHS = epochNum.widget.result
BS = batchNum.widget.result

# File extension appended to every image name
suffix = '.png'

# Containers for the loaded images, their integer targets and the raw labels
img_list, yset, labels = [], [], []

# Two-way label mapping
# key = label value = number
label_yval = {}
# key = number value = label
yval_label = {}

## 4. Load the data and choose the column name to be predicted

In [20]:
# use csv file to grab images/labels
absolutePath = "/home/jovyan/temp_csvs/"
# Read the csv file. The `with` block closes the handle as soon as pandas has
# parsed it; a bare open() left it open for the lifetime of the kernel.
with open(absolutePath + csv_file, encoding="latin-1") as file:
    df = pd.read_csv(file)

# Generate the image path: map the collection's URL prefix onto its local
# mount point, then point at the full_images/ folder beside content.dzc
localdzc = dzc_file.replace("https://maxim.ucsd.edu/dzgen/lib-staging-uploads","/lib-nfs/dzgen")
full_images_location = localdzc.replace("/content.dzc","/full_images/")


# Every CSV column is offered as a candidate for prediction
toPredict = list(df.columns.values)

# The menu maps each column name to itself
pred_menu = OrderedDict((column, column) for column in toPredict)


def f(predictions_menu):
    """Identity callback: hand the selected column name back to `interact`."""
    return predictions_menu


# choose which label for predictions
out2 = interact(f, predictions_menu=pred_menu)

## 5. Select pixel resolution

In [21]:
# Slider for the pixel size: 20 to 300 in steps of 10, starting at 60
a = widgets.IntSlider(
    description='Size, pixels:',
    value=60,
    min=20,
    max=300,
    step=10,
)
display(a)

In [22]:
# Echo the chosen settings so they can be checked before the images are loaded
printmd("<h2><span style='color:red'>Verify model parameters:</span></h2>")
summary_rows = (
    ("<b>Variable to predict:  ", out2.widget.result),
    ("<b>Pixel resolution:  ", str(a.value)),
    ("<b>Path to images:  ", full_images_location),
    ("<b>Number of epochs:  ", str(EPOCHS)),
    ("<b>Batch size:  ", str(BS)),
)
for row_prefix, row_value in summary_rows:
    printmd(row_prefix + row_value + "</b>")


## 6. Grab the images and configure them for predicting

### This might take a little while depending on the size of the dataset

In [23]:
import requests
# Column with the image file names, and the column chosen for prediction
nameCol = df['#img']
predCol = df[out2.widget.result]
# Pixel size currently set on the slider
im_dimension = a.value

def matchImage(curr_image_array, image_list):
    """Return the index of the first array in `image_list` equal to `curr_image_array`.

    Equality is decided by `np.array_equal`. When no entry matches, the
    result is None.
    """
    for position, candidate in enumerate(image_list):
        if np.array_equal(curr_image_array, candidate):
            return position
    return None

# Raw label of every row in the chosen prediction column
labels = list(predCol)

# Unique labels in order of first appearance. Going through set() made the
# label -> number assignment depend on hash ordering, so the class numbers
# baked into a trained model could differ from one kernel session to the next.
uni_labels = list(OrderedDict.fromkeys(labels))

# Rebuild both lookup tables from scratch so that re-running this cell with a
# different prediction column leaves no stale entries behind
# key = number value = label
yval_label = dict(enumerate(uni_labels))
# key = label value = number
label_yval = {label: number for number, label in yval_label.items()}

# Class number of every row, in row order
yset = [label_yval[label] for label in labels]

def _read_image_bytes(location):
    """Return the raw bytes of the image stored at `location`.

    An http(s) URL is downloaded with `requests`; a non-2xx answer raises
    `requests.HTTPError` instead of handing an error page to the image
    decoder. Anything else is read as a path on the local file system,
    because `requests.get` cannot open a schemeless path.
    """
    if location.lower().startswith(("http://", "https://")):
        response = requests.get(location)
        response.raise_for_status()
        return response.content
    with open(location, "rb") as image_file:
        return image_file.read()


img_list = []

im_count = widgets.Label(value="0% images loaded")
display(im_count)
numfiles = len(nameCol)

for counter, base_filename in enumerate(nameCol, start=1):
    url = os.path.join(full_images_location, base_filename + suffix)

    image_data = np.frombuffer(_read_image_bytes(url), dtype=np.uint8)
    im = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
    if im is None:
        # Name the offending file rather than failing later inside cv2.resize
        raise ValueError("Could not decode image: " + url)
    im = cv2.resize(im, (im_dimension, im_dimension))
    img_list.append(img_to_array(im))

    # Update the progress label shown above
    im_count.value = str(int(counter / numfiles * 100)) + "% images loaded"

# Shuffle the data: p[k] is the original row index placed at shuffled position k
p = np.random.permutation(len(yset))

# Reorder images and targets for splitting
X = [img_list[row_index] for row_index in p]
Y = [yset[row_index] for row_index in p]

# split the test and training set 75:25
split = int(len(X)*(.75))

xtrain = X[:split]
xtest = X[split:]
ytrain = Y[:split]
ytest = Y[split:]

# Record for every CSV row whether it landed in the training or the test set.
# The permutation already says which original row sits at each shuffled
# position, so the row is looked up through p directly. Searching img_list for
# an equal pixel array compared every image against every other, and with two
# identical images both resolved to the first one, leaving the second image's
# name without an entry (KeyError when the list was assembled).
test_train_dict = {}
test_train_list = [None] * len(nameCol)
for position, row_index in enumerate(p):
    role = "train" if position < split else "test"
    test_train_dict[nameCol[row_index]] = role
    test_train_list[row_index] = role


# Turn the image lists into float arrays and divide every value by 255
trainX = np.array(xtrain, dtype="float") / 255.0
testX = np.array(xtest, dtype="float") / 255.0

ytrain = np.array(ytrain)
ytest = np.array(ytest)

# Convert the integer class numbers into one vector per sample
class_count = len(uni_labels)
trainY = np_utils.to_categorical(ytrain, class_count)
testY = np_utils.to_categorical(ytest, class_count)

# Image generator used for data augmentation during training
aug = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

# Build and compile the LeNet model for square, 3-channel inputs
model = LeNet.build(
    width=im_dimension,
    height=im_dimension,
    depth=3,
    classes=class_count,
)
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(
    loss="categorical_crossentropy",
    optimizer=opt,
    metrics=["accuracy"],
)

printmd("<b><span style='color:red'>Images split into training and testing sets at 75:25</span></b>")


## 7. Train the predictive model

### Training time depends on the size of the dataset and the compute resources available; it may take from minutes to hours

In [None]:
# train the network
# With fewer training images than one batch the floor division gives 0 steps
# per epoch, so always run at least one step.
steps_per_epoch = max(1, len(trainX) // BS)
H = model.fit_generator(
    aug.flow(trainX, trainY, batch_size=BS),
    validation_data=(testX, testY),
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    verbose=1,
)
printmd("<b><span style='color:red'>Model generation complete</span></b>")

## 8. Take the original data and predict based on the new model

In [16]:
# Convert the original (unshuffled) images to a float array divided by 255
img_check = np.array(img_list, dtype="float") / 255.0

# One row of class scores per image
preds = model.predict(img_check)

# Highest score of each row = confidence of that prediction
prediction_confidence = [np.amax(row) for row in preds]

# Index of the first highest score of each row = predicted class number
predictionsMade = [int(np.where(row == np.amax(row))[0][0]) for row in preds]

print(prediction_confidence)

# Count how many predictions agree with the known labels
correct = sum(
    1 for predicted, actual in zip(predictionsMade, yset) if predicted == actual
)

printmd("<b><span style='color:red'>Accuracy: " + str(correct/len(yset)) + "</span></b>")


## 9. Save the model file under models/, and reload it

In [17]:
# Generate model file and save
# `user` comes from the page URL and the column name from the CSV header, so
# neither can be trusted as a path component. Path separators are replaced to
# keep the file inside models/ (a "/" in either value made the save fail or
# land in another directory).
rawModelName = user + "_cnn_" + out2.widget.result + "_" + str(epochNum.widget.result) + "_" + str(batchNum.widget.result) + ".h5"
modelName = re.sub(r"[\\/]", "_", rawModelName)
modelPath = "models/"
os.makedirs(modelPath, exist_ok=True)
modelFile = os.path.join(modelPath, modelName)
model.save(modelFile)
# Load the model back from the file that was just written
from keras.models import load_model
model2 = load_model(modelFile)
printmd("<b><span style='color:red'>Model saved</span></b>")

## 10. Enter a new header for the prediction column

In [18]:
# Translate the predicted class numbers back to the original csv label names
finalPred = [yval_label[class_number] for class_number in predictionsMade]

from IPython.display import display
# Both text boxes start out with the same suggested column header
suggested_header = "predicted " + out2.widget.result

input_text = widgets.Text(
    disabled=False,
    placeholder='Type label here',
    value=suggested_header,
)
output_text = widgets.Text(
    disabled=False,
    placeholder='New Header will be displayed here',
    value=suggested_header,
)


def bind_input_to_output(sender):
    """Copy the current text of `input_text` into `output_text`."""
    output_text.value = input_text.value


# Keep output_text in step with input_text
input_text.observe(bind_input_to_output)

print("Input new column Header Label: ")

display(input_text)
display(output_text)

## 11. Save the new version of CSV file, and give a name to new survey

In [None]:
# Append the prediction columns, named after the header entered above
pred_conf_col = "pred_conf " + input_text.value
test_train_col = "test_train " + input_text.value

df[input_text.value] = finalPred
df[pred_conf_col] = prediction_confidence
df[test_train_col] = test_train_list

print(input_text.value)

# Build "<name>_v1.csv" in the same folder as the input CSV. splitext removes
# whatever extension the file has; slicing off the last four characters only
# worked for three-letter extensions and mangled names without one.
new_file = absolutePath + os.path.splitext(csv_file)[0] + '_v1.csv'
printmd("<b><span style='color:red'>A new temporary file will be created at: </span></b>")
print(new_file)
df.to_csv(new_file, index=False)



In [None]:
# Input survey name

# Suggested name: CSV base name followed by the epochs, batch size and image
# size used for this model
default_name = "_".join(
    [csv_file.split(".")[0], str(EPOCHS), str(BS), str(im_dimension)]
)

from IPython.display import display

input_text = widgets.Text(value=default_name)
output_text = widgets.Text(value=default_name)


def bind_input_to_output(sender):
    """Copy the text of `input_text` into `output_text`."""
    output_text.value = input_text.value


# Tell the text input widget to call bind_input_to_output() on submit
input_text.on_submit(bind_input_to_output)

printmd("<b><span style='color:red'>Input survey name here, press Enter, and then run the next cell:</span></b>")
# Show the editable box first, then the box holding the submitted name
display(input_text)

display(output_text)

survey_name = output_text.value

In [None]:
# Print survey name: pick up whatever output_text holds now
survey_name = output_text.value
survey_name_banner = "<b><span style='color:red'>Survey Name is: </span></b>"
printmd(survey_name_banner + survey_name)

## 12. Generate the survey and create survey URL

In [None]:
import re
import requests

# Derive the upload endpoint and the base of the new survey's URL from the
# URL of the survey this notebook was opened from
referer = survey_url.split("/main")[0] +"/"
upload_url = referer + "uploadCSV"
new_survey_url_base = survey_url.split(user)[0]

# Upload under the confirmed survey name -- the same value the URL below is
# built from. Reading input_text.value here let the two disagree whenever a
# name was typed without pressing Enter.
upload_data = {
    'name': survey_name,
    'dzc': dzc_file,
    'user':user
}
headers = {
    'User-Agent': 'suave user agent',
    'referer': referer
}

# The `with` block closes the CSV once the request is done. The file is no
# longer bound to the name `csv`, which shadowed the imported csv module.
with open(new_file, "rb") as csv_handle:
    r = requests.post(upload_url, files={"file": csv_handle}, data=upload_data, headers=headers)

if r.status_code == 200:
    printmd("<b><span style='color:red'>New survey created successfully</span></b>")
    # Replace everything except letters, digits and "_" in the survey name
    regex = re.compile('[^0-9a-zA-Z_]')
    s_url = survey_name
    s_url =  regex.sub('_', s_url)

    url = new_survey_url_base + user + "_" + s_url + ".csv" + "&views=" + views + "&view=" + view
    print(url)
    printmd("<b><span style='color:red'>Click the URL to open the new survey</span></b>")
else:
    printmd("<b><span style='color:red'>Error creating new survey. Check if a survey with this name already exists.</span></b>")
    printmd("<b><span style='color:red'>Reason: </span></b>"+ str(r.status_code) + " " + r.reason)

