# SuAVE Image to Label SVM Prediction Model

In [130]:
%%javascript
function getQueryStringValue (key)
{  
    return unescape(window.location.search.replace(new RegExp("^(?:.*[&\\?]" + escape(key).replace(/[\.\+\*]/g, "\\$&") + "(?:\\=([^&]*))?)?.*$", "i"), "$1"));
}
IPython.notebook.kernel.execute("survey_url='".concat(getQueryStringValue("surveyurl")).concat("'"));
IPython.notebook.kernel.execute("user='".concat(getQueryStringValue("user")).concat("'"));
IPython.notebook.kernel.execute("csv_file='".concat(getQueryStringValue("csv")).concat("'")); 
IPython.notebook.kernel.execute("dzc_file='".concat(getQueryStringValue("dzc")).concat("'")); 
IPython.notebook.kernel.execute("params='".concat(getQueryStringValue("params")).concat("'")); 
IPython.notebook.kernel.execute("active_object='".concat(getQueryStringValue("activeobject")).concat("'")); 

<IPython.core.display.Javascript object>

### Import all packages

In [131]:
import warnings
warnings.filterwarnings('ignore')  # "error", "ignore", "always", "default", "module" or "once"

# More imports
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import csv
import pandas as pd
import re
import cv2
import os

# import the necessary packages for SVM predictor
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import imutils
from sklearn.externals import joblib

# import histogram func
from histogram import Histograms

# Import widget functionality
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

## Run and choose the column name that coresponds with the column label to be predicted

In [123]:
# use csv file to grab images/labels
csv_path = "../../temp_csvs/" + csv_file
df = pd.read_csv(csv_path)

#generate image path
lower_case_csv = csv_file.lower()
img_path = "../../images/" + lower_case_csv.split(".")[0]

# Choose column of label for prediction
toPredict = list(df.columns.values)

pred_menu = {}
for i in range(0, len(toPredict)):
    pred_menu[toPredict[i]] = toPredict[i]

def f(predictions_menu):
    return predictions_menu

out2 = interact(f, predictions_menu=pred_menu)

interactive(children=(Dropdown(description='predictions_menu', options={'Latitude#number': 'Latitude#number', …

## Choose type of histogram to extract from images

In [124]:
hist_type = { 
    'HSV Color Histogram': 0,
    'Blue Histogram': 1,
    'Red Histogram': 2,
    'Green Histogram': 3
    }

def f(histogram_type):
    return histogram_type
typeH= interact(f, histogram_type=hist_type);

interactive(children=(Dropdown(description='histogram_type', options={'Blue Histogram': 1, 'Red Histogram': 2,…

## Get histogram from each image in the dataset

In [125]:
# init the image suffix
suffix = '.jpg'
labels = []

labelHeader = out2.widget.result

# grab chosen column names
nameCol = df['#img']
predCol = df[labelHeader]

# add all fabric columns to the y set
for i in range (0,len(predCol)):
    labels.append(predCol[i])
# create new hist_list
hist_list = []
# gather images from path created from file names in csv file
typeH = typeH.widget.result
for i in range (0,len(nameCol)):
    base_filename = nameCol[i]
    fileName = os.path.join(img_path, base_filename + suffix)
    im = cv2.imread(fileName)
    if typeH == 0:
        hist = Histograms.extract_color_histogram(im)
    elif typeH == 1: 
        hist = Histograms.extract_blue_histogram(im)
    elif typeH == 2: 
        hist = Histograms.extract_red_histogram(im)
    elif typeH == 3: 
        hist = Histograms.extract_green_histogram(im)
    hist_list.append(hist)
    
# transform labels into numerical system
le = LabelEncoder()
labels = le.fit_transform(predCol)



# separate data into test/train sets for features/labels
(xtrain, xtest, ytrain, ytest) = train_test_split(np.array(hist_list),labels, test_size = 0.5)

print("xtrain: ", xtrain)
print("ytrain: ", ytrain)
# Train the linear regression classifier
model2 = LinearSVC()
model2.fit(xtrain, ytrain)

# Calculate predictions on the data set
predictions = model2.predict(np.array(hist_list))
print(classification_report(labels, predictions, target_names = le.classes_))

xtrain:  [[9.9626221e-03 2.8270839e-03 1.2718667e-03 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]
 [1.1735303e-06 2.9338260e-06 4.7723570e-05 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]
 [3.0762043e-03 6.4249979e-03 2.2894552e-03 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]
 ...
 [2.3388861e-01 6.7396165e-04 2.8377332e-04 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]
 [3.8053196e-02 4.2564529e-03 1.4690207e-03 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]
 [2.0987228e-03 8.2647125e-04 3.2517349e-04 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]]
ytrain:  [ 5  5  4  6  5 13 17 13 12 22  4  6 17  1  5  3  5  4  5 11  5  5 18 22
 14 13  6  1  5 18  5 12  5  5  4 12 12 20 21 15  2 15  5  2  2  5 12  5
 21 12  3 14 14  5  3 11  5 10 14 14  5  4 12  4  7 12  5  1 14 22  5 21
  5  4  7 12  8  5  4  5 21 14 18 14 12 14  0  5 17  5  6  5 22  4  2 12
 19  2 17  7 16 13  1 12  4  6 12 13 12  9  5 13 13  1 12 19  6 12  6  6
  4  5 12  5  6  4  4  4 12  9 12  7  5 14 12 17  4 1

In [126]:
# save the label
filepath = "/labels/"
fileName = labelHeader + '_' + csv_file[:-4] + '_svmModel' + '.txt'
file = open(os.path.join(filePath, fileName),"w") 
file.write(str(typeH))
file.write(out2.widget.result)
file.close()

#Generate model file and save

modelPath = "/models/"
modelName =  user + "_svm_" + out2.widget.result + "_" + str(typeH) + ".pkl"
joblib.dump(model2, os.path.join(modelPath, modelName)) 

['svmModel.pkl']

In [127]:
typeH

0

In [128]:
labelHeader

'Petrographic Fabric'

In [129]:
labelHeader + '_' + csv_file[:-4] + 'svmModel' + '.txt'

'Petrographic Fabric_zaslavsk_Kommos_Ceramic_PetrographysvmModel.txt'