# Import graphlab that is used for image processing and model generation

To run, install graphlab 

In [2]:
import sys
sys.path.append('C:\Users\stefhamilton\Anaconda2\envs\gl-env\Lib\site-packages')
import graphlab as gl

In [70]:
imagesUrl = 'unprocessedImages/'
images = gl.image_analysis.load_images(imagesUrl, "auto", with_path=True, recursive=True)

Below was used in powershell to create unique image names
Get-ChildItem *.jpg | Rename-Item -NewName { $_.Name -replace '\.jpg','_notRaised.jpg' }
 

In [71]:
actualClassifications = gl.SFrame.read_csv('classifications\initialClassifications.csv')

------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


In [72]:
handRaises = sum(actualClassifications['hasHandRaise']=='Y')
nonhandRaises = sum(actualClassifications['hasHandRaise']=='N')
if handRaises == 0 :
    print "A problem was encountered when loading images and labels"
else:
    print "Successfully loaded %s images, hand raises: %s, non hand raises: %s " \
    % (len(images),handRaises, nonhandRaises)

Successfully loaded 154 images, hand raises: 74, non hand raises: 83 


# Combine image and labels

In [73]:
import re

In [74]:
def addCsvLabelsToImages(images, labels):
    hlabel = []
    for i in range(0,len(images)):
        #print 'i',i,images[i]['path']
        m = re.findall('\/([^\/]*.jpg)',images[i]['path'])
        if len(m) > 1:
            print "Error More than 1 name match found for image ", images[i]['path']
        elif len(m) == 0:
            print "No name matches found for image ", images[i]['path']
        else:
#            print 'labels[image]',labels[labels['image']]
            labelMatch = labels[labels['image']==m[0]]
            if len(labelMatch)>1:
                print "Error: More than 1 images were found for  ",images[i]['path'], labelMatch
            elif len(m) == 0:
                print "Error: No images were matches found for image name ", images[i]['path']
            else:
                hlabel.append(labelMatch['hasHandRaise'][0] == 'Y')
    return hlabel

In [75]:
images['actualClass'] = addCsvLabelsToImages(images, actualClassifications)

# Resize images to match what the model was trained on

In [76]:
images['image'] = gl.image_analysis.resize(images['image'], 256, 256, 3)

# Load the model that was trained on Imagenet images

Originally taken from http://s3.amazonaws.com/dato-datasets/deeplearning/imagenet_model_iter45 https://turi.com/products/create/docs/generated/graphlab.neuralnet_classifier.NeuralNetClassifier.extract_features.html?highlight=neuralnet_classifier Reference: Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. “Imagenet classification with deep convolutional neural networks.” Advances in neural information processing systems. 2012.

The file is not included in github since it is larger than can be easily added

In [84]:
pretrained_model = gl.load_model('C:/Users/stefhamilton/Desktop/video-coding/pretrained_imagenet_iter45_model')

In [105]:
all_data = gl.toolkits.cross_validation.shuffle(images, random_seed=1) # shuffle data so able to get more probabilities for the dashboard
all_data['features'] = pretrained_model.extract_features(gl.SFrame(images))
all_data.save('images_features_labels_preprocessed')

# Load data from here if source files are not changing


In [86]:
#all_data = gl.load_sframe('images_features_labels_preprocessed')

# Build Model

Skip tuning and just use default parameters

In [89]:
train_data, test_data = all_data.random_split(.8, seed=10)

In [101]:
model = gl.logistic_classifier.create(train_data, features = ['features'], target='actualClass',\
        max_iterations=100, l1_penalty=.00001, l2_penalty=20,   \
        verbose=True, class_weights={1L:1,0L:1}, convergence_threshold=.00001, validation_set=test_data) 

In [108]:
test_data['prob'] = model.predict(test_data, output_type='probability')

In [110]:
test_data.export_csv('test_data_predictions.csv')

In [106]:
all_data

path,image,actualClass,features
C:/Users/stefhamilton/v /dataviscourse-pr- ...,Height: 256 Width: 256,1,"[0.0, 0.0, 0.0, 0.0, 0.692548274994, 0.0, ..."
C:/Users/stefhamilton/v /dataviscourse-pr- ...,Height: 256 Width: 256,1,"[0.0, 0.0, 0.0, 2.02316689491, ..."
C:/Users/stefhamilton/v /dataviscourse-pr- ...,Height: 256 Width: 256,0,"[0.0, 0.0, 2.5354218483, 0.348499953747, ..."
C:/Users/stefhamilton/v /dataviscourse-pr- ...,Height: 256 Width: 256,0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.287137448788, 0.0, ..."
C:/Users/stefhamilton/v /dataviscourse-pr- ...,Height: 256 Width: 256,0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
C:/Users/stefhamilton/v /dataviscourse-pr- ...,Height: 256 Width: 256,0,"[0.0, 0.0, 0.0, 0.187218368053, ..."
C:/Users/stefhamilton/v /dataviscourse-pr- ...,Height: 256 Width: 256,1,"[0.0, 0.0, 0.0, 3.59831476212, ..."
C:/Users/stefhamilton/v /dataviscourse-pr- ...,Height: 256 Width: 256,1,"[0.0, 0.0, 0.0, 3.37315511703, 0.0, 0.0, ..."
C:/Users/stefhamilton/v /dataviscourse-pr- ...,Height: 256 Width: 256,0,"[0.0, 0.0254239439964, 0.0, 0.399780213833, ..."
C:/Users/stefhamilton/v /dataviscourse-pr- ...,Height: 256 Width: 256,1,"[0.0, 0.0, 0.0, 2.47744560242, ..."
