In [1]:
import scipy.io as sio
import scipy
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import math
import pickle
from sklearn.cluster import KMeans, AgglomerativeClustering
from skimage.feature import hog

from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import average_precision_score

In [2]:
# Load training data
training_data_map = pickle.load(open( "training_data.p", "rb" ))
# Create hog features 
# Train linear SVM
# Test SVM 

In [3]:
def get_hog_features(img_file):
    img = cv2.imread(img_file)
    orient = 11
    pix_per_cell = 16
    cell_per_block = 2
    feature_vec = []
    for channel in range(0,3):
        features = hog(img[:,:,channel], orientations=orient, 
                                  pixels_per_cell=(pix_per_cell, pix_per_cell),
                                  cells_per_block=(cell_per_block, cell_per_block), 
                                  transform_sqrt=False, 
                                  visualise=False, feature_vector=True)
        #print('Feature length: ', len(features))
        feature_vec = feature_vec + list(features)
        
    return feature_vec

def get_features(img_ids):
    cropped_imgs = 'mp2/cropped_imgs/'
    feature_arr = []
    for img_id in img_ids:
        img_file = cropped_imgs+str(img_id) + '.jpeg'
        feature_vec = get_hog_features(img_file)
        feature_arr.append(feature_vec)
    return feature_arr

def get_train_test_dataset(pos_imgs, neg_imgs):
    pos_features = get_features(pos_imgs)
    neg_features = get_features(neg_imgs)
    X = np.vstack((pos_features, neg_features)).astype(np.float64)  
    y = np.hstack((np.ones(len(pos_features)), np.zeros(len(neg_features))))
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)
    
    return X_train, X_test, y_train, y_test


def test_all_classes(train_data_map):
    keys = train_data_map.keys()
    class_score_map = {}
    saved_model_map = {}
    for key in keys:
        pos_imgs, neg_imgs = train_data_map[key]
        X_train, X_test, y_train, y_test = get_train_test_dataset(pos_imgs, neg_imgs)
        svc = LinearSVC(C=0.1, random_state=42)
        svc.fit(X_train, y_train)
        score = svc.score(X_test, y_test)
        class_score_map[key] = score
        saved_model_map[key] = svc
        
    pickle.dump(saved_model_map, open('saved_model_map.p', 'wb'))
    return class_score_map

In [4]:
print(training_data_map.keys())

dict_keys([4544, 1794, 2038, 6854, 586, 11211, 8525, 4431, 10192, 11665, 2901, 8790, 3735, 3929, 2843, 8717, 3807, 3684, 12133, 8143, 359, 7247, 6704, 6833, 9523, 10422, 1594, 9753])


In [5]:
cropped_imgs = 'mp2/cropped_imgs/'
test_img = training_data_map[4544][0][0]
feature_vec = get_hog_features(cropped_imgs+str(test_img)+'.jpeg')
print(len(feature_vec))

3300


/Users/saurabh/anaconda3/lib/python3.5/site-packages/skimage/feature/_hog.py:119: skimage_deprecation: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15
  'be changed to `L2-Hys` in v0.15', skimage_deprecation)


In [6]:
pos_imgs = training_data_map[4544][0]
neg_imgs = training_data_map[4544][1]

X_train, X_test, y_train, y_test = get_train_test_dataset(pos_imgs, neg_imgs)


/Users/saurabh/anaconda3/lib/python3.5/site-packages/skimage/feature/_hog.py:119: skimage_deprecation: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15
  'be changed to `L2-Hys` in v0.15', skimage_deprecation)


In [7]:
svc = LinearSVC(C=0.1, random_state=42)
svc.fit(X_train, y_train)
score = svc.score(X_test, y_test)
y_score = svc.decision_function(X_test)
average_precision = average_precision_score(y_test, y_score)

print('SVM test score: ', score)    

print('Average precision-recall score: {0:0.2f}'.format(
      average_precision))

SVM test score:  0.851063829787
Average precision-recall score: 0.95


In [8]:
print(y_test)
#print(y_score)
print(svc.predict(X_test))
print(X_test)

[ 1.  0.  0.  1.  0.  1.  0.  1.  0.  1.  0.  1.  0.  1.  0.  0.  1.  1.
  0.  1.  0.  1.  1.  0.  0.  1.  1.  0.  1.  1.  0.  1.  1.  0.  0.  0.
  0.  1.  0.  0.  0.  0.  0.  1.  0.  0.  1.]
[ 1.  0.  0.  0.  1.  1.  0.  1.  0.  1.  0.  1.  0.  1.  0.  1.  1.  1.
  0.  0.  0.  1.  1.  0.  1.  1.  1.  0.  1.  1.  0.  1.  1.  0.  0.  0.
  1.  1.  1.  0.  0.  0.  0.  1.  0.  0.  1.]
[[ 0.02251182  0.01847929  0.01530311 ...,  0.00101926  0.00741391
   0.09530951]
 [ 0.02793621  0.02328045  0.02295923 ...,  0.00537242  0.00399705
   0.00248489]
 [ 0.06133245  0.03421076  0.03331228 ...,  0.00776699  0.00261622
   0.00817355]
 ..., 
 [ 0.02205252  0.00693754  0.00534815 ...,  0.0145044   0.00891971
   0.02356592]
 [ 0.00902015  0.01334715  0.0692479  ...,  0.0231708   0.12241151
   0.12846059]
 [ 0.05344011  0.03755282  0.0458027  ...,  0.04091257  0.01071437
   0.00615879]]


In [9]:
class_score_map = test_all_classes(training_data_map)
avg_score = 0
for key, score in class_score_map.items():
    print(key, score)
    avg_score += score

print('Avg score: ', avg_score/len(class_score_map))

/Users/saurabh/anaconda3/lib/python3.5/site-packages/skimage/feature/_hog.py:119: skimage_deprecation: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15
  'be changed to `L2-Hys` in v0.15', skimage_deprecation)


4544 0.851063829787
1794 0.963636363636
10422 0.944444444444
6854 0.909090909091
586 0.878048780488
11211 0.866666666667
8525 0.836363636364
4431 0.90243902439
10192 0.830188679245
11665 0.930107526882
2901 0.941747572816
8790 0.886363636364
3735 0.96062992126
3929 0.961538461538
2843 1.0
8717 0.876543209877
3807 0.862745098039
3684 0.927165354331
12133 0.93
8143 0.928057553957
359 0.895348837209
7247 0.905405405405
6704 0.864661654135
6833 0.971014492754
9523 0.891304347826
2038 0.897959183673
1594 0.924528301887
9753 0.875
Avg score:  0.907573674717
