In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import skimage.io as io
from skimage import feature
from skimage.transform import resize
from sklearn.feature_selection import VarianceThreshold
from skimage.color import rgb2gray
from sklearn.decomposition import PCA
from sklearn.grid_search import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from skimage.feature import corner_fast, corner_peaks, corner_harris
import sys
import os
sys.path.append('../')
import helpers.histogram_classifier as hc

In [2]:
def sub_grid(image, size=32):
    """
    Split an image into a grid of non-overlapping square boxes, and return each box in turn
    :params image: image as numpy array
    :params size: side-length of box to use; should be a power of 2
    :return: yields each box in turn
    """
    pic_size = image.shape
    box_side = size
    for x in range(0, pic_size[0], box_side):
        for y in range(0, pic_size[1], box_side):
            yield image[x:x + box_side, y:y + box_side, :]

In [3]:
from IPython.core.display import HTML
css = open('styles/table-style.css').read() #+ open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))

In [4]:
def get_image_features(img, box_size):
    """
    Get RGB and edge features for a single image by splitting the image into a grid of non-overlapping
    square boxes, and obtaining features for each box
    :params img: image as a numpy array
    :params box_size: the side-length in pixels of the box to be used
    :return: mean Red pixel value in each box
    :return: mean Green pixel value in each box
    :return: mean Blue pixel value in each box
    :return: number of edges in each box, extracted using skimage canny edges algorithm
    :return: number of corners in each box, extracted using skimage corner_fast algorithm
    """
    image_features = []
    for box in sub_grid(img, size=box_size):
        features = []
        features.append(np.mean(box[:, :, 0]))
        features.append(np.mean(box[:, :, 1]))
        features.append(np.mean(box[:, :, 2]))
        features.append(np.sum(feature.canny(rgb2gray(box), sigma=1)))
        features.append(corner_peaks(corner_fast(rgb2gray(box))).shape[0])
        image_features.append(np.array(features))
    image_features = np.array(image_features)
    return image_features[:, 0], image_features[:, 1], image_features[:, 2], image_features[:, 3], image_features[:, 4]

In [5]:
def get_complete_features(image_classes, image_names, box_size):
    """
    Get RGB and edge features for a dictionary of image names
    :params image_classes: list of n image classes
    :params image_names: dictionary of m image names for each class
    :params size: size as tuple for reshaping images for extracting edges and corners
    :return: red_features: average red pixel value for each box in each image
    :return: green_features: average green pixel value for each box in each image
    :return: blue_features: average blue pixel value for each box in each image
    :return: edge_features: number of edges detected for each box in each image
    :return: corner_features: number of corners detected for each box in each image
    :return: labels: array of true labels for each image
    """
    red_features = []
    green_features = []
    blue_features = []
    edge_features = []
    corner_features = []
    labels = []
    for c in image_classes:
        for i in image_names[c]:
            img = io.imread(os.path.join("../data/food-101/top_classes/", c, "", i))
            red, green, blue, edge, corners = get_image_features(img, box_size=box_size)
            red_features.append(red)
            green_features.append(green)
            blue_features.append(blue)
            edge_features.append(edge)
            corner_features.append(corners)
            labels.append(c)
    
    return np.array(red_features), np.array(green_features), np.array(blue_features), np.array(edge_features), np.array(corner_features), np.array(labels)

In [6]:
BOX = 16

In [7]:
classes = pd.read_csv('../data/food-101/meta/top_classes.csv', index_col=0)
class_list = list(classes['class'].unique())

image_names = {}
for c in class_list:
    image_names[c] = np.array(classes[classes['class'] == c]['name'])

In [8]:
train, test = hc.split_data(image_names, 0.75)

In [9]:
#for c in class_list:
#    train[c] = train[c][:15]
#    test[c] = test[c][:5]

In [10]:
#Get complete histogram features, edge and corners
all_red_features, all_green_features, all_blue_features, all_edge_features, all_corner_features, train_labels = get_complete_features(class_list, train, BOX)

#### Classifier for red features

In [11]:
#Setup grid-search for top parameters
depth = np.array([8, 10, 12, 14])
estimators = np.array([10, 20, 50, 100, 200])
model = RandomForestClassifier()
grid = GridSearchCV(estimator=model, param_grid=dict(n_estimators=estimators, max_depth=depth))
grid.fit(all_red_features, train_labels)

GridSearchCV(cv=None, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'max_depth': array([ 8, 10, 12, 14]), 'n_estimators': array([ 10,  20,  50, 100, 200])},
       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)

In [12]:
print("Best score = {}".format(grid.best_score_))
print("Params: {}".format(grid.best_params_))

Best score = 0.206
Params: {'max_depth': 12, 'n_estimators': 200}


In [13]:
%%time
red_rf = grid.best_estimator_
red_rf.fit(all_red_features, train_labels)

CPU times: user 43.2 s, sys: 97.1 ms, total: 43.3 s
Wall time: 43.4 s


#### Classifier for green features

In [14]:
#Setup grid-search for top parameters
depth = np.array([8, 10, 12, 14])
estimators = np.array([10, 20, 50, 100, 200])
model = RandomForestClassifier()
grid = GridSearchCV(estimator=model, param_grid=dict(n_estimators=estimators, max_depth=depth))
grid.fit(all_green_features, train_labels)

GridSearchCV(cv=None, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'max_depth': array([ 8, 10, 12, 14]), 'n_estimators': array([ 10,  20,  50, 100, 200])},
       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)

In [15]:
print("Best score = {}".format(grid.best_score_))
print("Params: {}".format(grid.best_params_))

Best score = 0.22355555555555556
Params: {'max_depth': 10, 'n_estimators': 200}


In [16]:
%%time
green_rf = grid.best_estimator_
green_rf.fit(all_green_features, train_labels)

CPU times: user 37.8 s, sys: 69.6 ms, total: 37.9 s
Wall time: 37.9 s


#### Classifier for blue features

In [17]:
#Setup grid-search for top parameters
depth = np.array([8, 10, 12, 14])
estimators = np.array([10, 20, 50, 100, 200])
model = RandomForestClassifier()
grid = GridSearchCV(estimator=model, param_grid=dict(n_estimators=estimators, max_depth=depth))
grid.fit(all_blue_features, train_labels)

GridSearchCV(cv=None, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'max_depth': array([ 8, 10, 12, 14]), 'n_estimators': array([ 10,  20,  50, 100, 200])},
       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)

In [18]:
print("Best score = {}".format(grid.best_score_))
print("Params: {}".format(grid.best_params_))

Best score = 0.19344444444444445
Params: {'max_depth': 10, 'n_estimators': 200}


In [19]:
%%time
blue_rf = grid.best_estimator_
blue_rf.fit(all_blue_features, train_labels)

CPU times: user 37.6 s, sys: 80.5 ms, total: 37.7 s
Wall time: 37.7 s


#### Classifier for edge features

In [20]:
#Setup grid-search for top parameters
depth = np.array([8, 10, 12, 14])
estimators = np.array([10, 20, 50, 100, 200])
model = RandomForestClassifier()
grid = GridSearchCV(estimator=model, param_grid=dict(n_estimators=estimators, max_depth=depth))
grid.fit(all_edge_features, train_labels)

GridSearchCV(cv=None, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'max_depth': array([ 8, 10, 12, 14]), 'n_estimators': array([ 10,  20,  50, 100, 200])},
       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)

In [21]:
print("Best score = {}".format(grid.best_score_))
print("Params: {}".format(grid.best_params_))

Best score = 0.20555555555555555
Params: {'max_depth': 10, 'n_estimators': 200}


In [22]:
%%time
edge_rf = grid.best_estimator_
edge_rf.fit(all_edge_features, train_labels)

CPU times: user 16.3 s, sys: 46.1 ms, total: 16.3 s
Wall time: 16.3 s


#### Classifier for corner features

In [23]:
#Setup grid-search for top parameters
depth = np.array([8, 10, 12, 14])
estimators = np.array([10, 20, 50, 100, 200])
model = RandomForestClassifier()
grid = GridSearchCV(estimator=model, param_grid=dict(n_estimators=estimators, max_depth=depth))
grid.fit(all_corner_features, train_labels)

GridSearchCV(cv=None, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'max_depth': array([ 8, 10, 12, 14]), 'n_estimators': array([ 10,  20,  50, 100, 200])},
       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)

In [24]:
print("Best score = {}".format(grid.best_score_))
print("Params: {}".format(grid.best_params_))

Best score = 0.14622222222222223
Params: {'max_depth': 14, 'n_estimators': 200}


In [25]:
%%time
corner_rf = grid.best_estimator_
corner_rf.fit(all_corner_features, train_labels)

CPU times: user 7.56 s, sys: 48.2 ms, total: 7.61 s
Wall time: 7.63 s


In [26]:
red_features = red_rf.predict_proba(all_red_features)
green_features = green_rf.predict_proba(all_green_features)
blue_features = blue_rf.predict_proba(all_blue_features)
edge_features = edge_rf.predict_proba(all_edge_features)
corner_features = corner_rf.predict_proba(all_corner_features)

combined_features = np.concatenate([red_features, green_features, blue_features, edge_features, corner_features], axis=1)

##### Get test features

In [27]:
all_red_features_t, all_green_features_t, all_blue_features_t, all_edge_features_t, all_corner_features_t, test_labels = get_complete_features(class_list, test, BOX)

In [28]:
red_predictions = red_rf.predict_proba(all_red_features_t)
green_predictions = green_rf.predict_proba(all_green_features_t)
blue_predictions = blue_rf.predict_proba(all_blue_features_t)
edge_predictions = edge_rf.predict_proba(all_edge_features_t)
corner_predictions = corner_rf.predict_proba(all_corner_features_t)

combined_predictions = np.concatenate([red_predictions, green_predictions, blue_predictions, edge_predictions, corner_predictions], axis=1)

#### Bayesian Net for combining predictions

In [29]:
nb = GaussianNB()
nb.fit(combined_features, train_labels)

GaussianNB()

In [30]:
%%time
predicted_labels = nb.predict(combined_predictions)

CPU times: user 10.8 ms, sys: 1.83 ms, total: 12.7 ms
Wall time: 11.5 ms


In [31]:
overall, metrics, confusion = hc.get_metrics(test_labels, predicted_labels, class_list)

In [32]:
overall

Unnamed: 0,Accuracy,F1,Precision,Recall
Results,0.288,0.285,0.291,0.288


In [33]:
metrics

Unnamed: 0,Precision,Recall,F1-Score,Support
pork_chop,0.28,0.24,0.26,250.0
lasagna,0.3,0.22,0.26,250.0
french_toast,0.3,0.27,0.28,250.0
guacamole,0.24,0.22,0.23,250.0
apple_pie,0.22,0.3,0.25,250.0
cheesecake,0.34,0.34,0.34,250.0
hamburger,0.26,0.2,0.23,250.0
fried_rice,0.31,0.5,0.39,250.0
carrot_cake,0.26,0.29,0.28,250.0
chocolate_cake,0.35,0.34,0.34,250.0


In [34]:
confusion

Unnamed: 0,pork_chop,lasagna,french_toast,guacamole,apple_pie,cheesecake,hamburger,fried_rice,carrot_cake,chocolate_cake,steak,pizza
pork_chop,60,15,22,11,13,6,11,14,26,18,46,8
lasagna,15,56,25,17,28,11,12,32,15,7,15,17
french_toast,25,18,68,19,22,7,16,22,20,7,16,10
guacamole,7,11,11,54,20,14,31,49,13,4,15,21
apple_pie,8,13,14,14,74,29,19,28,22,13,7,9
cheesecake,2,5,11,16,46,85,11,8,24,33,8,1
hamburger,12,12,11,14,38,19,51,25,24,16,22,6
fried_rice,13,9,5,21,24,6,13,124,8,1,5,21
carrot_cake,13,13,7,20,35,34,8,10,72,20,14,4
chocolate_cake,15,6,16,10,13,21,7,2,28,85,46,1


##### SVM for combining predictions

In [35]:
#Setup grid-search for top parameters
param_grid = [
  {'C': [0.001, 0.01, 0.1, 1, 10, 100], 'kernel': ['linear']},
  {'C': [0.001, 0.01, 0.1, 1, 10, 100], 'gamma': [1, 0.01, 0.001, 0.0001], 'kernel': ['rbf']},
]
model = SVC()
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid.fit(combined_features, train_labels)

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid=[{'kernel': ['linear'], 'C': [0.001, 0.01, 0.1, 1, 10, 100]}, {'kernel': ['rbf'], 'C': [0.001, 0.01, 0.1, 1, 10, 100], 'gamma': [1, 0.01, 0.001, 0.0001]}],
       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=0)

In [36]:
print(grid.best_score_)
print(grid.best_params_)

0.999444444444
{'kernel': 'linear', 'C': 10}


In [37]:
%%time
sv = grid.best_estimator_
sv.fit(combined_features, train_labels)

CPU times: user 178 ms, sys: 856 µs, total: 179 ms
Wall time: 178 ms


In [38]:
%%time
predicted_labels = sv.predict(combined_predictions)

CPU times: user 66.6 ms, sys: 741 µs, total: 67.4 ms
Wall time: 66.3 ms


In [39]:
overall, metrics, confusion = hc.get_metrics(test_labels, predicted_labels, class_list)

In [40]:
overall

Unnamed: 0,Accuracy,F1,Precision,Recall
Results,0.282,0.258,0.295,0.282


In [41]:
metrics

Unnamed: 0,Precision,Recall,F1-Score,Support
pork_chop,0.31,0.22,0.25,250.0
lasagna,0.23,0.22,0.23,250.0
french_toast,0.42,0.16,0.23,250.0
guacamole,0.26,0.4,0.31,250.0
apple_pie,0.29,0.06,0.1,250.0
cheesecake,0.28,0.44,0.34,250.0
hamburger,0.24,0.09,0.13,250.0
fried_rice,0.26,0.45,0.33,250.0
carrot_cake,0.37,0.17,0.23,250.0
chocolate_cake,0.39,0.43,0.41,250.0


In [42]:
confusion

Unnamed: 0,pork_chop,lasagna,french_toast,guacamole,apple_pie,cheesecake,hamburger,fried_rice,carrot_cake,chocolate_cake,steak,pizza
pork_chop,54,27,2,16,3,18,2,33,9,28,19,39
lasagna,15,56,14,29,9,23,6,26,7,4,4,57
french_toast,18,20,39,22,2,18,4,29,11,9,13,65
guacamole,11,8,3,100,3,17,7,41,2,6,13,39
apple_pie,6,34,7,26,16,56,9,36,9,9,3,39
cheesecake,4,22,3,31,4,110,11,8,6,30,6,15
hamburger,10,14,5,18,7,34,22,55,9,9,11,56
fried_rice,4,4,2,48,4,12,3,113,3,4,4,49
carrot_cake,11,27,4,30,3,44,11,26,42,22,8,22
chocolate_cake,5,13,4,16,0,30,7,10,8,108,24,25
