In [174]:
%matplotlib inline
import skimage.io as io
import sklearn.cluster as cluster
from sklearn.decomposition import PCA
import glob, os
import numpy as np
import pandas as pd
import skimage.transform as transform
from skimage.color import rgb2gray
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.grid_search import GridSearchCV
from imp import reload
import sys
sys.path.append('../')
import helpers.histogram_classifier as hc

In [2]:
from IPython.core.display import HTML
css = open('styles/table-style.css').read() #+ open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))

In [3]:
classes = pd.read_csv('../data/food-101/meta/top_classes.csv', index_col=0)
class_list = list(classes['class'].unique())

image_names = {}
for c in class_list:
    image_names[c] = np.array(classes[classes['class'] == c]['name'])

In [4]:
def get_kmeans_features(class_list, image_names, size, color_type, num_features):
    """
    Get features using PCA for a dictionary of image names
    :params class_list: list of n image classes
    :params image_names: dictionary of m image names for each class
    :params size: standard size to use for images during PCA as tuple
    :params color_type: indicator of RGB or Greyscale
    :params num_features: number of features to extract
    :return: features: array of features of shape (n x m, num_features)
    :return: true_labels: array of true labels of length n x m
    """
    images = []
    true_labels = []
    
    for c in class_list:
        for i in image_names[c]:
            true_labels.append(c)
            img = io.imread(os.path.join('../data/food-101/top_classes/', c, '', i))
            img = transform.resize(img, size)
            if color_type == 'Greyscale':
                img = rgb2gray(img)
            images.append(img.reshape(-1, 1))
    alg = cluster.KMeans(n_clusters=num_features)
    y = np.array(images)
    features = alg.fit_transform(y.reshape(len(y), -1))
    
    return features, np.array(true_labels)

In [72]:
def get_pca_features(class_list, image_names, size, color_type, num_features):
    """
    Get features using PCA for a dictionary of image names
    :params class_list: list of n image classes
    :params image_names: dictionary of m image names for each class
    :params size: standard size to use for images during PCA as tuple
    :params color_type: indicator of RGB or Greyscale
    :params num_features: number of features to extract
    :return: features: array of features of shape (n x m, num_features)
    :return: true_labels: array of true labels of length n x m
    """
    images = []
    true_labels = []
    
    for c in class_list:
        for i in image_names[c]:
            true_labels.append(c)
            img = io.imread(os.path.join('../data/food-101/top_classes/', c, '', i))
            img = transform.resize(img, size)
            if color_type == 'Greyscale':
                img = rgb2gray(img)
            images.append(img.reshape(-1, 1))
    alg = PCA(n_components=num_features)
    y = np.array(images)
    features = alg.fit_transform(y.reshape(len(y), -1))
    
    return features, np.array(true_labels)

In [148]:
def get_pixel_features(class_list, image_names, size):
    """
    Get features using PCA for a dictionary of image names
    :params class_list: list of n image classes
    :params image_names: dictionary of m image names for each class
    :params size: standard size to use for images as tuple
    :return: features: array of features of shape (n x m, num_features)
    :return: true_labels: array of true labels of length n x m
    """
    features = []
    true_labels = []
    
    
    for c in class_list:
        for i in image_names[c]:
            img = io.imread(os.path.join('../data/food-101/top_classes/', c, '', i))
            img = transform.resize(img, size)
            features.append(img.reshape(-1))
            true_labels.append(c)
    
    return np.array(features), np.array(true_labels)

### K-Means Features

In [220]:
train, test = hc.split_data(image_names, 0.75)

In [221]:
features, labels = get_kmeans_features(class_list, train, (32, 32, 3), 'RGB', 100)

In [222]:
test_features, test_labels = get_kmeans_features(class_list, test, (30, 30, 3), 'RGB', 100)

##### k-Nearest Neighbors

In [223]:
knn = KNeighborsClassifier()

In [224]:
%%time
knn.fit(features, labels)

CPU times: user 35.9 ms, sys: 1.83 ms, total: 37.8 ms
Wall time: 36.3 ms


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [225]:
%%time
predictions = knn.predict(test_features)

CPU times: user 7.72 s, sys: 55.4 ms, total: 7.77 s
Wall time: 7.86 s


In [229]:
overall, metrics, confusion = hc.get_metrics(test_labels, predictions, class_list)

In [230]:
overall

Unnamed: 0,Accuracy,F1,Precision,Recall
Results,0.091,0.084,0.091,0.091


In [231]:
metrics

Unnamed: 0,Precision,Recall,F1-Score,Support
pork_chop,0.02,0.0,0.01,250.0
lasagna,0.08,0.11,0.09,250.0
french_toast,0.09,0.2,0.12,250.0
guacamole,0.08,0.03,0.04,250.0
apple_pie,0.12,0.09,0.1,250.0
cheesecake,0.09,0.03,0.05,250.0
hamburger,0.07,0.1,0.08,250.0
fried_rice,0.13,0.06,0.08,250.0
carrot_cake,0.11,0.18,0.14,250.0
chocolate_cake,0.17,0.14,0.15,250.0


In [232]:
confusion

Unnamed: 0,pork_chop,lasagna,french_toast,guacamole,apple_pie,cheesecake,hamburger,fried_rice,carrot_cake,chocolate_cake,steak,pizza
pork_chop,1,26,49,11,11,8,36,10,33,23,5,37
lasagna,7,27,46,10,18,8,34,13,28,9,5,45
french_toast,1,32,51,8,16,8,41,7,29,18,8,31
guacamole,9,43,47,8,14,5,32,11,37,7,14,23
apple_pie,4,33,45,6,22,6,25,16,37,18,7,31
cheesecake,6,20,58,8,14,8,30,13,35,17,14,27
hamburger,4,36,48,10,11,9,26,13,34,11,7,41
fried_rice,8,35,33,7,31,2,36,16,31,7,6,38
carrot_cake,5,23,47,7,15,10,24,9,45,22,8,35
chocolate_cake,1,21,52,8,11,9,37,4,25,35,19,28


#### Random Forest Classifier

In [233]:
rf = RandomForestClassifier(max_depth=5, n_estimators=15)

In [234]:
%%time
rf.fit(features, labels)

CPU times: user 523 ms, sys: 4.88 ms, total: 528 ms
Wall time: 527 ms


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=5, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=15, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [235]:
%%time
predictions = rf.predict(test_features)

CPU times: user 10.8 ms, sys: 32 ms, total: 42.8 ms
Wall time: 41.9 ms


In [236]:
overall, metrics, confusion = hc.get_metrics(test_labels, predictions, class_list)

In [237]:
overall

Unnamed: 0,Accuracy,F1,Precision,Recall
Results,0.108,0.093,0.106,0.108


In [238]:
metrics

Unnamed: 0,Precision,Recall,F1-Score,Support
pork_chop,0.11,0.08,0.09,250.0
lasagna,0.07,0.14,0.1,250.0
french_toast,0.17,0.01,0.02,250.0
guacamole,0.07,0.03,0.04,250.0
apple_pie,0.12,0.14,0.13,250.0
cheesecake,0.13,0.11,0.12,250.0
hamburger,0.1,0.04,0.06,250.0
fried_rice,0.11,0.16,0.13,250.0
carrot_cake,0.04,0.01,0.01,250.0
chocolate_cake,0.16,0.31,0.21,250.0


In [239]:
confusion

Unnamed: 0,pork_chop,lasagna,french_toast,guacamole,apple_pie,cheesecake,hamburger,fried_rice,carrot_cake,chocolate_cake,steak,pizza
pork_chop,20,33,2,16,11,8,10,27,4,58,15,46
lasagna,23,36,1,6,25,11,3,39,6,25,9,66
french_toast,10,34,2,9,24,13,9,30,3,44,6,66
guacamole,9,57,0,8,27,31,18,25,4,15,14,42
apple_pie,19,39,1,9,35,25,3,32,4,34,8,41
cheesecake,16,32,0,9,20,27,7,16,6,63,14,40
hamburger,15,45,0,8,24,12,10,31,4,27,15,59
fried_rice,24,28,3,11,36,15,5,40,4,19,6,59
carrot_cake,11,39,1,9,28,18,12,26,2,55,11,38
chocolate_cake,19,39,1,10,18,12,5,17,7,78,18,26


### PCA Features

In [192]:
train, test = hc.split_data(image_names, 0.75)

In [193]:
features, labels = get_pca_features(class_list, train, (30, 30, 3), 'RGB', 100)

In [194]:
test_features, test_labels = get_pca_features(class_list, test, (30, 30, 3), 'RGB', 100)

##### k-Nearest Neighbors

In [196]:
knn = KNeighborsClassifier(n_neighbors=9)

In [197]:
%%time
knn.fit(features, labels)

CPU times: user 43.9 ms, sys: 1.85 ms, total: 45.8 ms
Wall time: 44.7 ms


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=9, p=2,
           weights='uniform')

In [199]:
%%time
predictions = knn.predict(test_features)

CPU times: user 5.78 s, sys: 18.6 ms, total: 5.8 s
Wall time: 5.82 s


In [200]:
overall, metrics, confusion = hc.get_metrics(test_labels, predictions, class_list)

In [201]:
overall

Unnamed: 0,Accuracy,F1,Precision,Recall
Results,0.063,0.051,0.059,0.063


In [202]:
metrics

Unnamed: 0,Precision,Recall,F1-Score,Support
pork_chop,0.05,0.02,0.03,250.0
lasagna,0.08,0.06,0.07,250.0
french_toast,0.05,0.02,0.02,250.0
guacamole,0.04,0.03,0.03,250.0
apple_pie,0.08,0.22,0.12,250.0
cheesecake,0.04,0.04,0.04,250.0
hamburger,0.11,0.04,0.06,250.0
fried_rice,0.06,0.2,0.1,250.0
carrot_cake,0.09,0.07,0.08,250.0
chocolate_cake,0.01,0.0,0.01,250.0


In [203]:
confusion

Unnamed: 0,pork_chop,lasagna,french_toast,guacamole,apple_pie,cheesecake,hamburger,fried_rice,carrot_cake,chocolate_cake,steak,pizza
pork_chop,5,13,4,23,60,27,4,60,19,5,1,29
lasagna,6,16,7,14,63,29,12,52,19,6,3,23
french_toast,6,16,4,18,56,26,10,60,18,6,1,29
guacamole,11,16,5,7,51,14,13,90,14,4,6,19
apple_pie,13,14,6,10,56,22,7,58,12,18,7,27
cheesecake,10,20,9,20,42,10,7,76,13,8,8,27
hamburger,6,16,7,14,54,23,11,67,15,2,3,32
fried_rice,9,22,7,7,59,27,8,50,10,13,7,31
carrot_cake,18,18,8,17,52,16,10,51,17,8,7,28
chocolate_cake,7,19,8,26,64,14,7,65,13,1,1,25


##### Random Forest

In [204]:
rf = RandomForestClassifier()

In [211]:
%%time
rf.fit(features, labels)

CPU times: user 983 ms, sys: 25 ms, total: 1.01 s
Wall time: 1.01 s


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [215]:
%%time
predictions = rf.predict(test_features)

CPU times: user 13.9 ms, sys: 1.34 ms, total: 15.3 ms
Wall time: 13.7 ms


In [216]:
overall, metrics, confusion = hc.get_metrics(test_labels, predictions, class_list)

In [217]:
overall

Unnamed: 0,Accuracy,F1,Precision,Recall
Results,0.071,0.067,0.069,0.071


In [218]:
metrics

Unnamed: 0,Precision,Recall,F1-Score,Support
pork_chop,0.06,0.03,0.04,250.0
lasagna,0.07,0.06,0.06,250.0
french_toast,0.09,0.09,0.09,250.0
guacamole,0.07,0.06,0.06,250.0
apple_pie,0.09,0.18,0.12,250.0
cheesecake,0.06,0.08,0.07,250.0
hamburger,0.09,0.06,0.08,250.0
fried_rice,0.08,0.08,0.08,250.0
carrot_cake,0.08,0.12,0.09,250.0
chocolate_cake,0.04,0.04,0.04,250.0


In [219]:
confusion

Unnamed: 0,pork_chop,lasagna,french_toast,guacamole,apple_pie,cheesecake,hamburger,fried_rice,carrot_cake,chocolate_cake,steak,pizza
pork_chop,7,23,17,15,39,21,20,26,35,24,7,16
lasagna,13,15,20,19,36,28,10,25,34,22,18,10
french_toast,13,22,22,16,47,18,20,21,27,22,6,16
guacamole,12,14,31,15,30,30,6,25,27,36,12,12
apple_pie,14,14,8,21,44,29,16,15,35,27,13,14
cheesecake,15,26,26,22,38,20,15,23,27,20,9,9
hamburger,7,16,17,20,23,33,16,22,47,16,16,17
fried_rice,9,19,21,12,39,33,17,20,29,21,14,16
carrot_cake,8,17,28,18,40,22,22,13,29,26,14,13
chocolate_cake,9,13,22,16,49,32,13,27,31,10,13,15


### Pixels as Features

In [153]:
train, test = hc.split_data(image_names, 0.75)

In [155]:
features, labels = get_pixel_features(class_list, train, (32, 32, 3))

In [163]:
test_features, test_labels = get_pixel_features(class_list, test, (32, 32, 3))

##### k-Nearest Neighbors

In [177]:
knn = KNeighborsClassifier(n_neighbors=9)

In [179]:
%%time
knn.fit(features, labels)

CPU times: user 2.19 s, sys: 27.7 ms, total: 2.22 s
Wall time: 2.23 s


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=9, p=2,
           weights='uniform')

In [180]:
%%time
predictions = knn.predict(test_features)

CPU times: user 2min 25s, sys: 850 ms, total: 2min 26s
Wall time: 2min 27s


In [181]:
overall, metrics, confusion = hc.get_metrics(test_labels, predictions, class_list)

In [182]:
overall

Unnamed: 0,Accuracy,F1,Precision,Recall
Results,0.143,0.133,0.203,0.143


In [183]:
metrics

Unnamed: 0,Precision,Recall,F1-Score,Support
pork_chop,0.32,0.13,0.19,250.0
lasagna,0.15,0.13,0.14,250.0
french_toast,0.14,0.04,0.06,250.0
guacamole,0.25,0.11,0.15,250.0
apple_pie,0.1,0.39,0.16,250.0
cheesecake,0.13,0.28,0.18,250.0
hamburger,0.13,0.03,0.05,250.0
fried_rice,0.12,0.27,0.16,250.0
carrot_cake,0.13,0.09,0.11,250.0
chocolate_cake,0.35,0.09,0.14,250.0


In [184]:
confusion

Unnamed: 0,pork_chop,lasagna,french_toast,guacamole,apple_pie,cheesecake,hamburger,fried_rice,carrot_cake,chocolate_cake,steak,pizza
pork_chop,33,28,9,6,68,28,3,40,15,7,6,7
lasagna,5,32,8,6,85,33,7,51,8,5,2,8
french_toast,2,18,10,3,78,40,4,69,16,1,2,7
guacamole,2,13,5,27,85,45,5,53,3,2,4,6
apple_pie,2,10,4,11,97,49,5,55,9,1,0,7
cheesecake,9,10,2,7,66,70,5,44,27,5,2,3
hamburger,6,17,7,7,80,52,8,53,13,1,2,4
fried_rice,3,15,5,9,83,54,1,67,8,1,1,3
carrot_cake,8,13,3,10,87,57,6,32,23,4,1,6
chocolate_cake,18,15,5,5,73,41,4,27,27,22,9,4


##### Random Forest 

In [185]:
rf = RandomForestClassifier(n_estimators=14, max_depth=7)

In [186]:
%%time
rf.fit(features, labels)

CPU times: user 3.09 s, sys: 56.4 ms, total: 3.14 s
Wall time: 3.15 s


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=7, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=14, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [187]:
%%time
predictions = rf.predict(test_features)

CPU times: user 33.1 ms, sys: 28.3 ms, total: 61.4 ms
Wall time: 60.1 ms


In [188]:
overall, metrics, confusion = hc.get_metrics(test_labels, predictions, class_list)

In [189]:
overall

Unnamed: 0,Accuracy,F1,Precision,Recall
Results,0.207,0.192,0.191,0.207


In [190]:
metrics

Unnamed: 0,Precision,Recall,F1-Score,Support
pork_chop,0.18,0.2,0.19,250.0
lasagna,0.2,0.2,0.2,250.0
french_toast,0.13,0.07,0.09,250.0
guacamole,0.27,0.3,0.29,250.0
apple_pie,0.11,0.06,0.08,250.0
cheesecake,0.21,0.22,0.21,250.0
hamburger,0.11,0.04,0.06,250.0
fried_rice,0.23,0.33,0.27,250.0
carrot_cake,0.19,0.13,0.16,250.0
chocolate_cake,0.24,0.43,0.31,250.0


In [191]:
confusion

Unnamed: 0,pork_chop,lasagna,french_toast,guacamole,apple_pie,cheesecake,hamburger,fried_rice,carrot_cake,chocolate_cake,steak,pizza
pork_chop,50,21,13,6,4,12,9,10,8,56,45,16
lasagna,16,51,17,20,15,13,8,46,12,10,15,27
french_toast,28,33,18,14,15,21,9,29,16,25,13,29
guacamole,18,10,7,76,12,11,9,37,10,21,21,18
apple_pie,9,30,9,20,15,28,12,47,15,20,11,34
cheesecake,19,17,8,12,15,54,7,24,19,42,13,20
hamburger,23,23,15,27,8,18,10,27,11,31,27,30
fried_rice,16,17,9,36,16,19,9,83,6,4,9,26
carrot_cake,19,14,10,16,19,31,9,16,33,43,22,18
chocolate_cake,25,4,5,11,6,21,4,4,20,108,36,6
