In [1]:
import matplotlib.pyplot as plt
import skimage.io as io
from skimage.color import rgb2gray
import os
import pandas as pd
from pandas.tools.plotting import table
import numpy as np
from imp import reload
from sklearn.neighbors import KNeighborsClassifier
import sys
sys.path.append('../')
import helpers.image_processing as im
import helpers.histogram_classifier as hc
import sklearn.metrics as m

In [11]:
from IPython.core.display import HTML
# Load the table stylesheet and inject it into the notebook as a <style> element.
# The context manager closes the file handle as soon as the text has been read.
with open('styles/table-style.css') as css_file:
    css = css_file.read()
HTML('<style>{}</style>'.format(css))

Get the image classes and names:

In [3]:
# Table of the selected classes; the code below reads its 'class' and 'name' columns.
classes = pd.read_csv('../data/food-101/meta/top_classes.csv', index_col=0)

# Distinct class labels, in order of first appearance in the table.
class_list = list(classes['class'].unique())

# Map each class label to an array holding the 'name' values of its rows.
image_names = {
    label: np.array(classes.loc[classes['class'] == label, 'name'])
    for label in class_list
}

In [4]:
# Accepted values for run_knn's hist_type argument.
_HIST_TYPES = ('Greyscale', 'RGB', 'Complete')

# Maps run_knn's weight_type argument to the KNeighborsClassifier `weights` option.
_KNN_WEIGHTS = {'Uniform': 'uniform', 'Weighted': 'distance'}


def _image_histogram(img, hist_type):
    """
    Computes the feature histogram of a single image
    :params img: image as returned by io.imread
    :params hist_type: one of 'Greyscale', 'RGB' or 'Complete'
    :return: histogram produced by the matching function in helpers.histogram_classifier
    :raises ValueError: if hist_type is not one of the accepted names
    """
    if hist_type == 'Greyscale':
        # greyscale_histogram returns a (hist, bins) pair; only hist is used as a feature
        hist, _ = hc.greyscale_histogram(img)
        return hist
    if hist_type == 'RGB':
        return hc.color_histogram(img)
    if hist_type == 'Complete':
        return hc.complete_histogram(img)
    raise ValueError("Unknown hist_type {!r}; expected one of {}".format(hist_type, _HIST_TYPES))


def _load_histograms(class_list, names_by_class, hist_type, data_dir):
    """
    Reads every listed image and builds the feature matrix and label vector
    :params class_list: list of relevant image classes
    :params names_by_class: mapping of class -> iterable of image file names
    :params hist_type: one of 'Greyscale', 'RGB' or 'Complete'
    :params data_dir: directory containing one sub-directory of images per class
    :return: features: array with one histogram per image, ordered by class_list
    :return: labels: array with the class of each image, in the same order
    """
    features = []
    labels = []
    for c in class_list:
        for name in names_by_class[c]:
            img = io.imread(os.path.join(data_dir, c, name))
            features.append(_image_histogram(img, hist_type))
            labels.append(c)
    return np.array(features), np.array(labels)


def run_knn(class_list, image_names, hist_type, weight_type, n_neighbors=10,
            split_ratio=0.75, data_dir='../data/food-101/top_classes/'):
    """
    Initializes and runs a K-nearest neighbors model, and then tests the classification accuracy
    :params class_list: list of relevant image classes
    :params image_names: dictionary of image names for each class
    :params hist_type: name of method for calculating histogram; choices are: 'Greyscale'
        (hc.greyscale_histogram), 'RGB' (hc.color_histogram), 'Complete' (hc.complete_histogram)
    :params weight_type: type of weighting to use in KNN model; 'Uniform' or 'Weighted'
        ('Weighted' selects weights='distance')
    :params n_neighbors: number of neighbors used by the classifier (default 10)
    :params split_ratio: second argument passed to hc.split_data (default 0.75);
        presumably the fraction of images used for training -- confirm against hc.split_data
    :params data_dir: directory containing one sub-directory of images per class
    :return: actual: array of actual labels
    :return: predicted: array of predicted labels
    :raises ValueError: if hist_type or weight_type is not one of the accepted names
    """
    # Fail fast on bad option names, before any data is split or any image is read
    if hist_type not in _HIST_TYPES:
        raise ValueError("Unknown hist_type {!r}; expected one of {}".format(hist_type, _HIST_TYPES))
    if weight_type not in _KNN_WEIGHTS:
        raise ValueError(
            "Unknown weight_type {!r}; expected one of {}".format(weight_type, tuple(_KNN_WEIGHTS)))

    # Split into test and training data
    train, test = hc.split_data(image_names, split_ratio)

    # Fit the model to the training histograms
    train_data, train_labels = _load_histograms(class_list, train, hist_type, data_dir)
    neigh = KNeighborsClassifier(n_neighbors=n_neighbors, weights=_KNN_WEIGHTS[weight_type])
    neigh.fit(train_data, train_labels)

    # Test the fitted model on the test data
    test_data, actual = _load_histograms(class_list, test, hist_type, data_dir)
    predicted = neigh.predict(test_data)
    return actual, predicted

***Version 1: Greyscale Only***

In [62]:
# Greyscale histogram features, uniformly weighted neighbors.
actual, predicted = run_knn(
    class_list,
    image_names,
    hist_type='Greyscale',
    weight_type='Uniform',
)

In [63]:
# hc.get_metrics returns three values, unpacked here as the overall accuracy,
# the per-class metrics table and the confusion table shown in the cells below.
accuracy, metrics, confusion = hc.get_metrics(actual, predicted, class_list)

In [64]:
# Report the accuracy scaled by 100 as a percentage with one decimal place.
print("Classifier accuracy is: {:.1f}%".format(accuracy*100))

Classifier accuracy is: 15.9%


###### Overall Results:

In [65]:
# Display the per-class Precision / Recall / F1-Score / Support table.
metrics

Unnamed: 0,Precision,Recall,F1-Score,Support
pork_chop,0.14,0.1,0.11,250.0
lasagna,0.12,0.08,0.1,250.0
french_toast,0.14,0.18,0.16,250.0
guacamole,0.09,0.11,0.1,250.0
apple_pie,0.15,0.16,0.16,250.0
cheesecake,0.19,0.12,0.14,250.0
hamburger,0.1,0.12,0.11,250.0
fried_rice,0.2,0.27,0.23,250.0
carrot_cake,0.15,0.13,0.14,250.0
chocolate_cake,0.26,0.2,0.22,250.0


###### Confusion Matrix:

In [66]:
# Display the confusion table returned by hc.get_metrics.
confusion

Unnamed: 0,pork_chop,lasagna,french_toast,guacamole,apple_pie,cheesecake,hamburger,fried_rice,carrot_cake,chocolate_cake,steak,pizza
pork_chop,24,23,26,24,16,10,25,19,20,17,21,25
lasagna,16,21,33,23,17,12,22,27,18,16,11,34
french_toast,12,19,46,29,12,8,31,25,14,7,11,36
guacamole,6,8,27,27,25,15,30,34,6,11,23,38
apple_pie,10,16,25,18,41,14,12,41,23,13,10,27
cheesecake,12,10,23,22,31,29,24,23,29,19,21,7
hamburger,12,10,33,31,27,3,29,35,9,8,13,40
fried_rice,5,16,29,30,23,9,24,67,8,4,5,30
carrot_cake,13,20,27,20,26,13,24,22,33,13,16,23
chocolate_cake,27,8,16,26,14,26,17,8,28,49,21,10


***Version 1b: Greyscale, weighted***

In [5]:
# Greyscale histogram features, distance-weighted neighbors.
actual, predicted = run_knn(
    class_list,
    image_names,
    hist_type='Greyscale',
    weight_type='Weighted',
)

In [6]:
# hc.get_metrics returns three values, unpacked here as the overall accuracy,
# the per-class metrics table and the confusion table shown in the cells below.
accuracy, metrics, confusion = hc.get_metrics(actual, predicted, class_list)

In [7]:
# Report the accuracy scaled by 100 as a percentage with one decimal place.
print("Classifier accuracy is: {:.1f}%".format(accuracy*100))

Classifier accuracy is: 15.0%


###### Overall Results:

In [8]:
# Display the per-class Precision / Recall / F1-Score / Support table.
metrics

Unnamed: 0,Precision,Recall,F1-Score,Support
pork_chop,0.12,0.1,0.11,250.0
lasagna,0.1,0.09,0.09,250.0
french_toast,0.14,0.13,0.13,250.0
guacamole,0.14,0.18,0.15,250.0
apple_pie,0.1,0.07,0.08,250.0
cheesecake,0.25,0.12,0.16,250.0
hamburger,0.12,0.15,0.13,250.0
fried_rice,0.17,0.24,0.2,250.0
carrot_cake,0.15,0.09,0.11,250.0
chocolate_cake,0.26,0.15,0.19,250.0


###### Confusion Matrix:

In [9]:
# Display the confusion table returned by hc.get_metrics.
confusion

Unnamed: 0,pork_chop,lasagna,french_toast,guacamole,apple_pie,cheesecake,hamburger,fried_rice,carrot_cake,chocolate_cake,steak,pizza
pork_chop,24,19,27,26,13,6,25,13,13,13,40,31
lasagna,17,23,22,23,11,6,24,45,11,7,12,49
french_toast,17,31,33,23,12,8,26,26,16,5,14,39
guacamole,17,7,14,44,16,4,32,38,5,8,17,48
apple_pie,13,23,11,17,17,13,22,48,15,5,22,44
cheesecake,20,19,18,25,16,29,19,21,25,18,23,17
hamburger,7,18,24,38,22,1,37,24,5,5,20,49
fried_rice,9,15,19,28,15,5,27,60,9,5,13,45
carrot_cake,17,26,15,30,13,14,27,23,23,11,21,30
chocolate_cake,24,21,18,20,9,18,21,15,16,37,38,13


***Version 2: RGB***

In [75]:
# RGB colour histogram features, uniformly weighted neighbors.
actual, predicted = run_knn(
    class_list,
    image_names,
    hist_type='RGB',
    weight_type='Uniform',
)

In [77]:
# hc.get_metrics returns three values, unpacked here as the overall accuracy,
# the per-class metrics table and the confusion table shown in the cells below.
accuracy, metrics, confusion = hc.get_metrics(actual, predicted, class_list)

In [78]:
# Report the accuracy scaled by 100 as a percentage with two decimal places.
print("Classifier accuracy is: {:.2f}%".format(accuracy*100))

Classifier accuracy is: 19.10%


###### Overall Results:

In [79]:
# Display the per-class Precision / Recall / F1-Score / Support table.
metrics

Unnamed: 0,Precision,Recall,F1-Score,Support
pork_chop,0.15,0.11,0.13,250.0
lasagna,0.16,0.09,0.11,250.0
french_toast,0.13,0.23,0.17,250.0
guacamole,0.29,0.32,0.3,250.0
apple_pie,0.12,0.11,0.12,250.0
cheesecake,0.23,0.12,0.16,250.0
hamburger,0.17,0.22,0.19,250.0
fried_rice,0.24,0.37,0.29,250.0
carrot_cake,0.11,0.1,0.11,250.0
chocolate_cake,0.25,0.17,0.2,250.0


###### Confusion Matrix:

In [80]:
# Display the confusion table returned by hc.get_metrics.
confusion

Unnamed: 0,pork_chop,lasagna,french_toast,guacamole,apple_pie,cheesecake,hamburger,fried_rice,carrot_cake,chocolate_cake,steak,pizza
pork_chop,28,7,41,16,19,8,27,15,23,19,22,25
lasagna,6,22,52,11,20,10,30,25,22,4,12,36
french_toast,12,15,58,13,20,4,30,28,18,7,6,39
guacamole,3,5,24,80,14,4,21,48,12,11,12,16
apple_pie,13,12,41,17,28,13,33,44,21,6,9,13
cheesecake,12,12,33,17,23,31,18,22,30,25,14,13
hamburger,11,13,33,24,19,6,55,28,6,12,10,33
fried_rice,10,10,28,17,24,6,26,92,8,1,6,22
carrot_cake,27,13,43,13,17,13,24,29,25,16,21,9
chocolate_cake,17,8,23,28,10,27,12,12,27,43,28,15


***Version 2b: RGB, weighted***

In [81]:
# RGB colour histogram features, distance-weighted neighbors.
actual, predicted = run_knn(
    class_list,
    image_names,
    hist_type='RGB',
    weight_type='Weighted',
)

In [82]:
# hc.get_metrics returns three values, unpacked here as the overall accuracy,
# the per-class metrics table and the confusion table shown in the cells below.
accuracy, metrics, confusion = hc.get_metrics(actual, predicted, class_list)

In [83]:
# Report the accuracy scaled by 100 as a percentage with two decimal places.
print("Classifier accuracy is: {:.2f}%".format(accuracy*100))

Classifier accuracy is: 18.77%


###### Overall Results:

In [84]:
# Display the per-class Precision / Recall / F1-Score / Support table.
metrics

Unnamed: 0,Precision,Recall,F1-Score,Support
pork_chop,0.15,0.16,0.16,250.0
lasagna,0.14,0.09,0.11,250.0
french_toast,0.15,0.22,0.18,250.0
guacamole,0.29,0.29,0.29,250.0
apple_pie,0.14,0.08,0.11,250.0
cheesecake,0.15,0.06,0.08,250.0
hamburger,0.14,0.22,0.18,250.0
fried_rice,0.23,0.36,0.28,250.0
carrot_cake,0.14,0.07,0.09,250.0
chocolate_cake,0.32,0.17,0.22,250.0


###### Confusion Matrix:

In [85]:
# Display the confusion table returned by hc.get_metrics.
confusion

Unnamed: 0,pork_chop,lasagna,french_toast,guacamole,apple_pie,cheesecake,hamburger,fried_rice,carrot_cake,chocolate_cake,steak,pizza
pork_chop,41,14,31,17,18,6,27,17,10,6,44,19
lasagna,19,23,28,9,16,9,40,37,8,5,19,37
french_toast,20,24,54,10,7,6,46,23,11,5,14,30
guacamole,13,12,14,72,5,4,34,33,4,11,18,30
apple_pie,22,16,36,14,21,6,34,40,13,8,11,29
cheesecake,18,17,26,13,24,14,24,25,18,21,21,29
hamburger,21,10,31,15,8,4,56,35,2,5,18,45
fried_rice,9,15,21,17,4,8,38,90,5,4,11,28
carrot_cake,29,13,42,17,21,13,19,26,18,9,19,24
chocolate_cake,24,9,23,21,10,12,16,14,23,43,43,12


***Version 3: RGB + Greyscale***

In [86]:
# 'Complete' histogram features (hc.complete_histogram), uniformly weighted neighbors.
actual, predicted = run_knn(
    class_list,
    image_names,
    hist_type='Complete',
    weight_type='Uniform',
)

In [87]:
# hc.get_metrics returns three values, unpacked here as the overall accuracy,
# the per-class metrics table and the confusion table shown in the cells below.
accuracy, metrics, confusion = hc.get_metrics(actual, predicted, class_list)

In [88]:
# Report the accuracy scaled by 100 as a percentage with two decimal places.
print("Classifier accuracy is: {:.2f}%".format(accuracy*100))

Classifier accuracy is: 18.03%


###### Overall Results:

In [89]:
# Display the per-class Precision / Recall / F1-Score / Support table.
metrics

Unnamed: 0,Precision,Recall,F1-Score,Support
pork_chop,0.16,0.13,0.14,250.0
lasagna,0.15,0.08,0.11,250.0
french_toast,0.15,0.26,0.19,250.0
guacamole,0.26,0.29,0.27,250.0
apple_pie,0.16,0.16,0.16,250.0
cheesecake,0.18,0.1,0.13,250.0
hamburger,0.13,0.21,0.16,250.0
fried_rice,0.16,0.22,0.19,250.0
carrot_cake,0.14,0.11,0.12,250.0
chocolate_cake,0.27,0.2,0.23,250.0


###### Confusion Matrix:

In [90]:
# Display the confusion table returned by hc.get_metrics.
confusion

Unnamed: 0,pork_chop,lasagna,french_toast,guacamole,apple_pie,cheesecake,hamburger,fried_rice,carrot_cake,chocolate_cake,steak,pizza
pork_chop,32,10,46,15,17,9,30,13,18,14,29,17
lasagna,15,21,42,16,24,8,26,33,17,4,14,30
french_toast,13,20,65,14,17,4,35,23,18,7,10,24
guacamole,10,7,23,72,11,6,38,39,9,9,11,15
apple_pie,14,10,30,19,39,15,27,40,14,7,8,27
cheesecake,12,13,36,18,31,24,23,21,25,25,13,9
hamburger,14,13,36,24,17,5,53,34,5,7,8,34
fried_rice,9,14,30,26,19,9,38,55,9,6,8,27
carrot_cake,19,8,38,11,25,15,31,24,28,21,14,16
chocolate_cake,26,7,20,25,16,18,26,7,25,49,22,9


***Version 3b: RGB + Greyscale, weighted***

In [91]:
# 'Complete' histogram features (hc.complete_histogram), distance-weighted neighbors.
actual, predicted = run_knn(
    class_list,
    image_names,
    hist_type='Complete',
    weight_type='Weighted',
)

In [92]:
# hc.get_metrics returns three values, unpacked here as the overall accuracy,
# the per-class metrics table and the confusion table shown in the cells below.
accuracy, metrics, confusion = hc.get_metrics(actual, predicted, class_list)

In [93]:
# Report the accuracy scaled by 100 as a percentage with two decimal places.
print("Classifier accuracy is: {:.2f}%".format(accuracy*100))

Classifier accuracy is: 18.63%


###### Overall Results:

In [94]:
# Display the per-class Precision / Recall / F1-Score / Support table.
metrics

Unnamed: 0,Precision,Recall,F1-Score,Support
pork_chop,0.13,0.16,0.14,250.0
lasagna,0.14,0.09,0.11,250.0
french_toast,0.16,0.25,0.2,250.0
guacamole,0.21,0.2,0.2,250.0
apple_pie,0.16,0.08,0.11,250.0
cheesecake,0.27,0.1,0.15,250.0
hamburger,0.17,0.25,0.2,250.0
fried_rice,0.23,0.35,0.28,250.0
carrot_cake,0.12,0.06,0.08,250.0
chocolate_cake,0.31,0.18,0.23,250.0


###### Confusion Matrix:

In [95]:
# Display the confusion table returned by hc.get_metrics.
confusion

Unnamed: 0,pork_chop,lasagna,french_toast,guacamole,apple_pie,cheesecake,hamburger,fried_rice,carrot_cake,chocolate_cake,steak,pizza
pork_chop,39,14,38,11,8,6,32,13,9,9,40,31
lasagna,20,23,39,12,11,5,28,40,8,4,22,38
french_toast,19,19,63,11,10,3,42,24,8,4,14,33
guacamole,24,11,20,49,9,1,33,33,8,10,21,31
apple_pie,19,17,39,16,21,9,24,41,11,6,16,31
cheesecake,23,11,34,26,15,25,21,29,17,17,17,15
hamburger,22,9,27,20,13,0,63,29,9,5,16,37
fried_rice,10,19,11,20,17,5,31,87,5,8,9,28
carrot_cake,25,15,33,15,9,9,24,31,16,18,33,22
chocolate_cake,29,9,27,22,4,18,15,10,17,46,45,8
