In [7]:
import skimage.io as io
from skimage.color import rgb2gray
import os
import pandas as pd
import numpy as np
from imp import reload
from sklearn.neighbors import KNeighborsClassifier
import sys
sys.path.append('../')
import helpers.image_processing as im
import helpers.histogram_classifier as hc

Get the image classes and names:

In [9]:
# Table of the selected images; the 'class' and 'name' columns are used below.
classes = pd.read_csv('../data/food-101/meta/top_classes.csv', index_col=0)

# Distinct class labels, in order of first appearance in the table.
class_list = list(classes['class'].unique())

# Map each class label to an array of the image names recorded for that class.
image_names = {
    label: np.array(classes.loc[classes['class'] == label, 'name'])
    for label in class_list
}

In [20]:
# Accepted values for ``hist_type``.
_HIST_TYPES = ('Greyscale', 'RGB', 'Complete')
# Maps the notebook's ``weight_type`` names to KNeighborsClassifier ``weights`` values.
_KNN_WEIGHTS = {'Uniform': 'uniform', 'Weighted': 'distance'}
# Directory holding one sub-directory of images per class.
_IMAGE_ROOT = '../data/food-101/top_classes/'


def _image_histogram(img, hist_type):
    """Return the histogram feature vector of ``img`` for an already-validated ``hist_type``."""
    if hist_type == 'Greyscale':
        # greyscale_histogram also returns the bins, which are not used as features.
        hist, _bins = hc.greyscale_histogram(img)
        return hist
    if hist_type == 'RGB':
        return hc.color_histogram(img)
    return hc.complete_histogram(img)


def _build_dataset(class_list, names_by_class, hist_type):
    """Read every listed image and return parallel lists ``(features, labels)``."""
    features = []
    labels = []
    for c in class_list:
        for name in names_by_class[c]:
            img = io.imread(os.path.join(_IMAGE_ROOT, c, name))
            features.append(_image_histogram(img, hist_type))
            labels.append(c)
    return features, labels


def run_knn(class_list, image_names, hist_type, weight_type,
            n_train=750, n_neighbors=10, seed=None):
    """
    Initializes and runs a K-nearest neighbors model, and then tests the classification accuracy
    :param class_list: list of relevant image classes
    :param image_names: dictionary of image names for each class; it is NOT modified
        (each class is shuffled on a copy)
    :param hist_type: which histogram to use as features; one of 'Greyscale', 'RGB', 'Complete'
        (computed with hc.greyscale_histogram, hc.color_histogram and hc.complete_histogram
        respectively)
    :param weight_type: type of voting in the KNN model; 'Uniform' or 'Weighted'
        ('Weighted' uses weights='distance')
    :param n_train: number of shuffled images per class used for training; the remainder
        of each class is used for testing
    :param n_neighbors: number of neighbors used by the classifier
    :param seed: optional seed for the train/test shuffle; when None the global
        numpy random state is used, so every call draws a different split
    :return: None; prints out classification accuracy
    :raises ValueError: if hist_type or weight_type is not recognised, or if the split
        leaves no training or no test images for the requested classes
    """
    # Validate the options before doing any (slow) image loading. Without this an
    # unknown option would only surface later as an unbound or stale local variable.
    if hist_type not in _HIST_TYPES:
        raise ValueError("Unknown hist_type {!r}; expected one of {}".format(
            hist_type, ', '.join(_HIST_TYPES)))
    if weight_type not in _KNN_WEIGHTS:
        raise ValueError("Unknown weight_type {!r}; expected one of {}".format(
            weight_type, ', '.join(_KNN_WEIGHTS)))

    # Split into test and training data
    rng = np.random if seed is None else np.random.RandomState(seed)
    train = {}
    test = {}
    for k, v in image_names.items():
        # permutation() shuffles a copy, so the caller's arrays keep their order.
        shuffled = rng.permutation(v)
        train[k] = shuffled[:n_train]
        test[k] = shuffled[n_train:]

    # Fail early instead of after every image has been read.
    if sum(len(train[c]) for c in class_list) == 0:
        raise ValueError("No training images available for the requested classes")
    if sum(len(test[c]) for c in class_list) == 0:
        raise ValueError("No test images left after taking {} training images per class".format(n_train))

    # Create actual labels and data
    data, labels = _build_dataset(class_list, train, hist_type)

    # Initialize model and fit it to the data
    neigh = KNeighborsClassifier(n_neighbors=n_neighbors, weights=_KNN_WEIGHTS[weight_type])
    neigh.fit(np.array(data), np.array(labels))

    # Test the fitted model on the test data
    test_data, actual = _build_dataset(class_list, test, hist_type)
    predicted = neigh.predict(np.array(test_data))
    classification_rate = np.mean(predicted == np.array(actual))

    print("Using {} histogram and {} voting, classification rate is {:.2f}%".format(hist_type, weight_type, classification_rate*100))

***Version 1: Greyscale Only***

In [16]:
# Greyscale-histogram features with uniform (unweighted) neighbor voting.
run_knn(class_list, image_names, 'Greyscale', 'Uniform')

Using Greyscale histogram and Uniform voting, classification rate is 15.97%


***Version 1b: Greyscale, weighted***

In [17]:
# Greyscale-histogram features with distance-weighted neighbor voting.
run_knn(class_list, image_names, 'Greyscale', 'Weighted')

Using Greyscale histogram and Weighted voting, classification rate is 15.47%


***Version 2: RGB***

In [21]:
# Color (RGB) histogram features with uniform (unweighted) neighbor voting.
run_knn(class_list, image_names, 'RGB', 'Uniform')

Using RGB histogram and Uniform voting, classification rate is 18.63%


***Version 2b: RGB, weighted***

In [22]:
# Color (RGB) histogram features with distance-weighted neighbor voting.
run_knn(class_list, image_names, 'RGB', 'Weighted')

Using RGB histogram and Weighted voting, classification rate is 19.03%


***Version 3: RGB + Greyscale***

In [23]:
# 'Complete' histogram features (hc.complete_histogram) with uniform neighbor voting.
run_knn(class_list, image_names, 'Complete', 'Uniform')

Using Complete histogram and Uniform voting, classification rate is 19.50%


***Version 3b: RGB + Greyscale, weighted***

In [24]:
# 'Complete' histogram features (hc.complete_histogram) with distance-weighted neighbor voting.
run_knn(class_list, image_names, 'Complete', 'Weighted')

Using Complete histogram and Weighted voting, classification rate is 18.77%
