In [3]:
import skimage.io as io
from skimage.color import rgb2gray
import os
import numpy as np
import pandas as pd
from imp import reload
import sys
sys.path.append('../')
import helpers.image_processing as im
import helpers.histogram_classifier as hc

##### Histogram classifier

- Get the data
- Split into train vs. test 75% : 25%
- Train the model = create histogram for each class
- Test the model by predicting on the test data

Get the image classes and names:

In [4]:
# Table of the selected classes; the first CSV column is used as the index.
classes = pd.read_csv('../data/food-101/meta/top_classes.csv', index_col=0)

# Distinct values of the 'class' column, in the order unique() reports them.
class_list = [label for label in classes['class'].unique()]

# Map every class label to an array holding the 'name' values of its rows.
image_names = {
    label: np.array(classes.loc[classes['class'] == label, 'name'])
    for label in class_list
}

In [23]:
def _compute_histogram(image, hist_type):
    """
    Computes the histogram of one image with the helper that matches hist_type
    :param image: image as returned by io.imread or im.mean_image
    :param hist_type: one of 'Greyscale', 'RGB' or 'Complete'
    :return: whatever histogram the matching hc.*_histogram helper produces
    :raises ValueError: if hist_type is not one of the supported names
    """
    if hist_type == 'Greyscale':
        # greyscale_histogram returns a (histogram, bins) pair; only the histogram is used
        hist, _bins = hc.greyscale_histogram(image)
        return hist
    if hist_type == 'RGB':
        return hc.color_histogram(image)
    if hist_type == 'Complete':
        return hc.complete_histogram(image)
    raise ValueError(
        "Unknown hist_type {!r}; expected 'Greyscale', 'RGB' or 'Complete'".format(hist_type))


def run_histogram_classifier(class_list, image_names, hist_type, n_train=750,
                             data_dir='../data/food-101/top_classes/', seed=None):
    """
    Initializes and runs histogram classifier model, and then tests the classification accuracy
    :param class_list: list of relevant image classes
    :param image_names: dictionary of image names for each class (left unmodified)
    :param hist_type: name of the histogram to use; choices are: 'Greyscale' (hc.greyscale_histogram),
        'RGB' (hc.color_histogram), 'Complete' (hc.complete_histogram)
    :param n_train: number of shuffled images per class used for training; the rest are test images
    :param data_dir: directory that contains one sub-directory of images per class
    :param seed: optional seed for the train/test shuffle; None keeps using the global np.random state
    :return: None; prints out the classification accuracy for each comparison metric
    :raises ValueError: if hist_type is unsupported, or if the split leaves no test images
    """
    supported_types = ('Greyscale', 'RGB', 'Complete')
    if hist_type not in supported_types:
        # Fail before any image is read instead of hitting an unbound variable later
        raise ValueError(
            "Unknown hist_type {!r}; expected one of {}".format(hist_type, supported_types))

    metrics = ["intersection", "l1_norm", "euclid"]
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Split into test and training data
    train = {}
    test = {}
    for k, v in image_names.items():
        # permutation() shuffles a copy, so the caller's arrays keep their order
        shuffled = rng.permutation(v)
        train[k] = shuffled[:n_train]
        test[k] = shuffled[n_train:]

    if sum(len(test[c]) for c in class_list) == 0:
        # Without test images the classification rate would be a division by zero
        raise ValueError(
            "No test images left after taking {} training images per class".format(n_train))

    # Create the model: one histogram per class, computed from that class's mean image
    model = {}
    for c in class_list:
        # Trailing '' keeps the path separator at the end of the folder path
        class_dir = os.path.join(data_dir, c, '')
        mean_image = im.mean_image(train[c], (512, 512, 3), class_dir)
        model[c] = _compute_histogram(mean_image, hist_type)

    # Evaluate the model. Each test image is read and histogrammed once and then
    # classified under every metric, rather than being re-read once per metric.
    # NOTE(review): assumes hc.classify does not modify the histogram it is given - confirm
    actual_classes = []
    predicted_classes = {metric: [] for metric in metrics}
    for c in class_list:
        class_dir = os.path.join(data_dir, c, '')
        for i in test[c]:
            img = io.imread(os.path.join(class_dir, i))
            img_hist = _compute_histogram(img, hist_type)
            actual_classes.append(c)
            for metric in metrics:
                # presumably classify returns the label of the best-matching class in model
                predicted_classes[metric].append(hc.classify(img_hist, model, metric))

    actual = np.array(actual_classes)
    for metric in metrics:
        predicted = np.array(predicted_classes[metric])
        # Fraction of test images whose predicted label equals the true label
        classification_rate = np.mean(actual == predicted)

        print("Using {} histogram and {} metric, classification rate is {:.2f}%".format(hist_type, metric, classification_rate*100))

***Version 1 - Greyscale***

In [24]:
# Train and evaluate the classifier using the 'Greyscale' histogram type.
run_histogram_classifier(class_list=class_list, image_names=image_names, hist_type='Greyscale')

Using Greyscale histogram and intersection metric, classification rate is 10.27%
Using Greyscale histogram and l1_norm metric, classification rate is 10.27%
Using Greyscale histogram and euclid metric, classification rate is 9.37%


***Version 2 - RGB***

In [25]:
# Train and evaluate the classifier using the 'RGB' histogram type.
run_histogram_classifier(class_list=class_list, image_names=image_names, hist_type='RGB')

Using RGB histogram and intersection metric, classification rate is 10.50%
Using RGB histogram and l1_norm metric, classification rate is 10.50%
Using RGB histogram and euclid metric, classification rate is 9.93%


***Version 3 - RGB + Greyscale***

In [26]:
# Train and evaluate the classifier using the 'Complete' histogram type.
run_histogram_classifier(class_list=class_list, image_names=image_names, hist_type='Complete')

Using Complete histogram and intersection metric, classification rate is 9.90%
Using Complete histogram and l1_norm metric, classification rate is 9.90%
Using Complete histogram and euclid metric, classification rate is 9.13%
