# Image Classification with Support Vector Machines

We want to find out if an image is a company logo.

## The company logos we are working with

In [None]:
from IPython.core.display import display, HTML
from os import listdir
from os.path import isfile, join

mypath = "logos/"

# Collect the path of every regular file directly inside the logo directory
# (entries that are not files, e.g. sub-directories, are skipped).
logoImages = [join(mypath, f)
              for f in listdir(mypath)
              if isfile(join(mypath, f))]

# Upper bound on the number of thumbnails rendered below.
max_thumbnails = 100

# Build the markup as a list of fragments and join once at the end instead of
# growing a string with repeated "+=".
html_parts = ['<h1>' + str(len(logoImages)) + " Logos in directory</h1>"]

# Slicing (rather than indexing a fixed range) keeps this cell working when
# the directory contains fewer than `max_thumbnails` images.
for logo_path in logoImages[:max_thumbnails]:
    html_parts.append(
        '<img style="float:left;width:100px;height:70px;overflow:hidden" src="'
        + logo_path + '" />')

string = ''.join(html_parts)

display(HTML(string))

## The feature vector calculation

In [None]:
%matplotlib inline

from StringIO import StringIO
from PIL import Image # http://effbot.org/imagingbook/image.htm

# open image file, convert to RGB and return the feature vector
def process_image_file(image_path):
    """Open an image file and return its colour feature vector.

    Images that are not already in RGB mode are flattened onto a white
    background first, so that transparent areas are counted as white.

    Args:
        image_path: path of the image file to read.

    Returns:
        The value returned by ``calculate_feature_vector`` for the RGB image.
    """
    print("Processing image " + image_path)

    # Hand PIL the binary file object directly and keep it open until the
    # pixels have been consumed (PIL reads image data lazily); the ``with``
    # block then closes the handle deterministically.
    with open(image_path, 'rb') as image_fp:
        image = Image.open(image_fp)

        if image.mode != 'RGB':
            print( "Image is not RGB.. converting..")

            # Replace the alpha channel with white: convert to RGBA so an
            # alpha band is always present, then use that band as the paste
            # mask so transparent pixels keep the white background. Pasting
            # without a mask would simply discard the alpha information.
            rgba_image = image.convert('RGBA')
            rgb_image = Image.new('RGB', rgba_image.size, (255, 255, 255))
            rgb_image.paste(rgba_image, mask=rgba_image.split()[3])
            image = rgb_image

        return calculate_feature_vector(image)


def calculate_feature_vector(image, blocks=4):
    """Return a normalised colour histogram of ``image``.

    Each of the first three channels of a pixel is quantised into ``blocks``
    equal ranges, giving ``blocks ** 3`` colour bins. The result holds, for
    every bin, the fraction of pixels that fall into it.

    Args:
        image: object whose ``getdata()`` yields pixels indexable as
            ``pixel[0]``, ``pixel[1]``, ``pixel[2]``. The constant 256 below
            assumes 8-bit channel values (0..255).
        blocks: number of quantisation steps per channel.

    Returns:
        A list of ``blocks ** 3`` floats. Bin index is
        ``r + g * blocks + b * blocks ** 2`` for the quantised channel
        indices. If the image has no pixels, every entry is 0.0.
    """
    feature = [0.0] * (blocks ** 3)
    pixel_count = 0
    for pixel in image.getdata():
        # Pure integer arithmetic: value * blocks // 256 is always within
        # 0..blocks-1 for 8-bit values, also when `blocks` does not divide
        # 256 and regardless of how "/" behaves on integers.
        ridx = pixel[0] * blocks // 256
        gidx = pixel[1] * blocks // 256
        bidx = pixel[2] * blocks // 256
        idx = ridx + gidx * blocks + bidx * blocks * blocks
        feature[idx] += 1
        pixel_count += 1

    # An image without pixels has no colour distribution; return the
    # all-zero vector instead of dividing by zero.
    if pixel_count == 0:
        return feature
    return [x / pixel_count for x in feature]




## Visualize some of the feature vectors using matplotlib

In [None]:

import matplotlib.pyplot as plt
from matplotlib.offsetbox import TextArea, DrawingArea, OffsetImage, AnnotationBbox
from scipy import misc

# initialize a "figure" = drawing
fig = plt.figure(figsize=(16, 6))

# Plot the first 8 logos; slicing (instead of indexing a fixed range) means a
# directory with fewer images produces fewer plots rather than an IndexError.
for i, image_path in enumerate(logoImages[:8]):
    feature_vector = process_image_file(image_path)

    # axes of the graph
    # x => index of the colour bin (0..63 with the default of 4 blocks per
    #      channel), one entry per element of the feature vector
    # y => fraction of the image's pixels that fall into colour bin x
    x = list(range(len(feature_vector)))
    y = feature_vector

    # add the bar chart to the figure (grid of 4 rows x 2 columns)
    ax1 = fig.add_subplot(4, 2, i + 1)
    ax1.bar(x, y)

    # use an "AnnotationBbox" to put the image into the graph as well;
    # plt.imread replaces scipy.misc.imread, which newer SciPy releases
    # no longer provide
    imagebox = OffsetImage(plt.imread(image_path), zoom=0.17)
    ab = AnnotationBbox(imagebox, [3, 0.6],
        xybox=(30., -30.),
        xycoords='data',
        boxcoords="offset points")
    ax1.add_artist(ab)

plt.show()
    