## Load the methods for feature vector generation

In [None]:
%run scripts/feature_vector.py


### Imports

In [None]:
from __future__ import division
from __future__ import print_function
from PIL import Image
from StringIO import StringIO
from sklearn import cross_validation
from sklearn import grid_search
from sklearn import svm
from sklearn import metrics
import sys
import os
import pickle

## Create all the feature vectors

In [None]:

# Source directories of the two training classes (A and B).
training_path_a = 'logos/'
training_path_b = 'non-logos/mixed/'

# Compute the feature vectors of every image found below each directory.
training_a, training_b = map(get_feature_vectors_from_directory,
                             (training_path_a, training_path_b))

# All training data: the class A feature vectors followed by the class B
# feature vectors.
data = training_a + training_b

# Target class of each entry in `data`, in the same order: 1 for every
# class A vector, then 0 for every class B vector.
target = [1] * len(training_a)
target += [0] * len(training_b)


In [None]:
# Report how many feature vectors were loaded for each training set.
# NOTE(review): "Set 0" is training_a, whose vectors are labelled 1 in
# `target`, and "Set 1" is training_b, labelled 0 -- confirm the set numbers
# are not meant to be read as class labels.
print("Set 0, size: {0}\nSet 1, size: {1}".format(len(training_a),
                                                  len(training_b)))

In [None]:

# Split the data into a training set and a held-out test set. The test set
# will contain 20% of the total.
x_train, x_test, y_train, y_test = cross_validation.train_test_split(data,
        target, test_size=0.20)

# Define the parameter search space.
# NOTE(review): per the scikit-learn SVC documentation 'gamma' is not used by
# the 'linear' kernel, so the linear candidates are fitted once per gamma
# value with identical results -- harmless, but it lengthens the search.
parameters = {'kernel': ['linear', 'rbf'], 'C': [1, 10, 100, 1000],
        'gamma': [0.01, 0.001, 0.0001]}

# Search for the best classifier within the search space (cross-validated on
# the training set only) and keep the best estimator found.
clf = grid_search.GridSearchCV(svm.SVC(), parameters).fit(x_train, y_train)
classifier = clf.best_estimator_

# Report the winning parameters and how the best estimator performs on the
# held-out test set.
print()
print('Parameters:', clf.best_params_)
print()
print('Best classifier score')
print(metrics.classification_report(y_test,
    classifier.predict(x_test)))

# Save the classifier for later. Pickle data is a byte stream, so the file
# must be opened in binary mode: text mode fails on Python 3 and can corrupt
# the stream on platforms that translate newlines.
with open('classifiers/classifier-logos-static-training-set.pickle', "wb") as fp:
    pickle.dump(classifier, fp)


In [None]:
from IPython.core.display import display, HTML
                
# Load classifier from file
#with open('classifier-logos-static-training-set.pickle', "r") as fp:
#classifier = pickle.load(fp)

def test_directory(classifier, directory, max_images=100, images_per_row=5):
    """Classify the images below *directory* and display them as thumbnails.

    Every file found by walking *directory* is converted to a feature vector
    with ``get_feature_vector_from_image_file`` (not defined in this cell;
    presumably loaded by the ``%run scripts/feature_vector.py`` cell) and
    passed to ``classifier.predict``. Each image is rendered as a 100px wide
    thumbnail inside a ``<div>`` carrying the CSS classes ``res`` and
    ``cl<prediction>``, so the notebook's stylesheet can colour the border
    by predicted class.

    Args:
        classifier: object with a ``predict`` method that accepts a list
            containing one feature vector and returns an indexable result.
        directory: root directory that is walked recursively for files.
        max_images: maximum number of images to classify and display.
        images_per_row: number of thumbnails sent to the notebook per
            ``display`` call.

    Returns:
        None. The thumbnails are emitted through IPython's ``display``.
    """
    row = []
    number = 0

    def flush_row():
        # Emit the thumbnails collected so far (if any) and start a new row.
        if row:
            display(HTML(''.join(row)))
            del row[:]

    for root, _, files in os.walk(directory):
        for file_name in files:
            if number >= max_images:
                # Stop before classifying an image that would never be
                # shown, and make sure a partially filled row is not lost.
                flush_row()
                return
            number += 1
            file_path = os.path.join(root, file_name)
            img_feature = get_feature_vector_from_image_file(file_path)
            result = classifier.predict([img_feature])
            # NOTE(review): file_path is inserted into the markup unescaped;
            # a file name containing '"' or '<' would break the generated
            # HTML.
            row.append('<div class="res cl' + str(result[0])
                       + '" style="float:left"><img src="' + file_path
                       + '" width="100px" /></div>')
            if len(row) >= images_per_row:
                flush_row()

    # Show the trailing thumbnails when the image count is not a multiple
    # of images_per_row.
    flush_row()

# Show the classifier's predictions for the logo directory. The commented
# calls below run the same check on other image directories.
test_directory(classifier=classifier, directory='logos/')
#test_directory(classifier=classifier, directory='non-logos/car')
#test_directory(classifier=classifier, directory='non-logos/schwierig')


In [None]:
%%html
<style>
.res {margin:0 0 0 10px;border:5px solid red}
.cl1 {border-color:#ccc}
</style>