In [1]:
import os
import pickle

In [2]:
from skimage.io import imread
from skimage.transform import resize
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [3]:
# Dataset configuration.
# Each entry of `categories` names a sub-directory of `input_dir`; the
# position of a name in the list is the integer label given to its images.
categories = ['empty', 'not_empty']
input_dir = './clf-data'


In [4]:
# Build the raw sample lists: every image is resized to 15x15, flattened to
# a 1-D feature vector, and labelled with the index of its category.
data = []
labels = []
for category_idx, category in enumerate(categories):
    category_dir = os.path.join(input_dir, category)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order identical across runs and machines.
    for file in sorted(os.listdir(category_dir)):
        img_path = os.path.join(category_dir, file)
        img = imread(img_path)
        img = resize(img, (15, 15))
        # NOTE(review): assumes all images have the same number of channels so
        # every flattened vector has the same length -- confirm for the dataset.
        data.append(img.flatten())
        labels.append(category_idx)

In [6]:
# Convert the accumulated Python lists into NumPy arrays in one step
# (features first, then the matching integer labels).
data, labels = np.asarray(data), np.asarray(labels)

In [7]:
# train / test split
# Hold out 20% of the samples for testing; `stratify=labels` keeps the class
# proportions the same in both parts. `random_state` pins the shuffle so the
# split (and therefore the reported accuracy) is reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=42,
)

In [8]:
# Base estimator for the hyper-parameter search: a support vector
# classifier created with its default settings.
classifier = SVC()

In [9]:
# Hyper-parameter grid to search: every combination of the listed
# `gamma` and `C` values (3 x 4 = 12 candidates).
parameters = [
    {
        'gamma': [0.01, 0.001, 0.0001],
        'C': [1, 10, 100, 1000],
    },
]

In [10]:
# Wrap the classifier in a cross-validated grid search over `parameters`.
grid_search = GridSearchCV(estimator=classifier, param_grid=parameters)

In [11]:
# Run the search on the training portion only; the test split stays unseen.
grid_search.fit(X=x_train, y=y_train)

In [12]:
# test performance
# Take the best model found by the grid search and evaluate it on the
# held-out test split.
best_estimator = grid_search.best_estimator_

y_prediction = best_estimator.predict(x_test)

# accuracy_score's signature is (y_true, y_pred): ground truth goes first.
# Accuracy itself is symmetric, so the value is unchanged, but the correct
# order keeps this call safe if it is swapped for an asymmetric metric.
score = accuracy_score(y_test, y_prediction)

In [13]:
# Report the test accuracy as a percentage.
percent_correct = str(score * 100)
print('{}% of samples were correctly classified'.format(percent_correct))

100.0% of samples were correctly classified


In [14]:
# Persist the selected estimator to ./model.p with pickle.
# Only unpickle this file from a trusted location: loading a pickle can
# execute arbitrary code.
with open('./model.p', 'wb') as out_handle:
    pickle.dump(best_estimator, out_handle)