
# `Jupyter notebook on image classification with Python and scikit-learn`


In [1]:
import numpy as np
import os
from skimage.io import imread
from skimage.transform import resize

In [2]:
# prepare the data
# The dataset location can be overridden through the CLF_DATA_DIR environment variable;
# the original hard-coded path stays as the default, so existing setups keep working.
input_dir = os.environ.get(
    'CLF_DATA_DIR',
    'C:/Python_Programming/machine learning/drive-download/clf-data/clf-data',
)
# one sub-folder of input_dir per class; the position in this list is used as the numeric label
categories = ['empty','not_empty']

In [3]:
# Read every image of every category, downscale it and flatten it into one feature row.
# `data` collects the feature vectors, `labels` the matching index into `categories`.
data = []
labels = []
for category_idx, category in enumerate(categories):
    category_dir = os.path.join(input_dir, category)
    # os.listdir returns entries in arbitrary order; sort them so the dataset
    # order is the same on every run and on every machine
    for file in sorted(os.listdir(category_dir)):
        img_path = os.path.join(category_dir, file)
        if not os.path.isfile(img_path):
            # nested directories cannot be read as images
            continue
        img = imread(img_path)
        # downscale to a 15x15 grid before flattening
        # NOTE(review): assumes all images share the same channel count, otherwise
        # the flattened rows differ in length - confirm against the dataset
        img = resize(img, (15,15))
        data.append(img.flatten())
        labels.append(category_idx)

In [4]:
# turn the collected Python lists into NumPy arrays for the steps that follow
data, labels = np.asarray(data), np.asarray(labels)


In [5]:
# split the data into a training set and a test set
from sklearn.model_selection import train_test_split


In [6]:
# Hold out 20% of the samples for testing, stratified on the labels.
# A fixed random_state makes the shuffle - and therefore every score computed
# afterwards - reproducible on Restart & Run All.
x_train,x_test,y_train,y_test = train_test_split(
    data, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=42
)

## `Using a Support Vector Classifier to classify the images`

In [7]:

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

In [8]:
# base estimator with library-default settings; it is handed to GridSearchCV
# together with the `parameters` grid (gamma, C) in the cells that follow
svc = SVC()


In [9]:
# hyperparameter grid for the search: candidate values for gamma and for C
gamma_candidates = [0.01, 0.001, 0.0001]
c_candidates = [1, 10, 100, 1000]
parameters = [{'gamma': gamma_candidates, 'C': c_candidates}]

In [10]:
# build the search over the `parameters` grid, using `svc` as the estimator
grid_search = GridSearchCV(estimator=svc, param_grid=parameters)

In [11]:
# run the search on the training split only; the test split stays untouched
grid_search.fit(X=x_train, y=y_train)

Other hyperparameters worth tuning include `kernel`, e.g. `kernel='poly'`. The available kernels are `'linear'`, `'poly'`, `'rbf'`, `'sigmoid'` and `'precomputed'`.

In [12]:
# keep the estimator the grid search selected; it is used below for prediction
# and is the object that gets pickled at the end of the notebook
best_estimator = grid_search.best_estimator_

In [13]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
# predict labels for the held-out test split
y_prediction = best_estimator.predict(X=x_test)

In [14]:
# accuracy_score is documented as (y_true, y_pred): pass the ground truth first.
# The value is the same either way for accuracy, but the correct order keeps this
# cell safe to copy for metrics that are not symmetric.
score = accuracy_score(y_test, y_prediction)
score

1.0

In [15]:
# the SVM model reaches 100% accuracy on the test set


In [16]:
# save the model
import pickle
# write through a context manager so the file handle is flushed and closed
# as soon as the model has been serialized
with open('./svm_image_classifier.p','wb') as model_file:
    pickle.dump(best_estimator, model_file)