In [1]:
import os
import cv2
import numpy as np
import mlflow
import joblib
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from keras_balanced_batch_generator import make_generator
from matplotlib import pyplot as plt
from matplotlib.image import imread

## Prepare data

In [2]:
# Build the dataset: every readable file in train/ becomes one 200x200
# grayscale image, and the filename prefix supplies the label
# (0 for names starting with 'cat', 1 for everything else -- presumably dogs).
examples = []
labels = []
skipped = []

# Sort the listing: os.listdir returns entries in arbitrary order, which would
# make the seeded train/test split downstream differ between machines.
for file in sorted(os.listdir("train")):
    img = cv2.imread(os.path.join("train", file), 0)  # flag 0 -> load as grayscale
    if img is None:
        # cv2.imread signals an unreadable / non-image file by returning None
        # instead of raising; skip it rather than crash inside cv2.resize.
        skipped.append(file)
        continue
    img = cv2.resize(img, (200, 200), interpolation=cv2.INTER_CUBIC)
    examples.append(img)
    labels.append(0 if file.startswith('cat') else 1)

if skipped:
    print("Skipped {} unreadable file(s): {}".format(len(skipped), skipped))

In [None]:
# Quick sanity check on what the loader stored for the first image.
type(examples[0])
# Further ad-hoc inspection helpers, left disabled:
# print(labels[:10])
# print(examples[:10])
# plt.imshow(examples[3], cmap='gray')
# print(os.listdir("train"))

## Descriptor

In [3]:
# HOG geometry, in pixels. These are module-level because other cells
# need the same window size that the descriptor is built with.
winSize = (200, 200)
blockSize = (100, 100)
blockStride = (20, 20)
cellSize = (50, 50)


def get_hog():
    """Create the cv2.HOGDescriptor used to featurise every image.

    The window/block/stride/cell sizes come from the module-level constants
    above; the remaining constructor arguments are fixed here and labelled
    inline with the names of the positional parameters they fill.

    Returns:
        A configured cv2.HOGDescriptor instance.
    """
    return cv2.HOGDescriptor(
        winSize, blockSize, blockStride, cellSize,
        9,      # nbins
        1,      # derivAperture
        -1.,    # winSigma
        0,      # histogramNormType
        0.2,    # L2HysThreshold
        1,      # gammaCorrection
        64,     # nlevels
        True,   # signedGradient
    )

In [None]:
#rand = np.random.RandomState(25)
#shuffle = rand.permutation(len(examples))
#examples, labels = np.array(examples)[shuffle], np.array(labels)[shuffle]

In [4]:
# Build the HOG descriptor once so the same instance can be reused for every image.
hog = get_hog()

In [5]:
# Featurise every loaded image with the shared HOG descriptor, then drop
# singleton dimensions from the stacked result.
hog_descriptors = np.squeeze([hog.compute(img) for img in examples])

In [6]:
train_n = int(0.9 * len(hog_descriptors))

# Stratified 90/10 split. Images, descriptors and labels are split together
# so that row i refers to the same sample in all three.
split_parts = train_test_split(
    examples, hog_descriptors, labels,
    test_size=0.1, random_state=42, stratify=labels,
)

# train_test_split yields (train, test) pairs in input order; convert each
# piece to an ndarray while unpacking.
(train_examples, test_examples,
 train_hog_descriptors, test_hog_descriptors,
 train_labels, test_labels) = [np.array(part) for part in split_parts]

In [None]:
# Sanity check on the training feature matrix (expected: one row per training sample).
train_hog_descriptors.shape

## Grid search

In [None]:
# parameters = {"SVM__C": np.linspace(4.875, 4.877, 11),
#              "SVM__gamma": np.linspace(0.536, 0.538, 11),
#              "SVM__kernel": ["sigmoid"]}

In [None]:
# pipeline = Pipeline(steps=[('StandardScaler', StandardScaler()), ('SVM', SVC())])
# clf = GridSearchCV(pipeline, param_grid=parameters, n_jobs=-1)
# clf.fit(train_hog_descriptors[:100], train_labels[:100])

In [None]:
# clf.best_params_

## Algorithm

In [None]:
# Hyperparameters for the MLP; kept in a dict so the same values can be
# logged alongside the run's metrics.
params = dict(
    activation='relu',
    batch_size=128,
    early_stopping=True,
    hidden_layer_sizes=(1000, 800, 600),
    learning_rate='adaptive',
    learning_rate_init=0.03,
    n_iter_no_change=30,
    random_state=42,
    solver='sgd',
    verbose=True,
)

model = MLPClassifier(**params)

# Standardise the HOG features before they reach the network; fit() returns
# the fitted pipeline itself, so it can be bound directly.
clf = make_pipeline(StandardScaler(), model).fit(train_hog_descriptors, train_labels)
print("clf fitted")

# Predictions on the held-out split, used by the evaluation cells.
pred = clf.predict(test_hog_descriptors)

In [11]:
# Test-set accuracy as a percentage. Arguments follow scikit-learn's
# documented (y_true, y_pred) order; the value is the same either way.
accuracy = accuracy_score(test_labels, pred) * 100
print(accuracy)

81.12


In [12]:
# Per-class precision / recall / F1 for the held-out test predictions.
print(classification_report(test_labels, pred))

              precision    recall  f1-score   support

           0       0.81      0.82      0.81      1250
           1       0.82      0.80      0.81      1250

    accuracy                           0.81      2500
   macro avg       0.81      0.81      0.81      2500
weighted avg       0.81      0.81      0.81      2500



In [13]:
# Record this training run in MLflow under a dedicated experiment.
experiment_name = "MLPClassifier"
mlflow.set_experiment(experiment_name)
with mlflow.start_run():
    # Log all MLP hyperparameters in one batch call instead of one call per key.
    mlflow.log_params(params)
    # Log metrics
    mlflow.log_metric("accuracy", accuracy)

## Saving model

In [14]:
# filename = 'MLP.sav'
# joblib.dump(clf, filename)

['MLP.sav']

## Load model

In [None]:
# loaded_model = joblib.load('MLP.sav')

In [None]:
# Classify a single external image. It must be preprocessed the same way as
# the training images: grayscale, resized to the HOG window size.
kitty_path = 'kitty5.jpg'
kitty = cv2.imread(kitty_path, 0)  # flag 0 -> load as grayscale
if kitty is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("Could not read image: {}".format(kitty_path))
# Resize to winSize (not a smaller size) so the descriptor has the same
# length as the feature vectors the classifier was trained on.
kitty = cv2.resize(kitty, winSize, interpolation=cv2.INTER_CUBIC)

my_hog_descriptors = np.squeeze([hog.compute(kitty)])
my_labels = np.array([0])  # expected class: 0 (cat)

# The classifier expects a 2-D (n_samples, n_features) array; here n_samples == 1.
my_features = my_hog_descriptors.reshape(1, -1)
print(accuracy_score(my_labels, clf.predict(my_features)) * 100)

In [None]:
# Display the preprocessed grayscale image that was fed to the classifier.
plt.imshow(kitty, cmap = 'gray')