In [1]:

import os
import cv2
import numpy as np
import mlflow
import joblib
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from keras_balanced_batch_generator import make_generator
from matplotlib import pyplot as plt
from matplotlib.image import imread

In [2]:

examples = []
labels = []
j=0
for file in os.listdir("train"):
    j=j+1
    if j>1000:
        break
    img = cv2.resize(cv2.imread('train/' + file, 0), (200, 200), interpolation = cv2.INTER_CUBIC)
    examples.append(img)
    if file.startswith('cat'):
        labels.append(0)
    else:
        labels.append(1)

In [3]:
winSize = (200, 200)
blockSize = (100, 100)
blockStride = (20, 20)
cellSize = (50, 50)
    
def get_hog():
    nbins = 9
    derivAperture = 1
    winSigma = -1.
    histogramNormType = 0
    L2HysThreshold = 0.2
    gammaCorrection = 1
    nlevels = 64
    signedGradient = True

    hog = cv2.HOGDescriptor(winSize, blockSize, blockStride, cellSize, nbins, derivAperture, winSigma,
                            histogramNormType, L2HysThreshold, gammaCorrection, nlevels, signedGradient)

    return hog

In [4]:
hog = get_hog()

In [16]:

hog_descriptors = []
j=0
for img in examples:
    hog_descriptors.append(hog.compute(img))
    if j==0:
        print(np.squeeze((hog.compute(img))))
        print(np.shape((hog.compute(img))))
    j=j+1
hog_descriptors = np.squeeze(hog_descriptors)
print(hog_descriptors.shape)

[0.09502677 0.04144662 0.04579363 ... 0.24097471 0.24097471 0.19064368]
(1296, 1)
(1000, 1296)


In [6]:
train_n = int(0.9 * len(hog_descriptors))
train_examples, test_examples, train_hog_descriptors, test_hog_descriptors, train_labels, test_labels =\
    train_test_split(examples, hog_descriptors, labels, test_size=0.1, random_state=42, stratify = labels)
train_examples, test_examples, train_hog_descriptors, test_hog_descriptors, train_labels, test_labels =\
    np.array(train_examples), np.array(test_examples), np.array(train_hog_descriptors), np.array(test_hog_descriptors), np.array(train_labels), np.array(test_labels) 
#train_hog_descriptors, test_hog_descriptors, train_labels, test_labels = train_test_split(hog_descriptors, labels, test_size=0.1, random_state=42, stratify = labels)

In [7]:
help(cv2.HOGDescriptor)


Help on class HOGDescriptor in module cv2.cv2:

class HOGDescriptor(builtins.object)
 |  Methods defined here:
 |  
 |  __init__(self, /, *args, **kwargs)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __repr__(self, /)
 |      Return repr(self).
 |  
 |  checkDetectorSize(...)
 |      checkDetectorSize() -> retval
 |      .   @brief Checks if detector size equal to descriptor size.
 |  
 |  compute(...)
 |      compute(img[, winStride[, padding[, locations]]]) -> descriptors
 |      .   @brief Computes HOG descriptors of given image.
 |      .       @param img Matrix of the type CV_8U containing an image where HOG features will be calculated.
 |      .       @param descriptors Matrix of the type CV_32F
 |      .       @param winStride Window stride. It must be a multiple of block stride.
 |      .       @param padding Padding
 |      .       @param locations Vector of Point
 |  
 |  computeGradient(...)
 |      computeGradient(img, grad, angleOfs[, pad

In [8]:
train_hog_descriptors.shape

(900, 1296)

In [9]:

params = {
#     'activation': 'relu',
#     'batch_size': 128,
#     'early_stopping': True,
#     'hidden_layer_sizes': (1000, 800, 600),
#     'learning_rate': 'adaptive',
#     'learning_rate_init': 0.03,
#     'n_iter_no_change': 30,
#     'random_state': 42,
#     'solver': 'sgd',
#     'verbose': True
    'C': 0.28,
    'gamma': 0.0032,
    'kernel': 'poly'
}

model = SVC(**params)
clf = make_pipeline(StandardScaler(), model)
clf.fit(train_hog_descriptors, train_labels)
print("clf fitted")

pred = clf.predict(test_hog_descriptors)

ValueError: The number of classes has to be greater than one; got 1 class

In [None]:

accuracy = (accuracy_score(pred, test_labels)*100)
print(accuracy)

In [None]:
print(classification_report(test_labels, pred))

In [None]:
experiment_name = "SVM"
mlflow.set_experiment(experiment_name)
with mlflow.start_run():
    # Log SVM params
    for param in params.keys():
        mlflow.log_param(param, params[param])
    # Log metrics
    mlflow.log_metric("accuracy", accuracy)