## Image preprocessing with HOG feature extraction

In [None]:
## loading packages

import os
import numpy as np
from skimage.feature import hog
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
## HOG configuration
# Descriptor parameters handed to skimage.feature.hog by the extraction loops.
orientations = 9
pixels_per_cell = (8, 8)
cells_per_block = (2, 2)

# Both class folders sit under the same dataset root; only the leaf differs.
dataset_root = r'C:\Users\admin\JupyterNotebook_works\mtech-programing-labs\S2\Computer-vision\Project\image-dataset-github'
hu_im_path = dataset_root + '\\human'        # images labelled 1 (human)
nohu_im_path = dataset_root + '\\nonhuman'   # images labelled 0 (non-human)

In [4]:
# os.listdir returns entries in arbitrary order. Sorting pins the sample order,
# so the feature matrix (and any seeded split made from it) is the same on
# every run and on every machine.
hu_im_listing = sorted(os.listdir(hu_im_path))
nohu_im_listing = sorted(os.listdir(nohu_im_path))

print('No. images having Human: ', len(hu_im_listing), '\nNo. images not having Human: ', len(nohu_im_listing))

No. images having Human:  1229 
No. images not having Human:  7126


In [6]:
# Accumulators filled by the extraction loops: `data` collects one HOG vector
# per image, `labels` the matching class id (1 = human, 0 = non-human).
data, labels = [], []

In [7]:
# Build one HOG descriptor per image in the "human" folder and label it 1.
for file in hu_im_listing:
    # os.path.join replaces the hand-built '\\' concatenation; the context
    # manager releases the underlying file handle as soon as the pixels have
    # been copied into the resized image.
    with Image.open(os.path.join(hu_im_path, file)) as img:
        # Same order as before: resize to 64x128 (PIL takes (width, height)),
        # then convert to single-channel greyscale.
        gray = img.resize((64, 128)).convert('L')
    # Keyword arguments make the HOG settings explicit instead of positional.
    hog_feat = hog(
        gray,
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        block_norm='L2',
        feature_vector=True,
    )
    data.append(hog_feat)
    labels.append(1)

In [8]:
# Build one HOG descriptor per image in the "nonhuman" folder and label it 0.
for file in nohu_im_listing:
    # os.path.join replaces the hand-built '\\' concatenation; the context
    # manager releases the underlying file handle as soon as the pixels have
    # been copied into the resized image.
    with Image.open(os.path.join(nohu_im_path, file)) as img:
        # Same order as before: resize to 64x128 (PIL takes (width, height)),
        # then convert to single-channel greyscale.
        gray = img.resize((64, 128)).convert('L')
    # Keyword arguments make the HOG settings explicit instead of positional.
    hog_feat = hog(
        gray,
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        block_norm='L2',
        feature_vector=True,
    )
    data.append(hog_feat)
    labels.append(0)

In [9]:
# Turn the list of per-image HOG vectors into a single NumPy array.
data = np.asarray(data)

In [12]:
# Peek at the HOG feature vector of the first sample.
data[0]

array([0.10737484, 0.13949477, 0.07550298, ..., 0.08509003, 0.06562477,
       0.01888669])

In [10]:
# Report the dimensions of the feature array.
print(np.shape(data))

(8355, 3780)


In [11]:
# Turn the list of class ids into a NumPy array aligned with `data`.
labels = np.asarray(labels)

## Modelling: training and evaluating the classifier

In [15]:
# Modelling dependencies (stdlib first, then third-party).
import time

import joblib
from sklearn.metrics import classification_report, confusion_matrix
# The original line ended in a dangling comma followed by a comment, which is
# a SyntaxError. RepeatedStratifiedKFold is imported properly so the optional
# repeated-CV scheme can be switched on without touching this cell.
from sklearn.model_selection import (
    RepeatedStratifiedKFold,
    cross_val_score,
    train_test_split,
)
from sklearn.svm import LinearSVC

In [16]:
# 5-fold cross-validated F1 score of a default LinearSVC on the full feature set.
# Disabled alternative: pass
#   RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=42)
# as `cv` to get 5 x 3 = 15 scores instead of 5.
# NOTE(review): cv=5 yields five scores; a 15-element result comes from the
# repeated scheme above, so re-run this cell to keep saved outputs in sync.
baseline_svc = LinearSVC()
cv_result = cross_val_score(baseline_svc, data, labels, scoring='f1', cv=5)



In [17]:
# Display the array of per-split F1 scores returned by cross_val_score.
cv_result

array([0.46985447, 0.49236641, 0.5       , 0.48948375, 0.46783626,
       0.52851711, 0.49708738, 0.4743083 , 0.476     , 0.49799197,
       0.5115304 , 0.48742747, 0.46247465, 0.53203883, 0.46581197])

In [19]:
# Spread of the cross-validation scores (standard deviation).
np.std(cv_result)

0.021011884439347654

In [18]:
# Average of the cross-validation scores.
np.mean(cv_result)

0.49018193029528956

In [29]:
# Hold out 20% of the samples for testing. Stratifying on `labels` keeps the
# class ratio the same in both partitions; the fixed seed makes the split
# repeatable for a given ordering of `data`.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=40,
)

In [30]:
def _class_share(y, cls):
    """Return the percentage of entries in label array `y` equal to `cls`."""
    return (len(y[y == cls]) / y.shape[0]) * 100


# Class balance check: share of class 0 and class 1 in the training set
# (first line) and in the test set (second line).
print(_class_share(y_train, 0), _class_share(y_train, 1), '\n',
      _class_share(y_test, 0), _class_share(y_test, 1))

85.29323758228605 14.706762417713945 
 85.27827648114902 14.721723518850988


In [31]:
# Dimensions of the training feature array.
np.shape(X_train)

(6684, 3780)

In [32]:
# Train a default-parameter linear SVM on the training partition
# (fit returns the estimator itself), then show its score on that same data.
model_lsvc = LinearSVC().fit(X_train, y_train)
model_lsvc.score(X_train, y_train)



0.9910233393177738

In [33]:
# Time only the prediction call on the held-out set.
# The original used datetime.now(), but `datetime` is never imported in this
# notebook, so the cell raised NameError on a fresh kernel. The already-imported
# `time` module provides perf_counter(), a monotonic high-resolution clock
# meant for measuring short durations.
time_bf = time.perf_counter()
y_pred_lsvc = model_lsvc.predict(X_test)
time_af = time.perf_counter()
time_dif = time_af - time_bf  # elapsed seconds (float)
print(round(time_dif * 1000, 3))  # prediction latency in milliseconds
print(classification_report(y_test, y_pred_lsvc))

37.35
              precision    recall  f1-score   support

           0       0.92      0.90      0.91      1425
           1       0.47      0.52      0.50       246

    accuracy                           0.84      1671
   macro avg       0.69      0.71      0.70      1671
weighted avg       0.85      0.84      0.85      1671



In [34]:
# Confusion matrix of the held-out predictions
# (scikit-learn convention: rows are true classes, columns are predictions).
confusion_matrix(y_true=y_test, y_pred=y_pred_lsvc)

array([[1283,  142],
       [ 118,  128]], dtype=int64)

In [36]:
# Persist the fitted classifier. Despite the .npy suffix, the file is written
# by joblib.dump, so it must be read back with joblib.load.
model_path = 'svm_model3.npy'
joblib.dump(model_lsvc, model_path)

['svm_model3.npy']