# Applications Projet HOG
## Application 1 : Entrainement d'un SVM
#### Importation des librairies

In [1]:
from skimage.feature import hog
from skimage.transform import pyramid_gaussian
from skimage.io import imread
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from skimage import color
from imutils.object_detection import non_max_suppression
import imutils
import numpy as np
import argparse
import cv2
import os
import glob
from PIL import Image # This will be used to read/modify images (can be done via OpenCV too)
from numpy import *
import time

### Entrainement du SVM avec les descripteurs de HOG

In [2]:
# define parameters of HOG feature extraction
orientations = 9
pixels_per_cell = (8, 8)
cells_per_block = (2, 2)
threshold = .3


# define path to images:
pos_im_path = r"INRIAPerson/pos/" # This is the path of our positive input dataset
neg_im_path= r"INRIAPerson/negg/" # This is the path of our negative input dataset


# read the image files:
pos_im_listing = os.listdir(pos_im_path) # it will read all the files in the positive image path (so all the required images)
neg_im_listing = os.listdir(neg_im_path)
num_pos_samples = size(pos_im_listing) # simply states the total no. of images
num_neg_samples = size(neg_im_listing)
print(num_pos_samples) # prints the number value of the no.of samples in positive dataset
print(num_neg_samples)
data= []
labels = []


# data preprocessing time measurement (begin)
start_time = time.time()  


# compute HOG features and label them:
for file in pos_im_listing: #this loop enables reading the files in the pos_im_listing variable one by one
    img = Image.open(pos_im_path + file) # open the file
    img = img.resize((64,128)) # resize the image to fit with the model
    gray = img.convert('L') # convert the image into single channel i.e. RGB to grayscale
    # calculate HOG for positive features
    fd = hog(gray, orientations, pixels_per_cell, cells_per_block, block_norm='L2', feature_vector=True)# fd= feature descriptor
    data.append(fd)
    labels.append(1)
    
# Same for the negative images
for file in neg_im_listing:
    img= Image.open(neg_im_path + file)
    img = img.resize((64,128))
    gray= img.convert('L')
    # Now we calculate the HOG for negative features
    fd = hog(gray, orientations, pixels_per_cell, cells_per_block, block_norm='L2', feature_vector=True) 
    data.append(fd)
    labels.append(0)
    
# data preprocessing time measurement (end)
interval = time.time() - start_time


# encode the labels, converting them from strings to integers
le = LabelEncoder()
labels = le.fit_transform(labels)


# Partitioning the data into training and testing splits, 80% for training and 20% for testing
print(" Constructing training/testing split...")
(trainData, testData, trainLabels, testLabels) = train_test_split(np.array(data), labels, test_size=0.20, random_state=42)


# Train the linear SVM
print(" Training Linear SVM classifier...")
model = LinearSVC()


# execution time measurement (begin)
start_time = time.time() 


# model fitting
model.fit(trainData, trainLabels)


# execution time measurement (end)
interval_2 = time.time() - start_time


# Evaluate the classifier
print(" Evaluating classifier on test data ...")
predictions = model.predict(testData)
print(classification_report(testLabels, predictions))

# Affichage des temps de prétraitement et d'éxécution
print('\n Data preprocessing time : ', round(interval,2), 'sec')
print('\n Execution time : ', round(interval_2,2), 'sec')


# Save the model:
joblib.dump(model, 'model_hog.npy')

902
1667
 Constructing training/testing split...
 Training Linear SVM classifier...
 Evaluating classifier on test data ...
              precision    recall  f1-score   support

           0       0.85      0.87      0.86       325
           1       0.76      0.74      0.75       189

    accuracy                           0.82       514
   macro avg       0.81      0.80      0.80       514
weighted avg       0.82      0.82      0.82       514


 Data preprocessing time :  31.36 sec

 Execution time :  5.13 sec


['model_hog.npy']

### Entrainement du SVM avec des images en nuances de gris

In [3]:
# define parameters of HOG feature extraction
orientations = 9
pixels_per_cell = (8, 8)
cells_per_block = (2, 2)
threshold = .3


# define path to images:
pos_im_path = r"INRIAPerson/pos/" # This is the path of our positive input dataset
neg_im_path= r"INRIAPerson/negg/" # This is the path of our negative input dataset


# read the image files:
pos_im_listing = os.listdir(pos_im_path) # it will read all the files in the positive image path (so all the required images)
neg_im_listing = os.listdir(neg_im_path)
num_pos_samples = size(pos_im_listing) # simply states the total no. of images
num_neg_samples = size(neg_im_listing)
print(num_pos_samples) # prints the number value of the no.of samples in positive dataset
print(num_neg_samples)
data= []
labels = []


# data preprocessing time measurement (begin)
start_time = time.time()  


# compute HOG features and label them:
for file in pos_im_listing: #this loop enables reading the files in the pos_im_listing variable one by one
    img = Image.open(pos_im_path + file) # open the file
    img = img.resize((64,128)) # resize the image to fit with the model
    gray = img.convert('L') # convert the image into single channel i.e. RGB to grayscale
    gray = np.array(gray)
    gray = gray.reshape((8192,)) #reshape the image to fit for the model
    data.append(gray)
    labels.append(1)
    
# Same for the negative images
for file in neg_im_listing:
    img= Image.open(neg_im_path + file)
    img = img.resize((64,128))
    gray= img.convert('L')
    gray = np.array(gray)
    gray = gray.reshape((8192,))
    data.append(gray)
    labels.append(0)
    

# data preprocessing time measurement (end)
interval = time.time() - start_time


# encode the labels, converting them from strings to integers
le = LabelEncoder()
labels = le.fit_transform(labels)


# Partitioning the data into training and testing splits, 80% for training and 20% for testing
print(" Constructing training/testing split...")
(trainData, testData, trainLabels, testLabels) = train_test_split(np.array(data), labels, test_size=0.20, random_state=42)


# Train the linear SVM
print(" Training Linear SVM classifier...")
model = LinearSVC()


# execution time measurement (begin)
start_time = time.time() 


# model fitting
model.fit(trainData, trainLabels)


# execution time measurement (end)
interval_2 = time.time() - start_time


# Evaluate the classifier
print(" Evaluating classifier on test data ...")
predictions = model.predict(testData)
print(classification_report(testLabels, predictions))

# Affichage des temps de prétraitement et d'éxécution
print('\n Data preprocessing time : ', round(interval,2), 'sec')
print('\n Execution time : ', round(interval_2,2), 'sec')


# Save the model:
joblib.dump(model, 'model_gray.npy')

902
1667
 Constructing training/testing split...
 Training Linear SVM classifier...
 Evaluating classifier on test data ...
              precision    recall  f1-score   support

           0       0.79      0.82      0.80       325
           1       0.67      0.61      0.64       189

    accuracy                           0.75       514
   macro avg       0.73      0.72      0.72       514
weighted avg       0.74      0.75      0.74       514


 Data preprocessing time :  27.87 sec

 Execution time :  20.49 sec




['model_gray.npy']

### Conclusion pour l'application 1:

- Avec les descipteurs de HOG
    - meilleur précision du modèle
    - meilleur temps d'apprentissage du modèle
- Avec les images en nuances de gris
    - meilleur temps de prétraitement des données (mais pas aussi significatif que le temps d'apprentissage du modèle)

## Application 2 : Détection de personne(s) en mouvement / piétons
#### Importation des librairies

In [4]:
import time
import cv2

### La fonction commentée

In [5]:
def myHogDetecttion(videoPATH, output_name, output_fps, output_format, win_size, padd_size, scale_value):
    """Detection of person on video with hog.detectMultiScale from OpenCV"""
    def inside(r, q):
        """Return the coordinates of the rectangle wich contain the detection"""
        rx, ry, rw, rh = r
        qx, qy, qw, qh = q
        return rx > qx and ry > qy and rx + rw < qx + qw and ry + rh < qy + qh

    def draw_detections(img, rects, thickness = 1):
        """Draw the rectangle wich contain the detection on the picture"""
        for x, y, w, h in rects:
            # the HOG detector returns slightly larger rectangles than the real objects.
            # so we slightly shrink the rectangles to get a nicer output.
            pad_w, pad_h = int(0.15*w), int(0.05*h)
            cv2.rectangle(img, (x+pad_w, y+pad_h), (x+w-pad_w, y+h-pad_h), (0, 0, 255), thickness)
    
    # load the linear SVM Detector from HOG from OpenCV
    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
    
    # Start the processing on the video
    cap = cv2.VideoCapture(videoPATH) # load the video
    fourcc = cv2.VideoWriter_fourcc(*'XVID') # initialize the encoder
    out = cv2.VideoWriter(output_name + '.avi',fourcc, output_fps, output_format) # initialize parameters for the output video
    
    # processing each image of the video
    while True:
        ret, frame = cap.read() #get the next image on the buffer
        if ret == True: # frame contain an image / else this is the end of the video
            frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE) # 90° rotation because it is a portrait video
            frame = cv2.resize(frame,output_format,fx=0,fy=0, interpolation = cv2.INTER_CUBIC) #resize to match with the output video
            
            # Most important line : detection of human body on the multiscale with the given parameters 
            found, w = hog.detectMultiScale(frame, winStride=win_size, padding=padd_size, scale=scale_value)
            
            found_filtered = [] 
            # the next two loops draw the detections rectangles on the image
            for ri, r in enumerate(found):
                for qi, q in enumerate(found):
                    if ri != qi and inside(r, q):
                        break
                    else:
                        found_filtered.append(r)
                draw_detections(frame, found)
                draw_detections(frame, found_filtered, 3) # further filtering the box to improve visualisation.
            out.write(frame) # write the image on the output video
        else:
            break
    cap.release() # release the buffer
    out.release() # end of the output video
    cv2.destroyAllWindows() # release the buffer

In [6]:
#Initialisation of the parameters
vp = './Videos/VID_20201122_193855.mp4'
out_name, out_fps, out_size = 'output_detection1', 30, (128,256)
w_s, p_s, s_v = (4,4), (16,16), 1.05

# Execution time measurement
start_time = time.time() 

#Detection with Hog
myHogDetecttion(vp, out_name, out_fps, out_size, w_s, p_s, s_v)

interval = time.time() - start_time
print ('Execution time : ', round(interval,2), 'sec')
print('Video duration : 7 sec')

Execution time :  4.75 sec
Video duration : 7 sec


In [7]:
#Initialisation of the parameters
vp = './Videos/VID_20201122_193936.mp4'
out_name, out_fps, out_size = 'output_detection2', 30, (256,512)
w_s, p_s, s_v = (8,8), (16,16), 1.06

#Execution time measurement
start_time = time.time() 

#Detection with Hog
myHogDetecttion(vp, out_name, out_fps, out_size, w_s, p_s, s_v)

interval = time.time() - start_time  
print ('Execution time : ', round(interval,2), 'sec')
print('Video duration : 8 sec')

Execution time :  6.89 sec
Video duration : 8 sec


In [8]:
#Initialisation of the parameters
vp = './Videos/VID_20201122_194028.mp4'
out_name, out_fps, out_size = 'output_detection3', 30, (256,512)
w_s, p_s, s_v = (8,8), (16,16), 1.05

#Execution time measurement
start_time = time.time() 

#Detection with Hog
myHogDetecttion(vp, out_name, out_fps, out_size, w_s, p_s, s_v)

interval = time.time() - start_time
print ('Execution time : ', round(interval,2), 'sec')
print('Video duration : 20 sec')

Execution time :  19.45 sec
Video duration : 20 sec


In [9]:
#Initialisation of the parameters
vp = './Videos/VID_20201122_194121.mp4'
out_name, out_fps, out_size = 'output_detection4', 30, (256,512)
w_s, p_s, s_v = (8,8), (16,16), 1.08

#Execution time measurement
start_time = time.time() 

#Detection with Hog
myHogDetecttion(vp, out_name, out_fps, out_size, w_s, p_s, s_v)

interval = time.time() - start_time  
print ('Execution time : ', round(interval,2), 'sec')
print('Video duration : 5 sec')

Execution time :  4.18 sec
Video duration : 5 sec


### Conclusion pour l'application 2:

Grâce à l'ajustement judicieux des paramètres en entrée de la fonction on arrive à faire de la détection "en temp réel".

**Remarque :** en ajustant les paramètres afin d'obtenir une détection plus précise (bien expliqué sur cette page : https://www.pyimagesearch.com/2015/11/16/hog-detectmultiscale-parameters-explained/), le résultat est vraiment concluant !

**Remarque 2 :** les temps d'executions sont propres à chaque machine, il faut donc ajuster les paramètres en fonction de sa machine.