In [2]:
import os
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
from vjutility import *

### Prepare Data Notebook

This notebook is usefull to prepare the data for the Viola-Jones openCV framework. This will automatically create the files needed for the training. Once the files are created, the training needs to be done separatelly. There is also a method to split the dataset into train/test.

All the auxiliary methods are defined in "utility_VJ.py"

### Split train test

In [5]:
folder_img="../dataset/images/style2"
folder_gt="../dataset/gt/style2"
folder_store="../dataset/train_test/style2"

print("Loading images from folder : ", folder_img)
#folder="dcm_dataset.git/images/images"
images, path_images = load_images_from_folder(folder_img)
print("Check dataset length : ", len(images))

# Load groundtruth :
print("Loading groundtruth from folder : ", folder_gt)
#folder="dcm_dataset.git\groundtruth"
groundtruth, path_groundtruth = load_all_groundtruth(folder_gt, 5)
print("Check groundtruth length : ", len(groundtruth))

# split dataset
print("Extracting train/test, storing in : ", folder_store)
split_train_test(images, groundtruth, folder_store, 0.2)

Loading images from folder :  ../dataset/images/style2
Check dataset length :  65
Loading groundtruth from folder :  ../dataset/gt/style2
Check groundtruth length :  65
Extracting train/test, storing in :  ../dataset/train_test/style2


### Framework data VJ

In [14]:
def framework(folder_img, folder_gt, resize, folder_store, pannel = True, visual_check = False) :
    
    """This function will handle all the nescessary steps to prepare the data
    
    --------Params----------
    folder_img : path to the folder containing the images
    folder_gt : parth to the folder containing the groundtruth
    resize : int, what size should all the images be set too
    folder_store : path to store the pannels if pannel = True
    pannel : whether or not to use the pannel as input images or the full image (pannel recommended)
    visual_check : If True, will print the 10 first pannels and their corresponding groundtruth to check everything went well
    
    --------Output----------
    If pannel == True : all the pannels and the resized groundtruth of the input images
    info.dat : File containing the positive images and their groundtruth
    bg.txt : File containing the negative images (no faces)
    
    """
    
    print("Loading images from folder : ", folder_img)
    images, path_images = load_images_from_folder(folder_img)
    print("Check dataset length : ", len(images))

    # Load groundtruth :
    print("Loading groundtruth from folder : ", folder_gt)
    groundtruth, path_groundtruth = load_all_groundtruth(folder_gt, 5)
    print("Check groundtruth length : ", len(groundtruth))

    # Change size :
    print("resizing to width = ", resize)
    resize_img_and_gt(resize, images, groundtruth)
    
#     # Add flipped images :
    print("Adding flipped images")
    add_flip(images, groundtruth)
    
#     # Blur images : 
    print("Bluring images")
    blur(images)
    
    # Extract pannels
    print("Extracting pannels and new gt, storing in : ", folder_store)
    get_set_pannels(images, groundtruth, folder_store, pannel)
    
    # Reload new values 
    print("Reloading new images and new gt from:", folder_store, "/img_train")
    images, path_image = load_images_from_folder(folder_store + "/img_train")
    print("Check dataset length : ", len(images))
    groundtruth, path_groundtruth = load_all_groundtruth(folder_store + "/gt_train", 4)
    print("Check groundtruth length : ", len(groundtruth))
    
    if(visual_check) :
        #Print first sample to check :
        for i in range(10):
            print_exemple(i, images, groundtruth, modified = True)
        
    #Write info.dat (For the OpenCV framework)
    print("Creating info.dat, make sure the previous one was deleted !")
    f = open("info.dat", "w")
    j = 0 
    for i in range(len(groundtruth)) :
        if groundtruth[i].shape[0] != 0 :
            j+=len(groundtruth[i])
            if j == 823 :
                print(path_image[i])
            f.write(path_image[i] + ' ' + str(len(groundtruth[i])) + ' ' + to_string(rewrite_coord(groundtruth[i])) + '\n')
    print("Number of pos samples :", j)
    f.close()
        
    #Write bg.txt (For the OpenCV framework)
    print("Creating bg.txt, make sure the previous one was deleted !")
    f = open("bg.txt", "w")
    j = 0 
    for i in range(len(groundtruth)) :
        if groundtruth[i].shape[0] == 0 :
            j+=1
            f.write(path_image[i] + '\n')
    print("Number of neg samples :", j)
    f.close()
    
    print("Run opencv framework !")

In [15]:
##Prepare the Parameters 


## Folder where to get the images for the training
folder_img="../dataset/train_test/complete/img_train"

## Folder where to get the groundtruth 
folder_gt="../dataset/train_test/complete/gt_train"

## Storing folder 
folder_store="pannels/complete/aug2"

## Size of the images
image_size = 800

## Whether or not to split the images in Pannels
pannel = True

In [16]:
framework(folder_img, folder_gt, image_size, folder_store, pannel, visual_check = True)

Loading images from folder :  ../dataset/train_test/complete/img_train
Check dataset length :  571
Loading groundtruth from folder :  ../dataset/train_test/complete/gt_train
Check groundtruth length :  571
resizing to width =  800
Adding flipped images
Bluring images
Extracting pannels and new gt, storing in :  pannels/complete/aug2
Reloading new images and new gt from: pannels/complete/aug2 /img_train
Check dataset length :  6852
Check groundtruth length :  6852
Creating info.dat, make sure the previous one was deleted !
pannels/complete/aug2/img_train\0102-0004.jpg
Number of pos samples : 8026
Creating bg.txt, make sure the previous one was deleted !
Number of neg samples : 2556
Run opencv framework !


### How to run ?

Once this framework is done running, two files were created : info.dat and bg.txt, they respectivelly correspond to the postives and negatives samples (images whit faces and images without any face). 

These are the usefull files to train the model. Once they are created :
- Open a terminal and go to the directory
- run this command : "path_to_open_cv"\opencv\build\x64\vc15\bin\opencv_createsamples.exe -info info.dat -w 20 -h 20 -num 500 -vec pos.vec (Obviously width, heigth and number of sample can be change). This will create a pos.vec file containing the information about the positives samples
- run : "path_to_opencv"\opencv\build\x64\vc15\bin\opencv_traincascade.exe -data store_folder -vec pos.vec -bg bg.txt -w 20 -h 20 -numPos 300 -numNeg 500 -numStages 5 -maxFalseAlarmRate 0.15 -minHitRate 0.95 (Again all parameters can be parametrized). This will train the model and save it to store_folder.