In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle

from classifier import *
import features as fe 
import utils as ut
from sklearn.model_selection import train_test_split
import plotly.express as px

In [2]:
def Create_data(Dataset_Size):
    """Assemble a class-balanced, feature-extracted sample of the training images.

    Non-face training images are cut down with ``ut.random_subset`` to the same
    count as the face images, both classes are stacked together, and a second
    ``ut.random_subset`` call draws ``Dataset_Size`` examples from the stacked
    set before features are extracted.

    Parameters
    ----------
    Dataset_Size
        Size argument forwarded to the final ``ut.random_subset`` call
        (presumably the number of examples to keep -- confirm in ``utils``).

    Returns
    -------
    tuple
        ``(X_data_fe, y_data)``: the result of
        ``fe.par_feature_extraction_images`` on the sampled images, and the
        labels returned alongside them by ``ut.random_subset``.
    """
    # Rectangle descriptors for a 19x19 window, consumed by the extractor below.
    window_rects = fe.get_rectanges(19, 19)
    window_no_rects = fe.get_no_rectangles(19, 19)

    # The loader also returns test images/labels; they are not used here.
    (
        face_imgs,
        nonface_imgs,
        _unused_test_imgs,
        face_labels,
        nonface_labels,
        _unused_test_labels,
    ) = ut.get_test_train_data()

    # Match the non-face count to the face count so both classes are balanced.
    nonface_imgs_sub, nonface_labels_sub = ut.random_subset(
        nonface_imgs, nonface_labels, len(face_imgs)
    )
    assert len(face_imgs) == len(nonface_imgs_sub)
    assert len(face_labels) == len(nonface_labels_sub)
    assert len(face_imgs) == len(face_labels)
    assert len(nonface_imgs_sub) == len(nonface_labels_sub)

    all_imgs = np.concatenate((face_imgs, nonface_imgs_sub))
    all_labels = np.concatenate((face_labels, nonface_labels_sub))

    sampled_imgs, sampled_labels = ut.random_subset(all_imgs, all_labels, Dataset_Size)
    sampled_features = fe.par_feature_extraction_images(
        sampled_imgs, window_rects, window_no_rects
    )
    return sampled_features, sampled_labels
    

def Split_Data(X_data, y_data, test_size=0.2, random_state=None):
    """Split features and labels into train and test partitions.

    Thin wrapper around ``sklearn.model_selection.train_test_split``.

    Parameters
    ----------
    X_data, y_data
        Feature rows and their labels; passed through unchanged.
    test_size
        Forwarded to ``train_test_split``. Defaults to ``0.2``, the value this
        notebook previously hard-coded.
    random_state
        Forwarded to ``train_test_split``. The default ``None`` keeps the
        original unseeded behaviour; pass an integer for a reproducible split.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` as returned by
        ``train_test_split``.
    """
    return train_test_split(
        X_data,
        y_data,
        test_size=test_size,
        random_state=random_state,
    )

In [3]:
# Number of examples drawn for the experiments below; named so that it can be
# tuned in one place instead of being buried in the call.
DATASET_SIZE = 400

X, y = Create_data(DATASET_SIZE)
X_train, X_test, y_train, y_test = Split_Data(X, y)

number_of cpus =  16


## Feature Analysis
We look at the following plots:
- Time taken to convert image into integral image vs size of the image.
- No of feature vectors vs size of the image
- Time taken to compute feature vectors vs size of the image. (sequential + parallel)
- Time to compute $i^{th}$ fixed vector for a fixed size of image.
- Time to compute $i^{th}$ feature vs size of the image.

### Time taken to convert image into integral image vs size of the image.

### No of feature vectors vs size of the image

### Time taken to compute feature vectors vs size of the image.

### Time to compute $i^{th}$ fixed vector for a fixed size of image.

### Time to compute $i^{th}$ feature vs size of the image.

## Adaboost Analysis
We investigate the following plots
- Accuracy vs number of features used in the adaboost classifier.
- Positive Detection rate vs number of features used in the adaboost classifier.
- False Positive rate vs number of features used in the adaboost classifier.
- False Negative rate vs number of features used in the adaboost classifier
- Time taken to classify subwindows vs number of features used in adaboost classifier.

In [5]:
def details_vs_num_features(X_train, y_train, X_test, y_test, num_features=None):
    """Train one AdaBoost classifier per feature count and record test metrics.

    For every entry ``n`` of ``num_features`` a fresh ``AdaBoostClassifier`` is
    fitted with ``clf.fit(X_train, y_train, n)`` and then evaluated on
    ``(X_test, y_test)``.

    Parameters
    ----------
    X_train, y_train
        Training features and labels handed to ``fit``.
    X_test, y_test
        Evaluation features and labels handed to every metric method.
    num_features : iterable of int, optional
        Values passed as the third argument of ``fit`` (the number of features
        used by the classifier, per this notebook's section headings).
        Defaults to ``1..10``, the list that used to be hard-coded here.

    Returns
    -------
    tuple of lists
        ``(num_features, accuracy, detection_rate, false_positive_rate,
        false_negative_rate)``; the four metric lists are aligned with
        ``num_features``.
    """
    if num_features is None:
        num_features = list(range(1, 11))
    else:
        # Materialise once so generators can be both iterated and returned.
        num_features = list(num_features)

    accuracy = []
    detection_rate = []
    false_positive_rate = []
    false_negative_rate = []
    for n_feat in num_features:
        # A new classifier per setting keeps the runs independent of each other.
        clf = AdaBoostClassifier()
        clf.fit(X_train, y_train, n_feat)
        accuracy.append(clf.score(X_test, y_test))
        detection_rate.append(clf.detection_rate(X_test, y_test))
        false_positive_rate.append(clf.false_positive_rate(X_test, y_test))
        false_negative_rate.append(clf.false_negative_rate(X_test, y_test))
    return num_features, accuracy, detection_rate, false_positive_rate, false_negative_rate

# Run the sweep once; the plotting cells below read these module-level lists.
(
    num_features,
    accuracy,
    detection_rate,
    false_positive_rate,
    false_negative_rate,
) = details_vs_num_features(X_train, y_train, X_test, y_test)


### Accuracy vs number of features used in the adaboost classifier.

In [6]:
# Line chart of accuracy against the number of features.
fig = (
    px.line(x=num_features, y=accuracy, title='Accuracy vs Number of Features')
    # axis captions
    .update_xaxes(title_text='Number of Features')
    .update_yaxes(title_text='Accuracy')
    # fixed canvas size rather than auto-sizing
    .update_layout(autosize=False, width=700, height=500)
)
fig.show()


### Positive Detection rate vs number of features used in the adaboost classifier.

In [7]:
# Line chart of detection rate against the number of features.
fig = (
    px.line(x=num_features, y=detection_rate, title='Detection Rate vs Number of Features')
    # axis captions
    .update_xaxes(title_text='Number of Features')
    .update_yaxes(title_text='Detection Rate')
    # fixed canvas size rather than auto-sizing
    .update_layout(autosize=False, width=700, height=500)
)
fig.show()

### False Positive rate vs number of features used in the adaboost classifier.

In [8]:
# Line chart of false positive rate against the number of features.
fig = (
    px.line(x=num_features, y=false_positive_rate, title='False Positive Rate vs Number of Features')
    # axis captions
    .update_xaxes(title_text='Number of Features')
    .update_yaxes(title_text='False Positive Rate')
    # fixed canvas size rather than auto-sizing
    .update_layout(autosize=False, width=700, height=500)
)
fig.show()


### False Negative rate vs number of features used in the adaboost classifier

In [9]:
# Line chart of false negative rate against the number of features.
fig = (
    px.line(x=num_features, y=false_negative_rate, title='False Negative Rate vs Number of Features')
    # axis captions
    .update_xaxes(title_text='Number of Features')
    .update_yaxes(title_text='False Negative Rate')
    # fixed canvas size rather than auto-sizing
    .update_layout(autosize=False, width=700, height=500)
)
fig.show()


### Time taken to classify subwindows vs number of features used in adaboost classifier.

## Cascade Analysis
We investigate the following plots
- number of training samples for each layer.
- composition of training samples for each layer.
- overall best accuracy results.
- Time taken to classify vs no of subwindows to be classified

### number of training samples for each layer.

### composition of training samples for each layer.

### Overall best accuracy results

### Time taken to classify vs no of subwindows to be classified