In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
from PIL import Image
from timeit import default_timer as timer

from classifier import *
from utils import *
from cascade import *
import features as fe 
import utils as ut
import plotly.express as px

In [None]:
# Generate the data set (X, y) and split it into train and test portions.
# NOTE(review): the exact meaning of the sample-count argument is defined by
# Create_data -- confirm whether it is a total or a per-class count.
N_SAMPLES = 1000
X, y = Create_data(N_SAMPLES)
(X_train, X_test, y_train, y_test) = Split_Data(X, y)

## Feature Analysis
We look at the following plots:
- Time taken to convert an image into its integral image vs size of the image.
- Number of feature vectors vs size of the image.
- Time taken to compute feature vectors vs size of the image (sequential + parallel).
- Time to compute the $i^{th}$ feature for a fixed size of image.
- Time to compute $i^{th}$ feature vs size of the image.

### Time taken to convert image into integral image vs size of the image.

In [None]:
# Load one training face as a 2-D array of pixel values.
image = Image.open("Data/faces/face.train/train/face/face00001.pgm")
image = np.array(image)
# ndarray.shape is (rows, cols); the names W,H are kept as-is because the
# following cells read them.
W,H = image.shape
print(W,H)

# Time the integral-image computation on the top-left i x i crop for every
# side length from 1 up to the full image. Starting at 1 skips the empty 0 x 0
# crop, and the inclusive upper bound makes sure the full image is measured.
max_side = min(W, H)
time = []
img_size = []
for i in range(1, max_side + 1):
    img = image[:i,:i]
    s = timer()
    fe.caliculate_intergral_image(img)
    e = timer()
    time.append(1000*(e-s))  # default_timer reports seconds -> milliseconds
    img_size.append(i)

# The line carries a label so that plt.legend() has an artist to show
# (calling legend() with no labelled artists only emits a warning).
plt.plot(img_size,time,'o-',label="Integral image computation")
plt.xlabel("Image size")
plt.ylabel("Time taken (ms)")
plt.title("Integral Image computation time Vs Image size")
plt.legend()
plt.show()


### No of feature vectors vs size of the image

In [None]:
# Reload the reference face so this cell does not depend on earlier cells.
image = Image.open("Data/faces/face.train/train/face/face00001.pgm")
image = np.array(image)
W,H = image.shape  # ndarray.shape is (rows, cols); names kept for later cells

# Count the features produced for each top-left i x i crop, from side 2 up to
# and including the full image (the upper bound is inclusive so that the
# full-size image is part of the curve).
max_side = min(W, H)
f = []
img_size = []
for i in range(2, max_side + 1):
    img = image[:i,:i]
    rect = fe.get_rectanges(i,i)
    no_rect = fe.get_no_rectangles(i, i)
    # One image in the batch; the feature count is read from the second axis
    # of the returned array.
    nf = fe.feature_extraction_images(np.array([img]),rect,no_rect)
    img_size.append(i)
    f.append(nf.shape[1])

# Labelled so that plt.legend() has an artist to display.
plt.plot(img_size,f,'o-',label="Number of features")
plt.xlabel("Image size")
plt.ylabel("Number of Features")
plt.title("Number of Features Vs Image size")
plt.legend()
plt.show()

### Time taken to compute feature vectors vs size of the image.

In [None]:
# Time the full feature extraction on each top-left i x i crop of `image`
# (loaded in the previous cell), from side 2 up to and including the full
# image so the largest size is not missing from the curve.
max_side = min(image.shape)
t = []
img_size = []
for i in range(2, max_side + 1):
    img = image[:i,:i]
    # Rectangle generation is kept outside the timed region on purpose: only
    # the extraction call itself is measured.
    rect = fe.get_rectanges(i,i)
    no_rect = fe.get_no_rectangles(i, i)
    s = timer()
    nf = fe.feature_extraction_images(np.array([img]),rect,no_rect)
    e = timer()
    img_size.append(i)
    t.append(1000*(e-s))  # default_timer reports seconds -> milliseconds

plt.plot(img_size,t,'o-',)
plt.xlabel("Image size")
plt.ylabel("Time taken (ms)")
plt.title("Feature extraction time Vs Image size")
plt.show()

### Time to compute the $i^{th}$ feature for a fixed size of image.

In [None]:
# Evaluate every feature once on the integral image of `image` and record how
# long each single evaluation takes (seconds, as reported by default_timer).
IntImage = fe.caliculate_intergral_image(image)
rectangles = fe.get_rectanges(19, 19)
no_features = fe.get_no_rectangles(19, 19)

times = []
for feature_idx in range(no_features):
    started = timer()
    fe.get_nth_feature(IntImage, rectangles, feature_idx)
    finished = timer()
    times.append(finished - started)

# Distribution of the per-feature evaluation times.
fig = px.histogram(x=times, nbins=100)
fig.show()

### Time to compute $i^{th}$ feature vs size of the image.

In [None]:
# Time the evaluation of feature 0 on the top-left size x size crop of `image`
# for every side length from 2 up to the full image.
#  - Sizes 0 and 1 are skipped: a 0 x 0 crop is empty, and the other cells of
#    this notebook also start at side 2.
#  - Rectangles are generated for the crop being measured, so feature 0 is
#    defined for that crop size.
#    NOTE(review): presumably feature 0 is the same rectangle layout at every
#    size -- confirm against fe.get_rectanges.
feature_sizes = list(range(2, min(image.shape) + 1))
times = []
for size in feature_sizes:
    int_image = fe.caliculate_intergral_image(image[:size,:size])
    size_rectangles = fe.get_rectanges(size, size)
    s = timer()
    fe.get_nth_feature(int_image, size_rectangles, 0)
    e = timer()
    times.append(e - s)

# A line over image size (rather than a histogram) keeps the size axis that
# this section is about.
fig = px.line(x=feature_sizes, y=times, title='Feature Computation Time vs Image Size')
fig.update_xaxes(title_text='Image size')
fig.update_yaxes(title_text='Time taken (s)')
fig.show()

## Adaboost Analysis
We investigate the following plots:
- Accuracy vs number of features used in the AdaBoost classifier.
- Positive detection rate vs number of features used in the AdaBoost classifier.
- False positive rate vs number of features used in the AdaBoost classifier.
- False negative rate vs number of features used in the AdaBoost classifier.
- Time taken to classify subwindows vs number of features used in the AdaBoost classifier.

In [None]:
def details_vs_num_features(X_train, y_train, X_test, y_test, num_features=None):
    """Train one AdaBoost classifier per feature count and collect its test metrics.

    A fresh ``AdaBoostClassifier`` is fitted for every entry of
    ``num_features`` (the count is passed as the third argument of ``fit``)
    and then evaluated on the test data.

    Parameters
    ----------
    X_train, y_train
        Training data and labels, forwarded unchanged to ``clf.fit``.
    X_test, y_test
        Evaluation data and labels, forwarded unchanged to the metric methods.
    num_features : iterable of int, optional
        Feature counts to sweep. Defaults to 1 through 10 inclusive.

    Returns
    -------
    tuple of five lists
        ``(num_features, accuracy, detection_rate, false_positive_rate,
        false_negative_rate)`` where entry ``k`` of each metric list is the
        value returned by ``clf.score`` / ``clf.detection_rate`` /
        ``clf.false_positive_rate`` / ``clf.false_negative_rate`` for the
        classifier fitted with ``num_features[k]`` features.
    """
    if num_features is None:
        num_features = range(1, 11)
    # Materialise once so the caller gets back a list it can plot directly,
    # even when a generator or range was passed in.
    num_features = list(num_features)

    accuracy = []
    detection_rate = []
    false_positive_rate = []
    false_negative_rate = []
    for n_features in num_features:
        # New classifier each round so no state carries over between fits.
        clf = AdaBoostClassifier()
        clf.fit(X_train, y_train, n_features)
        accuracy.append(clf.score(X_test, y_test))
        detection_rate.append(clf.detection_rate(X_test, y_test))
        false_positive_rate.append(clf.false_positive_rate(X_test, y_test))
        false_negative_rate.append(clf.false_negative_rate(X_test, y_test))
    return num_features, accuracy, detection_rate, false_positive_rate, false_negative_rate

# Run the sweep once; the plotting cells below read these five lists.
(
    num_features,
    accuracy,
    detection_rate,
    false_positive_rate,
    false_negative_rate,
) = details_vs_num_features(X_train, y_train, X_test, y_test)


### Accuracy vs number of features used in the adaboost classifier.

In [None]:
# Accuracy against the number of boosted features: axis titles and a fixed
# 700 x 500 canvas are applied through chained figure updates.
fig = (
    px.line(x=num_features, y=accuracy, title='Accuracy vs Number of Features')
    .update_xaxes(title_text='Number of Features')
    .update_yaxes(title_text='Accuracy')
    .update_layout(autosize=False, width=700, height=500)
)
fig.show()


### Positive Detection rate vs number of features used in the adaboost classifier.

In [None]:
# Detection rate against the number of boosted features, drawn on a fixed
# 700 x 500 canvas with explicit axis titles.
figure_size = dict(autosize=False, width=700, height=500)
fig = px.line(x=num_features, y=detection_rate, title='Detection Rate vs Number of Features')
fig.update_xaxes(title_text='Number of Features').update_yaxes(title_text='Detection Rate')
fig.update_layout(**figure_size)
fig.show()

### False Positive rate vs number of features used in the adaboost classifier.

In [None]:
# False positive rate against the number of boosted features.
fig = (
    px.line(
        x=num_features,
        y=false_positive_rate,
        title='False Positive Rate vs Number of Features',
    )
    .update_xaxes(title_text='Number of Features')
    .update_yaxes(title_text='False Positive Rate')
    .update_layout(autosize=False, width=700, height=500)  # fixed canvas size
)
fig.show()


### False Negative rate vs number of features used in the adaboost classifier

In [None]:
# False negative rate against the number of boosted features.
fnr_title = 'False Negative Rate vs Number of Features'
fig = px.line(x=num_features, y=false_negative_rate, title=fnr_title)
# Axis titles first, then pin the canvas to 700 x 500.
fig.update_xaxes(title_text='Number of Features')
fig.update_yaxes(title_text='False Negative Rate')
fig.update_layout(autosize=False, width=700, height=500)
fig.show()


### Time taken to classify subwindows vs number of features used in adaboost classifier.

In [None]:
# Cell-local names are used for this data so that the X_train / y_train split
# created at the top of the notebook is not overwritten; the cascade-training
# cell further down reads X_train / y_train.
X_timing_fe, X_timing, y_timing = Create_data_imgs(200)
# NOTE(review): fit/predict use the second value returned by Create_data_imgs;
# confirm whether the classifier expects that value or the first one
# (X_timing_fe).

# Sweep 1..20 features (a classifier with 0 features is not a meaningful data
# point) and time only the predict call. A fresh classifier is built for each
# count so that no state carries over from the previous fit.
feature_counts = list(range(1, 21))
times = []
for n_features in feature_counts:
    clf = AdaBoostClassifier()
    clf.fit(X_timing, y_timing, n_features)
    s = timer()
    clf.predict(X_timing)
    e = timer()
    times.append(e - s)

# Plot prediction time against the number of features; a histogram would
# discard the feature-count axis this section is about.
fig = px.line(x=feature_counts, y=times, title='Prediction Time vs Number of Features')
fig.update_xaxes(title_text='Number of Features')
fig.update_yaxes(title_text='Time Taken to Predict (s)')
fig.update_layout(
    autosize=False,
    width=700,
    height=500,
)
fig.show()

## Cascade Analysis
We investigate the following plots:
- Number of training samples for each layer.
- Composition of training samples for each layer.
- Overall best accuracy results.
- Time taken to classify vs number of subwindows to be classified.

In [None]:
# Train the cascade and unpack its four results directly.
# The tuple is deliberately not wrapped in np.array(...): that would coerce the
# classifier objects and the per-layer counts into one array, and NumPy >= 1.24
# raises ValueError if the four sequences ever differ in length.
(
    Strong_Classifiers,
    No_of_samples_per_layer,
    No_of_positive_samples_per_layer,
    No_of_negative_samples_per_layer,
) = Train_Cascade(X_train, y_train)
print("the length of the strong classifiers is", len(Strong_Classifiers))

### number of training samples for each layer.

In [None]:
# print the number of samples per layer vs the number of layers
fig = px.line(x=range(len(No_of_samples_per_layer)), y=No_of_samples_per_layer, title='Number of Samples vs Number of Layers')
# change the names of the x-axis and y-axis
fig.update_xaxes(title_text='Number of Layers')
fig.update_yaxes(title_text='Number of Samples')
# adjust the size of the figure
fig.update_layout(
    autosize=False,
    width=700,
    height=500,
)
fig.show()

### composition of training samples for each layer.

In [None]:
# tabulate the number of positive and negative samples per layer
table = pd.DataFrame({'Number of Positive Samples': No_of_positive_samples_per_layer, 'Number of Negative Samples': No_of_negative_samples_per_layer})
table



### Overall best accuracy results

### Time taken to classify vs no of subwindows to be classified

In [None]:
# Train a cascade on a small data set for the timing experiment.
X_train, y_train = Create_data(200)
# Unpack the result directly; wrapping it in np.array(...) would only coerce
# the classifiers and counts into one array, and NumPy >= 1.24 raises
# ValueError if the returned sequences differ in length.
(
    Strong_Classifiers,
    No_of_samples_per_layer,
    No_of_positive_samples_per_layer,
    No_of_negative_samples_per_layer,
) = Train_Cascade(X_train, y_train)

X_test_fe, X_test, y_test = Create_data_imgs(1000)

# The same range drives both the timing loop and the x-axis of the plot, so
# the two cannot drift apart.
subwindow_counts = range(0, 1000, 10)
times = []
for i in subwindow_counts:
    # Time classification of the first i samples.
    s = timer()
    Cascade_Classifier_predict_Img(X_test[:i], y_test[:i], Strong_Classifiers)
    e = timer()
    times.append(e - s)

# plot the time taken to predict vs the number of images
fig = px.line(x=subwindow_counts, y=times, title='Time Taken to Predict vs Number of Images')
# change the names of the x-axis and y-axis
fig.update_xaxes(title_text='Number of Images')
fig.update_yaxes(title_text='Time Taken to Predict')
# adjust the size of the figure
fig.update_layout(
    autosize=False,
    width=700,
    height=500,
)
fig.show()