In [None]:
from google.colab import drive 
drive.mount('/content/drive')

Mounted at /content/drive


#### Importing required libraries

In [None]:
import numpy as np
import pickle
import time
import pandas as pd
import cv2
import timeit
from sklearn.feature_selection import SelectPercentile, f_classif
import csv

#### Reading the training and testing dataset
***I have used very less images due to not being able to upload the dataset on drive***<br>
750 images for training and 50 images for testing for each class.

For non-face images, scene recognition images are used.

In [None]:
def read_data():
  train = []
  test = []
  train_size = 750
  for i in range(750):
    img = cv2.imread('/content/drive/My Drive/COMPUTER VISION/Faces/'+str(i)+'.jpg') 
    img = cv2.resize(img, (200, 200))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    #print(img.shape) 
    train.append([img,1])

    img = cv2.imread('/content/drive/My Drive/COMPUTER VISION/non_faces/'+str(i+1)+'.jpg') 
    #print('/content/drive/My Drive/COMPUTER VISION/non_faces/'+str(i+1)+'.jpg',img)
    img = cv2.resize(img, (200, 200))
    #print(img.shape) 
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    #print(img.shape) 
    train.append([img,0])
  
  for i in range(750,800):
    img = cv2.imread('/content/drive/My Drive/COMPUTER VISION/Faces/'+str(i)+'.jpg') 
    img = cv2.resize(img, (200, 200))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    #print(img.shape) 
    test.append([img,1])

    img = cv2.imread('/content/drive/My Drive/COMPUTER VISION/non_faces/'+str(i)+'.jpg') 
    img = cv2.resize(img, (200, 200))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    #print(img.shape) 
    test.append([img,0])
  
  return train, test

### Integral images
Below function is used to compute integral image for a given input image

In [None]:
def get_integral_img(img):
  integral_img = np.array(img, copy=True)  
  integral_img = np.cumsum(integral_img,axis=0)
  integral_img = np.cumsum(integral_img,axis=1)
  return integral_img

### This function is used to prepare input dataset by making integral image for each input image

In [None]:
def prepare_data(data):
  n = len(data)
  X = []
  y = []
  #print(n)
  for i in range(n):
    X.append(get_integral_img(data[i][0]))
    y.append(data[i][1])
  X = np.array(X)
  y = np.array(y)
  return X,y

### Weights Initialization
In this function, we initialize weights for each input image depending on its class and number of images in it's class

The formula for calculating weights is : 

![picture](https://drive.google.com/uc?export=view&id=1UOdfQ8gK_yUG5CH36OQiieyifgrmb3h1)



In [None]:
def initialize_weights(labels):
  n = labels.shape[0]
  #print(n)
  weights = np.zeros(n)
  unique, counts = np.unique(labels, return_counts=True)
  neg_cnt, pos_cnt = counts[0], counts[1]
  #print(neg_cnt, pos_cnt)
  for i in range(n):
    if (labels[i]==0):
      weights[i] = 0.5*(1/neg_cnt)
    else:
      weights[i] = 0.5*(1/pos_cnt)
  return weights

### Rectangle Feature class
This class is used to store each feature easily as an object.
There are 4 data members - 
- x : X coordinate of top left corner
- y : Y coordinate of top left corner 
- W : Width of rectangle 
- H : Height of rectangle 

***calculate_feature_value()*** function is used to calculate sum of all pixels in given rectangle region using the fast method using integral images.

In [None]:
class Rectangle():
  def __init__(self, row, col, width, height):
    self.x = row
    self.y = col
    self.W = width
    self.H = height

  def calculate_feature_value(self, image):
    #print(image.shape,[self.x+self.H],[self.y+self.W],",,,",[self.x],[self.y],",,,",[self.x],[self.y+self.W],",,,",[self.x+self.H],[self.y] )
    return (image[self.x+self.H][self.y+self.W] + image[self.x][self.y]) - (image[self.x][self.y+self.W]+image[self.x+self.H][self.y])


### Feature 1 - 2 horizontal partitions

![picture](https://drive.google.com/uc?export=view&id=1S6MThFxH2Hqwy9IoC83RS99GBqY1Eedt)



In [None]:
def horizontal_rect_2(i, j, h, w, width, features):
  top = Rectangle(i, j, w, h)
  below = Rectangle(i+w, j, w, h)
  if (i + 2 * w < width): 
    features.append(([below], [top]))
  return features

### Feature 2 - 3 Horizontal partitions

![picture](https://drive.google.com/uc?export=view&id=1bV9PyTnYA_-m4vGrKUylKQQHCu6dTMX-)



In [None]:
def horizontal_rect_3(i, j, h, w, width, features):
  top = Rectangle(i, j, w, h)
  mid = Rectangle(i+w, j, w, h)
  bottom = Rectangle(i+2*w, j, w, h)
  if (i + 3 * w < width): 
    features.append(([mid], [bottom, top]))
  return features

### Feature 3 - 2 Vertical partitions

![picture](https://drive.google.com/uc?export=view&id=198bjYTg3UcWb1sWbp5DDTKK3mGFufHas)



In [None]:
def vertical_rect_2(i, j, h, w, height, features):
  left = Rectangle(i, j, w, h)
  right = Rectangle(i, j+h, w, h)
  if (j + 2 * h < height): 
    features.append(([left], [right]))
  return features

### Feature 4 - 3 Vertical partitions

![picture](https://drive.google.com/uc?export=view&id=1ANEJYtV_V48ZhHCMDMHXda74QSEor4Qz)



In [None]:
def vertical_rect_3(i, j, h, w, height, features):
  left = Rectangle(i, j, w, h)
  mid = Rectangle(i, j+h, w, h)
  right = Rectangle(i, j+2*h, w, h)
  if (j + 3 * h < height): 
    features.append(([mid], [right, left]))
  return features

### Feature 5 - 4 partitions

![picture](https://drive.google.com/uc?export=view&id=1GGpt21XF_Kj7IclqyYyBWUmvfoKRBi2x)



In [None]:
def rect_4(i, j, h, w, width, height, features):
  top_L = Rectangle(i, j, w, h)
  bot_L = Rectangle(i+w, j, w, h)
  top_R = Rectangle(i, j+h, w, h)
  bot_R = Rectangle(i+w, j+h, w, h)
  if (i + 2 * w < width and j + 2 * h < height):
    features.append(([bot_L, top_R], [top_L, bot_R]))
  return features

#### Below function is used to prepare all features sets that have to be applied on all the images.
 It outputs a list of all features.
 Each entry has 2 item lists:
 - one contains rectangle regions that have to be added(WHITE REGION)
 - second is the list of all regions which have to be subtracted(BLACK REGIONS)
 
All feature rectangle are of size 24 x 24.

This region is equally divided based on the type of feature (we have 5 of them)

In [None]:
def prepare_rect_features():
  width, height = 200, 200
  window_size = 24
  stride = 1
  features = []
  for i in range(0, width-window_size, stride):
    for j in range(0, height-window_size, stride):
      features = horizontal_rect_2(i, j, window_size//2, window_size, width, features)
      features = horizontal_rect_3(i, j, window_size//3, window_size, width, features)
      features = vertical_rect_2(i, j, window_size, window_size//2, height, features)
      features = vertical_rect_3(i, j, window_size, window_size//3, height, features)
      features = rect_4(i, j, window_size//2, window_size//2, width, height, features)

  return features

      

### Applying the features generated above on all traning images.
On doing so, each image generates 129536 features.

So, we get a data of dimention 1500 x 129536 for all 1500 input images.

In [None]:
def apply_features(data, features):
  print(data.shape, len(features), type(data.shape[0]), data.shape[0])
  X = np.zeros(( data.shape[0], len(features) ))
  n = data.shape[0]
  for i in range(n):
    print(i)
    j = 0
    for pos_regions, neg_regions in features:
      total_pos = sum([pos.calculate_feature_value(data[i]) for pos in pos_regions])
      total_neg = sum([neg.calculate_feature_value(data[i]) for neg in neg_regions])
      feature_val = total_pos - total_neg
      X[i][j]= feature_val
      j += 1

  return X



### Preparing training data of integral images

In [None]:
train, test = read_data()
X_train, y_train = prepare_data(train)
X_test, y_test = prepare_data(test)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1500, 200, 200) (1500,)
(100, 200, 200) (100,)


### Making rectangular harr features. 

In [None]:
features = prepare_rect_features()
print(len(features))

129536


#### Calculating value of each feature for all images

In [None]:

start = timeit.default_timer()
X = apply_features(X_train, features)
stop = timeit.default_timer()
print('Time: ', stop - start) 

Time:  3049.957049933


In [None]:
X

array([[ 1.6178e+04, -2.3191e+04, -1.2458e+04, ..., -6.2440e+03,
        -7.0760e+03, -7.8990e+03],
       [-1.3840e+03, -4.2739e+04, -7.2620e+03, ..., -1.5600e+02,
         1.8000e+02,  6.0500e+02],
       [ 3.2078e+04, -4.2780e+03,  2.2040e+03, ...,  1.8100e+02,
         1.9900e+02,  2.1500e+02],
       ...,
       [ 1.8220e+03, -2.8570e+04,  1.2000e+01, ..., -1.5000e+01,
        -1.9000e+01, -4.1000e+01],
       [ 3.0330e+03, -1.1165e+04, -1.1950e+03, ..., -6.9400e+02,
        -9.1300e+02, -9.3500e+02],
       [-7.2700e+02, -4.1160e+04, -2.5010e+03, ..., -1.5500e+02,
        -2.1200e+02, -2.6700e+02]])

#### Computing feature every time afresh takes a lot of time
So, I have saved the feature matrix computed in a csv file on my drive.

In [None]:
np.savetxt("/content/drive/My Drive/COMPUTER VISION/face_features.csv", X, delimiter=",")


#### Reading the precomputed feature matrix to save time from computing it again.

In [None]:
df = pd.read_csv('/content/drive/My Drive/COMPUTER VISION/face_features.csv')  
X_new = df.to_numpy()

#### Feature Selection to get important feature to speed up training and for dimentionality reduction. 

In [None]:
indices = SelectPercentile(f_classif, percentile=10).fit(X, y_train).get_support(indices=True)
X2 = X[:,indices]
features2 = np.array(features)
features2 = list(features2[indices])
print("Selected %d potential features" % len(features2))

Selected 12954 potential features


  This is separate from the ipykernel package so we can avoid doing imports until


#### Getting intial weights

In [None]:
weights = initialize_weights(y_train)
print(weights.shape)
print(weights)

(1500,)
[0.00066667 0.00066667 0.00066667 ... 0.00066667 0.00066667 0.00066667]


### AdaBoost algorithm for classifier learning

For t = 1,...,T
 - Normalize the weights
 -  For each feature, j , train a classifier h_j which is restricted to using a single feature. The error is evaluated with respect to w_t,

Error_j = ![picture](https://drive.google.com/uc?export=view&id=1lqVH-lL8YxDdbrcsoYTeaaNCpsq9saU3)

 - Choose the classifier, h_t , with the lowest error error_t.
 - Update the weights as follows :

 ![picture](https://drive.google.com/uc?export=view&id=1B7bUQW6j6YojcUYHUZaS4vBwqOr5okE0)
 
 where, 
  - e_i = 0 if example x_i is correctly classified
  - e_i = 1 otherwise
  - Beta_t = e_t / (1 - e_t)

- The final strong classifier is:

 ![picture](https://drive.google.com/uc?export=view&id=1ZwU4JU_vADQa4b1ENDx2753qkQ-vL1tq)



In [None]:
def train(integral_imgs, X, y, weights, features, T):
  alphas = []
  clfs = []
  for i in range(T):
    #print("T = ",i)
    norm_W = weights / np.linalg.norm(weights)
    #print("training weak clfs...")
    weak_clf = train_weak(X.T, y, features, weights)
    #print("selecting best weak clf...")
    clf, error, accuracy = select_best(weak_clf, weights, integral_imgs, y)
    beta = error / (1.0 - error)
    #print("calculating accuracy...")
    for i in range(len(accuracy)):
      weights[i] = weights[i] * (beta ** (1 - accuracy[i]))
    alpha = math.log(1.0/beta)
    alphas.append(alpha)
    clfs.append(clf)
  return alphas, clfs



#### Training a classifier for each of 129536 features. Each classifier takes into account only 1 feature for learning.

In [None]:

def train_weak(X, y, features, weights):
  idx_pos = np.where(y==1)
  idx_neg = np.where(y==0)
  tot_pos_wt = np.sum(weights[idx_pos])
  tot_neg_wt = np.sum(weights[idx_neg])
  CLFS = []
  n = X.shape[0]
  m = weights.shape[0]
  for i in range(n):
    pos_seen, neg_seen = 0, 0
    pos_weights, neg_weights = 0, 0
    min_error, best_feature, best_threshold, best_polarity = float('inf'), None, None, None
    for j in range(m):
      error = min(neg_weights + tot_pos_wt - pos_weights, pos_weights + tot_neg_wt - neg_weights)
      if (error < min_error):
        min_error = error
        best_feature = features[i]
        #print(best_feature)
        best_threshold = X[i][j]
        best_polarity = 1 if pos_seen > neg_seen else -1

      if (y[j] == 1):
        pos_seen += 1
        pos_weights += weights[j]
      else:
        neg_seen += 1
        neg_weights += weights[j]
    clf = WeakClassifier(best_feature[0], best_feature[1], best_threshold, best_polarity)
    CLFS.append(clf)
  return CLFS

#### Here we select the best classifier based on which one given leaast error upon classification of all images.

In [1]:
def select_best(classifiers, weights, X, y):
  best_clf = None
  best_error = np.inf
  best_accuracy = None
  n = X.shape[0]
  for clf in classifiers:
    error, accuracy = get_clf_stats(clf, X, y, weights)
    if (error < best_error):
      best_clf, best_error, best_accuracy = clf, error, accuracy
  return best_clf, best_error, best_accuracy

def get_clf_stats(clf, X, y, weights):
  error, accuracy = 0, []
  n = X.shape[0]
  for i in range(n):
    pred = abs(clf.classify(X[i]) - y[i])
    accuracy.append(pred)
    error += weights[i] * pred
  error = error / X.shape[0]
  return error, accuracy

#### Weak classifier
To store a weak classifier easily, we have this class.


In [None]:
class WeakClassifier:
    def __init__(self, pos_regions, neg_regions, threshold, polarity):
        self.pos_regions = pos_regions
        self.neg_regions = neg_regions
        self.threshold = threshold
        self.polarity = polarity
    
    def classify(self, img):
        total_pos = sum([pos.calculate_feature_value(img) for pos in self.pos_regions])
        total_neg = sum([neg.calculate_feature_value(img) for neg in self.neg_regions])
        feature_val = total_pos - total_neg
        pred = self.polarity * feature_val
        thresh = self.polarity * self.threshold
        if (pred < thresh):
          return 1
        return 0
  

#### Evaluation
Below function is used to detect face in test dataset.

We show confusion matrix and accuracy as output.

In [None]:
def predict(X, y, alphas, classifiers):
  conf_mat = np.zeros((2,2))
  alphas = np.array(alphas)
  n = len(alphas)
  threshold = np.sum(alphas)/2
  num_imgs = X.shape[0]
  for i in range(num_imgs):
    tmp = np.zeros(n)
    for j in range(n):
      tmp[j] = classifiers[j].classify(X[i])
    pred = np.matmul(alphas, tmp.T)
    if ( pred >= threshold ):
      out_class = 1
    else:
      out_class = 0
    conf_mat[y[i]][out_class] += 1
  #print(conf_mat)
  return conf_mat

In [None]:
alphas, classifiers = train(X_train, X2, y_train, weights, features2, 5)

In [None]:
conf_mat = predict(X_test, y_test, alphas, classifiers) 

In [None]:
acc = (conf_mat[0][0] + conf_mat[1][1])/100 * 100
print("Accuracy = ",acc)
print("Confusion Matrix :\n",conf_mat)

Accuracy =  57.99999999999999
Confusion Matrix :
 [[30. 20.]
 [22. 28.]]
