In [3]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

In [4]:
#Load the spam dataset:

data = np.array(pd.read_csv('../data/spambase.data', header=None))

X = data[:,:-1] # features
y = data[:,-1] # Last column is label
#  zeros labels must be negative (-1) for AdaBoost
y[y == 0] = -1 

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, shuffle=True, stratify=y)



Excercise 1. AdaBoot 
Implement AdaBoost using Python (incl. Numpy etc.) and use it on the SPAM-Dataset

1.The weak classifiers should be decision stumps (i.e. decision trees with one node).

(a) Print a confusion matrix.

(b) Is AdaBoost better when using stronger weak learners? Why or why not? Compare your results to using depth-2 decision trees.


In [5]:
# A decision stump is a machine learning model consisting of a one-level decision tree.
# That is, it is a decision tree with one internal node (the root) 
# which is immediately connected to the terminal nodes (its leaves).

# For continuous features, usually, some threshold feature value is selected, 
# and the stump contains two leaves — for values below and above the threshold. 
class OneLevelDecisionTree():
    def __init__(self, feature_column, label_1, label_2, threshold=0):
        self.feature = feature_column
        self.label_1 = label_1
        self.label_2 = label_2
        self.threshold = threshold
        
    def predict(self, X):
        return np.where(X[:, self.feature] >= self.threshold, self.label_1, self.label_2)
    
def classifier_boost(X):
    boost = []   
    # passing by columns
    # creates the same number of stumps as the number of features. 
    for feature in range(X.shape[1]):
        # get every unique feature in ordered way
        for threshold in sorted(set(X[:, feature])):
            boost += [
                OneLevelDecisionTree(feature, 1, -1, threshold)]  
            '''
            boost += [
                OneLevelDecisionTree(feature, 1, -1, threshold),
                OneLevelDecisionTree(feature, -1, 1, threshold)
            ] 
            '''
    return boost

In [6]:
import math

class AdaBoost():
    def __init__(self, cls_boost, boost_size):
        self.cls_boost = cls_boost
        self.boost_size = boost_size
        self.classifiers = []
        self.weights = []
        
    # from the lecture:
    # error calculation looks how many times the prediction
    # of the model was wrong
    # ---> finaly replaced with inside the fit_train with a numpy boolean expression
    def compute_error(self, preds):
        for i in range(len(preds)):
            if preds[i] != self.y[i]:
                self.error.append(self.weights[i])
            else:
                continue
                
        
    def fit_train(self, X, y): 
        # Step 1
        # initialise w_i = 1/N for all i from [1..N]
        w = np.full_like(y, 1/len(y))
        
        # with the 1/N the each weight is too small = 0.00028986
        # trying with 1 as a initial weight
        # w = np.ones(len(y))
        
        # P.S: it does not change the predictions
        # the np.log in alpha should be with minus sign

        # Step 2.a - 1
        # for m from [1..M] of boost size
        # train a classifier f_m(x) -> [-1, 1] on X
        # and save all errors
        # diffs.shape:  (13292, 3450)
        diffs = np.array([clf.predict(X) != y for clf in self.cls_boost])
 
        for m in range(self.boost_size):                   
            # Step 2.b
            # Compute classification error    
            errors = diffs @ w

            # indix of the minimum values
            min_err_idx = np.argmin(errors)
            
            # collect classifiers
            self.classifiers += [self.cls_boost[min_err_idx]]
            
            # from lecture: sum of all the weights that were missclassified 
            # devided by the sum of all weights
            w_err = errors[min_err_idx]
            w_sum = w.sum()

            # The total error is the sum of all the errors in the classified record for sample weights.
            E_m = (w_sum - w_err) / w_sum

            # Step 2.c
            # Compute classifier weight

            # Formula for calculating Performance of Stump 
            alpha_m = -0.5 * np.log((1 - E_m) / E_m)

            # Step 2.d
            # Recompute sample weights
            self.weights += [alpha_m]
            
            # we must update the sample weight before proceeding for the next model or stage 
            # because if the same weight is applied, we receive the output from the first model.
            
            # For incorrectly classified records the formula is:
            # New Sample Weight = Sample Weight * e^(Performance) 
            # And for correctly classified records, we use the same formula with a negative sign with performance, 
            # so that the weight for correctly classified records will reduce compared to the incorrect classified ones. 
            # New Sample Weight = Sample Weight * e^- (Performance)
            
            # self.weights[-1] is the weight for correctly classified records after it was added into array
            # returns all indixes of the given row
            # non-zero values will be 1
            # zero values will be -1
            w = w * np.exp(np.where(diffs[min_err_idx], 1, -1) * self.weights[-1])
            
            diffs = np.delete(diffs, min_err_idx, axis=0)
            del self.cls_boost[min_err_idx]
            
            #print("debug: done m = ", m)

        # convert into numpy array
        print("debug: done.")
        self.weights = np.array(self.weights)
            
        
    def predict(self, X):
        preds = np.array([cl.predict(X) for cl in self.classifiers])
        weighted_preds = np.dot(self.weights, preds)
        return np.where(weighted_preds >= 0, 1, -1)

        

In [7]:
boost = classifier_boost(X_train)
print ("Weak: {} of type {}".format(len(boost), type(boost[0]).__name__))

Weak: 13293 of type OneLevelDecisionTree


In [8]:
boost_size = 100
ada = AdaBoost(boost, boost_size)
ada.fit_train(X_train, y_train)
ada_predictions = ada.predict(X_test)

debug: done.


In [9]:
def calculate_accuracy(labels, predictions):
        return np.mean(labels == predictions)


In [10]:
print(y_test.shape == ada_predictions.shape)
print("y_test.shape: ", y_test.shape, "ada_predictions.shape: ",ada_predictions.shape)
print(ada_predictions)
print(calculate_accuracy(y_test, ada_predictions))

True
y_test.shape:  (1151,) ada_predictions.shape:  (1151,)
[-1  1 -1 ... -1  1 -1]
0.9079061685490878


In [11]:
from sklearn.metrics import confusion_matrix
print(confusion_matrix(ada_predictions, y_test))

[[647  56]
 [ 50 398]]


In [12]:

# So I will use my code from previous assignment to build a one
# THIS CELL IS COPIED FROM THE PREVIOUS ASSIGNMENT AND CONTAINS IMPLEMENTATION FOR A DECISSION TREE 


def cross_entropy(p):
        if p == 1 or p == 0: 
            # The entropy is zero if one event is certain
            return 0
        return - (p * np.log(p) + (1-p) * np.log((1-p)))

# Weight of a child node is number of samples in the node/total samples of all child nodes. 
# Similarly information gain is calculated with gini score. 
def children_entropy(feature, y):
    right = (feature == True).sum()/len(feature)
    left = 1 - right
    
    p = np.sum(y[feature])/len(y[feature]) 
    q = np.sum(y[np.invert(feature)])/len(y[np.invert(feature)])
    
    entropy_right = right * cross_entropy(p)
    entropy_left = left * cross_entropy(q)
    total_entropy = entropy_right + entropy_left
    return total_entropy, q, p

#====================================

class DecisionTree():
    
    def __init__(self, height=7):
        self.min_size = 4
        self.height = height
    
    # fit a basic binary tree for 2 classes classificaton 
    def fit(self, X, y):
        self.tree_size = 2**self.height - 1
        #print(self.tree_size)
        self.tmp_size = 2**(self.height + 1) - 1
        self.features = X.shape[1]
        self.tree = np.full(self.tmp_size, -1)
        self.tree_tmp = np.full(self.tmp_size + 1, -1)
        self.split_tree(X, y, 0)
    
    # binary tree
    def left_tree(self, leaf):
        return 2 * leaf + 1
    
    def right_tree(self, leaf):
        return 2 * leaf + 2
    
    
    def predict(self, X):
        predictions = []
        for x in X:
            idx = 0
            leaf = self.tree[idx]
            while self.tree[self.left_tree(idx)] != -1 or self.tree[self.right_tree(idx)] != -1:
                #print("idx:", idx)
                #print("leaf:", idx)

                if leaf >= self.tree_size:
                    return
                
                if x[leaf]:
                    idx = self.right_tree(idx)
                    #print("--------> right")
                else:
                    idx = self.left_tree(idx)
                    #print("left <--------")
                prediction = self.tree_tmp[idx]
                leaf = self.tree[idx]
            predictions += [prediction]
        return predictions
    
    
    def split_data(self, index, value, X):
        left, right = list(), list()
        for row in X:
            if row[index] < value:
                left.append(row)
            else:
                right.append(row)
        return left, right
        
    
    def split_tree(self, X, y, leaf):
  
        # parent node is a leaf
        #print("leaf", leaf)
        if leaf >= self.tree_size:
            return
        
        entropies = np.full(self.features, np.inf) 
        left = np.empty(self.features)
        right = np.empty(self.features)
        
        # for every feature variable
        for i, feature in enumerate(X.T):
            if np.sum(feature) == 0 or np.sum(np.invert(feature)) == 0:
                continue 
            entropies[i], left[i], right[i] = children_entropy(feature, y)
        
        min_entropy = np.argmin(entropies)
        
        right = X[:,min_entropy]
        left = np.invert(right)
        #print(left)
        
        #print("min_entropy", min_entropy)
        self.tree[leaf] = min_entropy
        if min_entropy < len(self.tree_tmp):
            if (min_entropy < len(left)) and (min_entropy < len(right)):
                self.tree_tmp[self.left_tree(leaf)] = left[min_entropy]
                self.tree_tmp[self.right_tree(leaf)] = right[min_entropy]
        
        if len(y[right]) == 0 or len(y[left]) == 0:
            return
        # grow tree 
        if leaf >= self.min_size:
            return
        self.split_tree(X[left], y[left], self.left_tree(leaf))
        self.split_tree(X[right], y[right], self.right_tree(leaf))
            



In [13]:
def classifier_boost_2d(X, y):
    boost = []   
    # passing by columns
    # creates the same number of stumps as the number of features. 
    for feature in range(X.shape[1]):
        # get every unique feature in ordered way
        for threshold in sorted(set(X[:, feature])):
            tree = DecisionTree(height = 2)
            tree.fit(X, y)
            boost += [tree]  
            '''
            boost += [
                OneLevelDecisionTree(feature, 1, -1, threshold),
                OneLevelDecisionTree(feature, -1, 1, threshold)
            ] 
            '''
    return boost

In [14]:
means = (np.mean(X_train[y_train==1], axis=0) + np.mean(X_train[y_train==-1])) / 2 
                  
X_train_means = (X_train > means)
X_test_means = X_test > means


In [15]:
boost_tree = classifier_boost_2d(X_train_means, y_train)
print ("Weak: {} of type {}".format(len(boost), type(boost_tree[0]).__name__))

  if __name__ == '__main__':


Weak: 13193 of type DecisionTree


In [16]:
boost_size_tree = 100
ada_tree = AdaBoost(boost_tree, boost_size_tree)
ada_tree.fit_train(X_train_means, y_train)
ada_predictions_tree = ada_tree.predict(X_test_means)

debug: done.


In [17]:
print(y_test.shape == ada_predictions_tree.shape)
print("y_test.shape: ", y_test.shape, "ada_predictions.shape: ",ada_predictions_tree.shape)
print(ada_predictions_tree)
print(calculate_accuracy(y_test, ada_predictions_tree))

from sklearn.metrics import confusion_matrix
print(confusion_matrix(ada_predictions, y_test))

True
y_test.shape:  (1151,) ada_predictions.shape:  (1151,)
[-1 -1 -1 ... -1 -1 -1]
0.6038227628149435
[[647  56]
 [ 50 398]]


# ** BONUS TASK **

Excercise 2 (Bonus).

# Viola-Jones Face Detection
Implement the Viola-Jones algorithm (without the cascade mechanism) and use it on a LFW-Face-subsetto classify faces.

(a) Visualize the top ten face classifiers

In [None]:
class ViolaJonesFaceDetection():
    def __init__(self, img):
        self.width = img.shape[1]
        self.height = img.shape[0]
        self.img = img
        # integral image to be calculated
        self.integral_img = np.zeros_like(img)
    
    # https://en.wikipedia.org/wiki/Summed-area_table
    # The summed-area table can be computed efficiently in a single pass over the image, 
    # as the value in the summed-area table at (x, y) is:
    # I(x,y)= i(x,y) +I(x,y-1) +I(x-1,y)- I(x-1,y-1)
    
    def calc_integral_image(self):
        for y in self.height:
            for x in self.width:
                self.integral_img[x, y] = self.img[x, y] + self.integral_img[x, y - 1] 
                + self.integral_img[x - 1, y] - self.integral_img[x - 1, y - 1]
        
        return self.integral_img

Excercise 3 (Bonus).
# Cascade-Classification

Implement a cascade algorithm to classify faces in a picture of your choice 
(there should be more than a face on your image, e.g. skimage.data.astronaut())