In [97]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

In [98]:
#Load the spam dataset:

data = np.array(pd.read_csv('../data/spambase.data', header=None))

X = data[:,:-1] # features
y = data[:,-1] # Last column is label
#  zeros labels must be negative (-1) for AdaBoost
y[y == 0] = -1 

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, shuffle=True, stratify=y)



Excercise 1. AdaBoot 
Implement AdaBoost using Python (incl. Numpy etc.) and use it on the SPAM-Dataset

1.The weak classifiers should be decision stumps (i.e. decision trees with one node).

(a) Print a confusion matrix.

(b) Is AdaBoost better when using stronger weak learners? Why or why not? Compare your results to using depth-2 decision trees.


In [260]:
# A decision stump is a machine learning model consisting of a one-level decision tree.
# That is, it is a decision tree with one internal node (the root) 
# which is immediately connected to the terminal nodes (its leaves).

# For continuous features, usually, some threshold feature value is selected, 
# and the stump contains two leaves — for values below and above the threshold. 
class OneLevelDecisionTree():
    def fit(self, feature_column, neg_label, pos_label, feature):
        self.feature = feature_column
        self.neg_label = neg_label
        self.pos_label = pos_label
        self.threshold = feature
        return self
        
    def predict(self, X):
        predictions = []
        
        return np.where(X[:, self.feature] >= self.threshold, self.neg_label, self.pos_label)
    
def classifier_boost(X, cls):
    boost = []
    neg_label = -1
    pos_label = 1
    
    for column in range(X.shape[1]):
        # passing by columns
        # get every unique feature in ordered way
        feature_set = sorted(set(X[:, column]))
        
        # threshold feature value is selected for every feature
        for threshold in feature_set:
            boost += [
                cls.fit(column, neg_label, pos_label, threshold),
                cls.fit(column, pos_label, neg_label, threshold)]
    return boost

In [315]:
import math

class AdaBoost():
    def __init__(self, cls_boost, boost_size):
        self.cls_boost = cls_boost
        self.boost_size = boost_size
        self.classifiers = []
        self.weights = []
        self.error = []
        
    # from the lecture:
    # error calculation looks how many times the prediction
    # of the model was wrong
    # ---> finaly replaced with inside the fit_train with a numpy boolean expression
    def compute_error(self, preds):
        for i in range(len(preds)):
            if preds[i] != self.y[i]:
                self.error.append(self.weights[i])
            else:
                continue
                
        
    def fit_train(self, X, y):
        self.X = X
        self.y = y
                
        # Step 1
        # initialise w_i = 1/N for all i from [1..N]
        # w = np.full_like(y, 1/len(y))
        
        # with the 1/N the each weight is too small = 0.00028986
        # trying with 1 as a initial weight
        w = np.ones(len(y))

        # Step 2.a - 1
        # for m from [1..M] of boost size
        # train a classifier f_m(x) -> [-1, 1] on X
        # and save all errors
        self.error = np.array([clf.predict(X) != y for clf in self.cls_boost])
 
        for m in range(self.boost_size):                   
            # Step 2.b
            # Compute classification error    
            errors = np.matmul(self.error, w)

            # indix of the minimum values
            min_err_idx = np.argmin(errors)
            self.classifiers += [self.cls_boost[min_err_idx]]
            
            # from lecture: sum of all the weights that were missclassified 
            # devided by the sum of all weights
            w_err = self.error[min_err_idx]
            w_sum = w.sum()

            E_m = (w_sum - w_err) / w_sum

            # Step 2.c
            # Compute classifier weight

            alpha_m = 0.5 * np.log((1 - E_m) / E_m)


            # Step 2.d
            # Recompute sample weights
            self.weights.append(alpha_m)
            
            w = w * np.exp(np.where(w_err, 1, -1) * self.weights[-1])
            
            self.error = np.delete(self.error, min_err_idx, axis=0)
            del self.cls_boost[min_err_idx]
            
            print("debug: done m = ", m)
            print("min_err_idx: ", min_err_idx)
            print("w_sum: ", w_sum)
            print("w_err: ", w_err)
            print("E_m: ", E_m)
            print("alpha_m : ", alpha_m )


        # convert into numpy array
        self.weights = np.array(self.weights)
            
        
    def predict(self, X):
        # print(self.weights.shape) -> (50, 3450)
        predictions = []
        weighted_preds = []                                                     
                                                             
        for cls in self.classifiers:
            predictions.append(cls.predict(X))
            
        predictions = np.array(predictions)
        # print(predictions.shape) --> (50, 1151)
                                                             
        weighted_preds = np.matmul(predictions.T, self.weights)
        
        print(weighted_preds)
        # return with sign
        return np.where(weighted_preds > 0, 1, -1)

        

In [316]:
one_cls = OneLevelDecisionTree()
boost = classifier_boost(X_train, one_cls)
print ("Weak: {} of type {}".format(len(boost), type(one_cls).__name__))

Weak: 26586 of type OneLevelDecisionTree


In [317]:
boost_size = 20
ada = AdaBoost(boost, boost_size)
ada.fit_train(X_train, y_train)
ada_predictions = ada.predict(X_test)



debug: done m =  0
min_err_idx:  0
w_sum:  3450.0
w_err:  [ True False False ...  True False False]
E_m:  [0.99971014 1.         1.         ... 0.99971014 1.         1.        ]
alpha_m :  [-4.07291981        -inf        -inf ... -4.07291981        -inf
        -inf]




debug: done m =  1
min_err_idx:  0
w_sum:  inf
w_err:  [ True False False ...  True False False]
E_m:  [nan nan nan ... nan nan nan]
alpha_m :  [nan nan nan ... nan nan nan]
debug: done m =  2
min_err_idx:  0
w_sum:  nan
w_err:  [ True False False ...  True False False]
E_m:  [nan nan nan ... nan nan nan]
alpha_m :  [nan nan nan ... nan nan nan]
debug: done m =  3
min_err_idx:  0
w_sum:  nan
w_err:  [ True False False ...  True False False]
E_m:  [nan nan nan ... nan nan nan]
alpha_m :  [nan nan nan ... nan nan nan]
debug: done m =  4
min_err_idx:  0
w_sum:  nan
w_err:  [ True False False ...  True False False]
E_m:  [nan nan nan ... nan nan nan]
alpha_m :  [nan nan nan ... nan nan nan]
debug: done m =  5
min_err_idx:  0
w_sum:  nan
w_err:  [ True False False ...  True False False]
E_m:  [nan nan nan ... nan nan nan]
alpha_m :  [nan nan nan ... nan nan nan]
debug: done m =  6
min_err_idx:  0
w_sum:  nan
w_err:  [ True False False ...  True False False]
E_m:  [nan nan nan ... nan nan na



In [318]:
def calculate_accuracy(labels, predictions):
        return np.mean(labels == predictions)


In [319]:
print(calculate_accuracy(y_train, ada_predictions))

0.6060869565217392


In [254]:

# So I will use my code from previous assignment to build a one
# THIS CELL IS COPIED FROM THE PREVIOUS ASSIGNMENT AND CONTAINS IMPLEMENTATION FOR A DECISSION TREE 


def cross_entropy(p):
        if p == 1 or p == 0: 
            # The entropy is zero if one event is certain
            return 0
        return - (p * np.log(p) + (1-p) * np.log((1-p)))

# Weight of a child node is number of samples in the node/total samples of all child nodes. 
# Similarly information gain is calculated with gini score. 
def children_entropy(feature, y):
    right = (feature == True).sum()/len(feature)
    left = 1 - right
    
    p = np.sum(y[feature])/len(y[feature]) 
    q = np.sum(y[np.invert(feature)])/len(y[np.invert(feature)])
    
    entropy_right = right * cross_entropy(p)
    entropy_left = left * cross_entropy(q)
    total_entropy = entropy_right + entropy_left
    return total_entropy, q, p

#====================================

class DecisionTree():
    
    def __init__(self, height=7):
        self.min_size = 4
        self.height = height
    
    # fit a basic binary tree for 2 classes classificaton 
    def fit(self, X, y):
        self.tree_size = 2**self.height - 1
        #print(self.tree_size)
        self.tmp_size = 2**(self.height + 1) - 1
        self.features = X.shape[1]
        self.tree = np.full(self.tmp_size, -1)
        self.tree_tmp = np.full(self.tmp_size + 1, -1)
        self.split_tree(X, y, 0)
    
    # binary tree
    def left_tree(self, leaf):
        return 2 * leaf + 1
    
    def right_tree(self, leaf):
        return 2 * leaf + 2
    
    
    def predict(self, X):
        predictions = []
        for x in X:
            idx = 0
            leaf = self.tree[idx]
            while self.tree[self.left_tree(idx)] != -1 or self.tree[self.right_tree(idx)] != -1:
                #print("idx:", idx)
                #print("leaf:", idx)

                if leaf >= self.tree_size:
                    return
                
                if x[leaf]:
                    idx = self.right_tree(idx)
                    #print("--------> right")
                else:
                    idx = self.left_tree(idx)
                    #print("left <--------")
                prediction = self.tree_tmp[idx]
                leaf = self.tree[idx]
            predictions += [prediction]
        return predictions
    
    
    def split_data(self, index, value, X):
        left, right = list(), list()
        for row in X:
            if row[index] < value:
                left.append(row)
            else:
                right.append(row)
        return left, right
        
    
    def split_tree(self, X, y, leaf):
  
        # parent node is a leaf
        #print("leaf", leaf)
        if leaf >= self.tree_size:
            return
        
        entropies = np.full(self.features, np.inf) 
        left = np.empty(self.features)
        right = np.empty(self.features)
        
        # for every feature variable
        for i, feature in enumerate(X.T):
            if np.sum(feature) == 0 or np.sum(np.invert(feature)) == 0:
                continue 
            entropies[i], left[i], right[i] = children_entropy(feature, y)
        
        min_entropy = np.argmin(entropies)
        
        right = X[:,min_entropy]
        left = np.invert(right)
        #print(left)
        
        #print("min_entropy", min_entropy)
        self.tree[leaf] = min_entropy
        if min_entropy < len(self.tree_tmp):
            if (min_entropy < len(left)) and (min_entropy < len(right)):
                self.tree_tmp[self.left_tree(leaf)] = left[min_entropy]
                self.tree_tmp[self.right_tree(leaf)] = right[min_entropy]
        
        if len(y[right]) == 0 or len(y[left]) == 0:
            return
        # grow tree 
        if leaf >= self.min_size:
            return
        self.split_tree(X[left], y[left], self.left_tree(leaf))
        self.split_tree(X[right], y[right], self.right_tree(leaf))
            



Excercise 2 (Bonus).

Viola-Jones Face Detection
Implement the Viola-Jones algorithm (without the cascade mechanism) and use it on a LFW-Face-subsetto classify faces.

(a) Visualize the top ten face classifiers

In [None]:
class ViolaJonesFaceDetection():
    def __init__(self, img):
        self.width = img.shape[1]
        self.height = img.shape[0]
        self.img = img
        # integral image to be calculated
        self.integral_img = np.zeros_like(img)
    
    # https://en.wikipedia.org/wiki/Summed-area_table
    # The summed-area table can be computed efficiently in a single pass over the image, 
    # as the value in the summed-area table at (x, y) is:
    # I(x,y)= i(x,y) +I(x,y-1) +I(x-1,y)- I(x-1,y-1)
    
    def calc_integral_image(self):
        for y in self.height:
            for x in self.width:
                self.integral_img[x, y] = self.img[x, y] + self.integral_img[x, y - 1] 
                + self.integral_img[x - 1, y] - self.integral_img[x - 1, y - 1]
        
        return self.integral_img

Excercise 3 (Bonus).
Cascade-Classification

Implement a cascade algorithm to classify faces in a picture of your choice 
(there should be more than a face on your image, e.g. skimage.data.astronaut())