In [None]:
#################################################
# ADABOOST 
#################################################
#
# we want a binary classifier of the form y_cap(x) = sign(F(x))
# F(x) is represented as a linear combination of weak classifiers (simple decision rules) with some weight
# F(x) = sum(alpha_i * h_i(x)) -> h_i(x) is the ith decision rule , alpha_i is its weight of how much it contributes to F(x) , h(x) -> {-1,1}
# Margin of point i -> gamma_i = y_i * F(x_i) ; the point is classified correctly if gamma_i > 0 and misclassified if gamma_i < 0
# Adaboost chooses exponential margin loss -> L(F) = sum(exp(-gamma_i))
# Define the weight of each data point as how much they contribute to the total loss -> w_i = exp(-y_i*F_t-1(x_i))   (at t iteration we would have the t-1 th version of F)
# Now add another weak rule alpha_t*h_t -> F_t = F_t-1 + alpha_t*h_t
# L(F_t) = sum(w_t_i * exp(-alpha_t * y_i * h_t(x_i)))  -> pick h_t from the hypothesis space such that this new loss is minimum
# its weighted error is e_t = sum(w_t_i * 1[h_t(x_i) != y_i]) / sum(w_t_i)  -> 1[] is the 0-1 indicator, so e_t is the normalised total weight of the wrongly predicted points
# alpha_t by minimisation = 1/2 * log((1 - e_t) / e_t)
# update F and thereby the loss weights of the new F -> w_t+1_i = w_t_i * exp(-alpha_t * y_i * h_t(x_i))
#
# limitations / assumptions:
# assumes weak learners perform slightly better than random
# highly sensitive to label noise near the decision boundary
# optimizes exponential loss, not robustness
# can overfit by focusing excessively on hard or noisy points
# accuracy can look perfect even when margins deteriorate

In [None]:
import numpy as np

# Empty placeholder datasets; the experiment code further down assigns the real X and Y.
X, Y = np.array([]), np.array([])

def _fit_weak_learner(X, Y, w):
    """Search every (feature, threshold, polarity) stump for the lowest weighted error.

    Candidate thresholds are the midpoints between consecutive distinct values
    of each feature, so a feature with a single distinct value contributes no
    candidates.

    Returns a tuple ``(h, pred)`` where ``h`` is a dict with keys
    ``"feature"``, ``"threshold"`` and ``"class"`` (the label assigned to
    points with ``x[feature] <= threshold``; the opposite label is assigned
    otherwise) and ``pred`` holds that stump's predictions on ``X``.
    Returns ``(None, None)`` when no feature offers a candidate threshold.
    """
    best_err = np.inf
    best_h = None
    best_pred = None
    for feature in range(X.shape[1]):
        column = X[:, feature]
        vals = np.unique(column)
        thresholds = (vals[:-1] + vals[1:]) / 2
        for threshold in thresholds:
            below = column <= threshold
            for clas in (-1, 1):
                pred = np.where(below, clas, -clas)
                error = np.sum(w[pred != Y])
                # Strict '<' keeps the first stump found among ties.
                if error < best_err:
                    best_err = error
                    best_h = {"feature": feature, "threshold": threshold, "class": clas}
                    best_pred = pred
    return best_h, best_pred


def adaboost(X,Y,T):
    """Train an AdaBoost ensemble of decision stumps.

    Parameters
    ----------
    X : 2-D array-like of shape (n_samples, n_features)
        Training inputs.
    Y : 1-D array-like of length n_samples
        Training labels; the algorithm treats them as -1 / +1.
    T : int
        Maximum number of boosting rounds.

    Returns
    -------
    (h_arr, alpha_arr)
        ``h_arr`` is the list of chosen stumps (dicts with keys ``"feature"``,
        ``"threshold"``, ``"class"``) and ``alpha_arr`` the list of their
        weights, in the order they were added.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, is empty, does not match ``Y`` in length, or if a
        boosting round is attempted but no feature has two distinct values
        (so no stump can be built).
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    n_samples = X.shape[0]
    if n_samples == 0:
        raise ValueError("X must contain at least one sample")
    if Y.shape[0] != n_samples:
        raise ValueError("X and Y must contain the same number of samples")

    # Below this weighted error the learner is treated as perfect.
    min_error = 1e-12

    w = np.full(n_samples, 1.0 / n_samples)
    h_arr = []
    alpha_arr = []

    for _ in range(T):
        h, pred = _fit_weak_learner(X, Y, w)
        if h is None:
            raise ValueError(
                "cannot fit a decision stump: no feature has two distinct values"
            )

        epsilon = np.sum(w[pred != Y]) / np.sum(w)

        # When adaboost finds a weak learner with zero weighted error the
        # exponential loss is already minimized and the optimal step is
        # infinite, so the error is clipped to keep alpha finite.
        clipped = max(epsilon, min_error)
        alpha = 0.5 * np.log((1 - clipped) / clipped)
        h_arr.append(h)
        alpha_arr.append(alpha)

        if epsilon < min_error:
            # A perfect learner is added once and boosting stops.
            break

        # Up-weight misclassified points, down-weight correct ones, then
        # renormalise so the weights cannot underflow over many rounds.
        w = w * np.exp(-alpha * Y * pred)
        w /= np.sum(w)

    return h_arr, alpha_arr
def predict(X_test, h_arr, alpha_arr):
    """Classify the rows of X_test with a weighted vote of decision stumps.

    Each stump in h_arr is a dict with keys 'feature', 'threshold' and
    'class': it votes 'class' for rows whose feature value is <= threshold
    and the negated class otherwise. Votes are scaled by the matching entry
    of alpha_arr and summed; the sign of the sum is the predicted label,
    with an exact zero sum mapped to -1.
    """
    score = np.zeros(len(X_test))
    for stump, weight in zip(h_arr, alpha_arr):
        label = stump['class']
        at_or_below = X_test[:, stump['feature']] <= stump['threshold']
        votes = np.where(at_or_below, label, -label)
        score += weight * votes

    labels = np.sign(score)
    # Ties (zero total score) are resolved in favour of the -1 class.
    return np.where(labels == 0, -1.0, labels)

In [None]:
def run_experiment(title, X, Y, T, expected, max_alphas=None):
    """Train AdaBoost on (X, Y) for up to T rounds and print a training-set report.

    The report shows the title banner, the learned alphas (only the first
    ``max_alphas`` of them when that is given), the predictions on X, the
    true labels, the training accuracy and the ``expected`` remark.

    Returns ``(h_arr, alpha_arr, predictions, accuracy)`` so callers can keep
    inspecting the fitted ensemble.
    """
    h_arr, alpha_arr = adaboost(X, Y, T)
    predictions = predict(X, h_arr, alpha_arr)
    accuracy = np.mean(predictions == Y)

    print(f"==== {title} ====")
    print("Alphas:", [f"{a:.4f}" for a in alpha_arr[:max_alphas]])
    print("Predictions:", predictions.astype(int))
    print("True Labels:", Y)
    print(f"Accuracy: {accuracy*100:.1f}%")
    print(f"Expected: {expected}\n")
    return h_arr, alpha_arr, predictions, accuracy


# --- Experiment 1: two well-separated clusters ---
X = np.array([
    [1.0, 1.0],
    [1.2, 1.1],
    [0.9, 1.2],
    [1.1, 0.9],
    [3.0, 3.0],
    [3.1, 2.9],
    [2.9, 3.1],
    [3.2, 3.0]
])
Y = np.array([-1, -1, -1, -1, 1, 1, 1, 1])
T = 5
h_arr, alpha_arr, predictions, accuracy = run_experiment(
    "CLEAN SEPARABLE", X, Y, T,
    "Perfect separation, should achieve 100% accuracy",
)

# --- Experiment 2: XOR labelling of the unit square ---
X = np.array([
    [0.0, 0.0],
    [0.0, 1.0],
    [1.0, 0.0],
    [1.0, 1.0]
])
Y = np.array([-1, 1, 1, -1])
T = 10
h_arr, alpha_arr, predictions, accuracy = run_experiment(
    "XOR", X, Y, T,
    "XOR is not linearly separable, accuracy ~50% (random guessing)",
)

# --- Experiment 3: separable clusters with the last label flipped ---
X = np.array([
    [1.0, 1.0],
    [1.1, 1.0],
    [0.9, 1.1],
    [1.2, 0.9],
    [3.0, 3.0],
    [3.1, 2.9],
    [2.9, 3.1],
    [3.2, 3.0]
])
Y = np.array([-1, -1, -1, -1, 1, 1, 1, -1])
T = 20
h_arr, alpha_arr, predictions, accuracy = run_experiment(
    "SINGLE LABEL NOISE", X, Y, T,
    "AdaBoost may overfit to the noisy label, achieving high training accuracy",
    max_alphas=10,  # only the first 10 alphas are printed
)

# --- Experiment 4: classes that interleave along the diagonal ---
X = np.array([
    [0.0, 0.0], [0.1, 0.1], [0.2, 0.2],  # Class -1
    [0.15, 0.15], [0.25, 0.25],           # Class 1 (overlapping)
    [1.0, 1.0], [1.1, 1.1], [1.2, 1.2]    # Class 1
])
Y = np.array([-1, -1, -1, 1, 1, 1, 1, 1])
T = 30
h_arr, alpha_arr, predictions, accuracy = run_experiment(
    "OVERLAP / NO SEPARATION", X, Y, T,
    "Classes overlap, moderate accuracy, alphas may decrease over iterations",
    max_alphas=10,  # only the first 10 alphas are printed
)

# --- Experiment 5: feature 0 is constant, only feature 1 carries signal ---
X = np.array([
    [1.0, 0.0],
    [1.0, 0.1],
    [1.0, 0.2],
    [1.0, 0.3],
    [1.0, 1.0],
    [1.0, 1.1],
    [1.0, 1.2],
    [1.0, 1.3]
])
Y = np.array([-1, -1, -1, -1, 1, 1, 1, 1])
T = 5
h_arr, alpha_arr, predictions, accuracy = run_experiment(
    "DEGENERATE FEATURE", X, Y, T,
    "Feature 0 is useless (constant), only feature 1 helps, should still achieve 100%",
)

==== CLEAN SEPARABLE ====
Alphas: ['13.8155']
Predictions: [-1 -1 -1 -1  1  1  1  1]
True Labels: [-1 -1 -1 -1  1  1  1  1]
Accuracy: 100.0%
Expected: Perfect separation, should achieve 100% accuracy

==== XOR ====
Alphas: ['0.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000', '0.0000']
Predictions: [-1 -1 -1 -1]
True Labels: [-1  1  1 -1]
Accuracy: 50.0%
Expected: XOR is not linearly separable, accuracy ~50% (random guessing)

==== SINGLE LABEL NOISE ====
Alphas: ['0.9730', '0.8959', '0.8047', '0.7753', '0.7520', '0.7408', '0.7332', '0.7289', '0.7261', '0.7245']
Predictions: [-1 -1 -1 -1  1  1  1 -1]
True Labels: [-1 -1 -1 -1  1  1  1 -1]
Accuracy: 100.0%
Expected: AdaBoost may overfit to the noisy label, achieving high training accuracy

==== OVERLAP / NO SEPARATION ====
Alphas: ['0.9730', '1.2825', '0.6020', '0.7753', '0.7027', '0.7293', '0.7190', '0.7229', '0.7214', '0.7220']
Predictions: [-1 -1 -1  1  1  1  1  1]
True Labels: [-1 -1 -1  1  1  1 