In [None]:
# Perceptron and margins --> given a dataset (x_i, y_i) with x_i in R^d and y_i in {-1, 1}

#####################
# MARGINS
#####################

# Need to find a weight vector w with functional margin y_i(w.x_i) >= gamma > 0 --> unconstrained parameterisation: w can be rescaled freely, so this margin can be made arbitrarily large, which kills the notion of true confidence on the actual data
# Geometric margin (normalised / removes the reparameterisation freedom) : y_i(w.x_i) / ||w||
# gamma --> minimum distance of the data from the decision boundary
# w.x = 0 defines a hyperplane that acts as the decision boundary
# Margin also measures how stable the classifier is to perturbations of the inputs => it is a robustness measure, not a measure of how confident the classifier is
# Generalisation is controlled by the geometry of the solution, not its raw size -> if a quantity changes drastically under reparameterisation, it cannot control generalisation.
# R is the size/scale of the data: the norm of the point furthest from the origin
# Mistake bound is <= (R/gamma)**2 --> this does not involve any feature or parameter count, just the data geometry and the margin


####################
# PERCEPTRON
####################

# With functional margin : the classifier h(x) must satisfy y_i(w.x_i) > 0 for all i.
# With geometric margin  : the classifier h(x) must satisfy y_i(w.x_i) / ||w|| > 0 for all i.
# Online learning : Start with some w (=0) -> Predict -> Check whether right or wrong -> Update weight vector ( using only x_i, y_i ) 
# Goals : Zero classification error on training set - Number of updates bounded by (R/gamma)**2 - Solution depends on geometry only not feature_count
# Finding any separator that is immune to reparameterisation and perturbations
# Converges iff margin assumption holds here :  the data is linearly separable by a positive margin

# After each mistake, the quantity to update is w
# We want the minimal change to w that fixes / improves on the mistake
# y(w.x) has the wrong sign, so change w to w' such that y(w'.x) = w'.(yx) moves towards > 0; the simplest local choice with a guarantee is a change (w' - w) proportional to yx
# Adding yx is the smallest update in parameter space that guarantees:
# a strictly positive correction (y(w'.x) = y(w.x) + ||x||^2 for w' = w + yx), without assuming anything global, without introducing scale dependence, without guessing a margin
# This is just conceptual; to show an effective change / enforce a gamma restriction, a predefined gamma has to be used



# Gamma is the minimum geometric margin achieved by the final w
# Functional - correctness 
# Geometric  - correctness and robustness

In [35]:
import numpy as np
# Toy two-class dataset: two 2-D Gaussian blobs, labelled -1 and +1.
np.random.seed(0)  # fixed seed so the draws below are reproducible

# Negative class is drawn first, then the positive class (draw order fixes the data).
c1 = np.array([1, 1]) + np.random.randn(10, 2)
y1 = np.full(10, -1.0)

c2 = np.array([5, 5]) + np.random.randn(10, 2)
y2 = np.full(10, 1.0)

# Stack both classes into one design matrix / label vector.
X = np.concatenate((c1, c2), axis=0)
Y = np.hstack((y1, y2))

# Prepend a constant-1 column so the bias is learned as the first weight.
X = np.column_stack((np.ones(X.shape[0]), X))

# Shuffle samples and labels with the same permutation.
indices = np.arange(X.shape[0])
np.random.shuffle(indices)

X, Y = X[indices], Y[indices]

In [36]:
# With Functional Margin : the classifier h(x) -> y_i( w * x_i ) > 0 for all i.

def perceptron_functional(X, Y, max_epochs=10_000):
    """Train a classic perceptron until every sample has positive functional margin.

    Starting from w = 0, the data is swept in order; every sample with
    Y[i] * (w . X[i]) <= 0 triggers the update w <- w + Y[i] * X[i].
    Sweeps repeat until Y * (X @ w) > 0 holds for all samples.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Design matrix. Any bias column must already be included by the caller.
    Y : ndarray of shape (n_samples,)
        Labels; the update rule assumes values in {-1, +1}.
    max_epochs : int, optional
        Upper limit on the number of full sweeps. Without it the loop never
        terminates when the data is not linearly separable through the origin.

    Returns
    -------
    cycles_func : int
        Number of full sweeps performed.
    R : float
        Largest Euclidean norm among the rows of X.
    gamma : float
        Minimum geometric margin min_i Y[i] * (w . X[i]) / ||w|| of the final w
        (0 if w is the zero vector).
    w : ndarray of shape (n_features,)
        Final weight vector.

    Raises
    ------
    RuntimeError
        If the samples are still not all correctly classified after
        ``max_epochs`` sweeps.
    """
    n_samples, n_features = X.shape
    cycles_func = 0
    w = np.zeros(n_features)
    while not np.all(Y * (X @ w) > 0):
        # Guard against non-separable data, where the perceptron cycles forever.
        if cycles_func >= max_epochs:
            raise RuntimeError(
                f"perceptron_functional did not converge within {max_epochs} epochs; "
                "the data may not be linearly separable"
            )
        cycles_func += 1
        for i in range(n_samples):
            # <= 0 (not < 0) so that the initial w = 0 counts as a mistake.
            if Y[i] * np.dot(w, X[i]) <= 0:
                w = w + Y[i] * X[i]

    R = np.max(np.linalg.norm(X, axis=1))
    norm_w = np.linalg.norm(w)
    gamma = np.min(Y * (X @ w) / norm_w) if norm_w != 0 else 0
    return cycles_func, R, gamma, w

# Train on the current dataset, then report the sweep count, the data radius R,
# the achieved minimum geometric margin and the resulting mistake bound (R/gamma)^2.
c_f, r_f, g_f, w_f = perceptron_functional(X, Y)
print(
    "============= Results for perceptron with functional margin updates =============",
    end="\n\n",
)
print(f"Cycles: {c_f}, R: {r_f}, Gamma: {g_f:.4f}, Bound: {(r_f/g_f)**2:.2f}")



Cycles: 9, R: 9.248232650471097, Gamma: 0.0616, Bound: 22554.87


In [37]:
# With geometric margin : the classifier h(x) must satisfy y_i(w.x_i) / ||w|| > 0 for all i, here enforced with a predefined gamma (without gamma the results are the same as for the functional margin, because the update logic is the same)


def perceptron_geometric(X, Y, gamma, max_epochs=10_000):
    """Train a perceptron until every sample reaches geometric margin ``gamma``.

    Starting from w = 0, the data is swept in order; a sample whose geometric
    margin Y[i] * (w . X[i]) / ||w|| is below ``gamma`` triggers the update
    w <- w + Y[i] * X[i]. Training stops after the first sweep with no update.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Design matrix. Any bias column must already be included by the caller.
    Y : ndarray of shape (n_samples,)
        Labels; the update rule assumes values in {-1, +1}.
    gamma : float
        Required geometric margin for every sample.
    max_epochs : int, optional
        Upper limit on the number of full sweeps. Without it the loop never
        terminates when ``gamma`` cannot be reached (margin too large for the
        data, or data not separable).

    Returns
    -------
    cycles_geometric : int
        Number of full sweeps performed, including the final update-free sweep.
    R_g : float
        Largest Euclidean norm among the rows of X.
    gamma_g : float
        Minimum geometric margin actually achieved by the final w
        (0 if w is the zero vector).
    w : ndarray of shape (n_features,)
        Final weight vector.

    Raises
    ------
    RuntimeError
        If some sample still violates the margin after ``max_epochs`` sweeps.
    """
    n_samples, n_features = X.shape
    cycles_geometric = 0
    w = np.zeros(n_features)

    while True:
        # Guard against an unreachable target margin, which would loop forever.
        if cycles_geometric >= max_epochs:
            raise RuntimeError(
                f"perceptron_geometric did not reach margin {gamma} within "
                f"{max_epochs} epochs; the margin may be unattainable for this data"
            )
        cycles_geometric += 1
        errors = 0
        for i in range(n_samples):
            norm_w = np.linalg.norm(w)
            # With w = 0 the margin is undefined; -inf forces an update.
            margin = (Y[i] * np.dot(w, X[i])) / norm_w if norm_w > 0 else -np.inf
            if margin < gamma:
                w = w + Y[i] * X[i]
                errors += 1

        if errors == 0:
            break

    R_g = np.max(np.linalg.norm(X, axis=1))
    norm_w = np.linalg.norm(w)
    gamma_g = np.min(Y * (X @ w) / norm_w) if norm_w != 0 else 0
    return cycles_geometric, R_g, gamma_g, w


# Train with a required geometric margin of 0.15, then report the sweep count,
# the data radius R, the achieved minimum margin and the bound (R/gamma)^2.
c_g, r_g, g_g, w_g = perceptron_geometric(X, Y, 0.15)
print(
    "\n============= Results for perceptron with geometric margin updates =============",
    end="\n\n",
)
print(f"Cycles: {c_g}, R: {r_g}, Gamma: {g_g:.4f}, Bound: {(r_g/g_g)**2:.2f}")




Cycles: 64, R: 9.248232650471097, Gamma: 0.1810, Bound: 2611.59


In [38]:
def generate_skewed_data(seed=0, n_per_class=10, neg_center=(1, 1), pos_center=(10, 10)):
    """Generate a shuffled two-class Gaussian-blob dataset with a bias column.

    The negative class (label -1) is drawn around ``neg_center`` and the
    positive class (label +1) around ``pos_center``, both with unit variance.
    A private ``RandomState`` is used so the caller's global NumPy RNG state
    is left untouched; with the default arguments the data is identical to
    what the legacy ``np.random.seed(0)`` global stream produces.

    Parameters
    ----------
    seed : int, optional
        Seed for the private random generator.
    n_per_class : int, optional
        Number of samples drawn for each class.
    neg_center, pos_center : sequence of float, optional
        Class means; both must have the same length (the feature dimension).

    Returns
    -------
    X : ndarray of shape (2 * n_per_class, 1 + dim)
        Samples with a leading constant-1 bias column, in shuffled order.
    Y : ndarray of shape (2 * n_per_class,)
        Labels (-1.0 or 1.0), shuffled with the same permutation as X.

    Raises
    ------
    ValueError
        If the two centres do not have the same shape.
    """
    neg_center = np.asarray(neg_center, dtype=float)
    pos_center = np.asarray(pos_center, dtype=float)
    if neg_center.shape != pos_center.shape or neg_center.ndim != 1:
        raise ValueError("neg_center and pos_center must be 1-D sequences of equal length")
    dim = neg_center.shape[0]

    # Local generator: same Mersenne Twister stream as np.random.seed(seed),
    # but without reseeding the process-wide RNG as a side effect.
    rng = np.random.RandomState(seed)

    # Draw order (negatives, positives, then the shuffle) fixes the output.
    c1 = rng.randn(n_per_class, dim) + neg_center
    y1 = -np.ones(n_per_class)
    c2 = rng.randn(n_per_class, dim) + pos_center
    y2 = np.ones(n_per_class)

    X = np.vstack((c1, c2))
    Y = np.concatenate((y1, y2))
    # Constant-1 column lets the bias be learned as the first weight.
    X = np.hstack((np.ones((X.shape[0], 1)), X))

    indices = np.arange(X.shape[0])
    rng.shuffle(indices)
    return X[indices], Y[indices]

# Re-run both perceptron variants on the dataset whose positive class is centred further away.
# NOTE: this rebinds the notebook-level X, Y and the c_*/r_*/g_*/w_* result names.
X, Y = generate_skewed_data()

# Functional-margin perceptron.
c_f, r_f, g_f, w_f = perceptron_functional(X, Y)
print(
    "=============Results for perceptron with functional margin updates=============",
    end="\n\n",
)
print(f"Cycles: {c_f}, R: {r_f}, Gamma: {g_f:.4f}, Bound: {(r_f/g_f)**2:.2f}")

# Geometric-margin perceptron with a required margin of 0.15.
c_g, r_g, g_g, w_g = perceptron_geometric(X, Y, 0.15)
print(
    "\n=============Results for perceptron with geometric margin updates=============",
    end="\n\n",
)
print(f"Cycles: {c_g}, R: {r_g}, Gamma: {g_g:.4f}, Bound: {(r_g/g_g)**2:.2f}")


Cycles: 4, R: 16.295741376194798, Gamma: 0.1162, Bound: 19673.94


Cycles: 6, R: 16.295741376194798, Gamma: 0.3018, Bound: 2914.73
