In [1]:
################## DUAL SVM ###################
# 
# Primal -> the variable being optimized is w and we do it via norm minimisation for robustness and hinge loss for correctness
# Dual   -> the unknown variable is alpha ( how much does a particular constraint resist being satisfied (to a given separator)) , 
# which constraints actually shape the separator
#
#
# Geomtric view at constraints -> each constraint equation gives half-spaces ( in (w,b) space ) 
# primal was smallest norm w that lies inside all those subspaces, dual theres no assumed separator we wanna know if the intersection is non-empty
#
#
# Each constraint is given a value alpha, which tells how much does this contribute to the optimum separator, support vectors ( alpha != 0) naturally come out as they refuse to relax
#
#
# Lagrangian is 1/2 * || w || ** 2 - summation (alpha_i)(y_i(w*x_i +b)-1), first term penalizes sensitivity, second enforces constraints
# Duality arrives here where we say min{w,b} max{alpha>=0} L >= max{alpha>=0}min{w,b} and we take the equal condition
# Differentiate L wrt w -> w = summation(w_i*x_i*y_i) --> This gives another important result that optimum data can be obtained in span of given data only
# as any component orthogonal to w increases || w ||
# Differentiate L wrt b  get summation alpha_i*y_i=0
#
#
# Substitute the w and b to get the inner product form the equation 
# KKT conditions justify our previous logical observations of behavior of alpha and constraint equations
# 
# If data is not separable -> Primal is infeasible -> Dual objective becomes unbounded above -> Soft margin SVM prevents that only (alpha <= C)
#
# Updates : differentiate dual wrt alpha_k -> 1- summation alpha_i*y_i*y_k*(x_i.x_k)

In [2]:
import numpy as np

In [3]:
def hard_dual_svm(X, Y, step_size, max_iter=20000, tol=1e-6):
    n_features, n_samples = X.shape
    alpha = np.zeros(n_samples)
    K = np.dot(X.T, X)
    H = (Y[:, None] * Y[None, :]) * K
    cycles = 0
    for i in range(max_iter):
        cycles += 1
        alpha_prev = np.copy(alpha)
        gradient = np.ones(n_samples) - np.dot(H, alpha)
        alpha += step_size * gradient
        alpha = np.maximum(0, alpha)
        if np.linalg.norm(alpha - alpha_prev) < tol:
            break
    w = np.dot(X, alpha * Y)
    sv_idx = np.where(alpha > 1e-7)[0]
    b = np.mean(Y[sv_idx] - np.dot(w, X[:, sv_idx])) if len(sv_idx) > 0 else 0.0
    check = np.array([Y[i] * (np.dot(w, X[:, i]) + b) for i in range(n_samples)])
    return w, b, cycles,check

In [4]:
def soft_dual_svm(X, Y, C, step_size, max_iter=20000, tol=1e-6):
    n_features, n_samples = X.shape
    alpha = np.zeros(n_samples)
    K = np.dot(X.T, X)
    H = (Y[:, None] * Y[None, :]) * K
    cycles = 0
    for i in range(max_iter):
        cycles += 1
        alpha_prev = np.copy(alpha)
        gradient = np.ones(n_samples) - np.dot(H, alpha)
        alpha += step_size * gradient
        alpha = np.clip(alpha, 0, C)
        if np.linalg.norm(alpha - alpha_prev) < tol:
            break
    w = np.dot(X, alpha * Y)
    sv_idx = np.where((alpha > 1e-7) & (alpha < C))[0]
    if len(sv_idx) == 0: sv_idx = np.where(alpha > 1e-7)[0]
    b = np.mean(Y[sv_idx] - np.dot(w, X[:, sv_idx])) if len(sv_idx) > 0 else 0.0
    check = np.array([Y[i] * (np.dot(w, X[:, i]) + b) for i in range(n_samples)])
    return w, b, cycles,check

In [5]:
X = np.array([
    [1.0, 1.0], [1.2, 0.9], [0.9, 1.2], [1.1, 1.0],
    [3.0, 3.1], [2.8, 2.9], [3.2, 3.0], [3.1, 3.1]
]).T
Y = np.array([-1, -1, -1, -1, 1, 1, 1, 1])
step_size = 1e-2

w_h, b_h, c_h,check_h = hard_dual_svm(X, Y, step_size)
print("================= SEPARABLE: HARD SVM ====================")
print(f"Cycles : {c_h} | b_h : {b_h:.4f} | ||w_h|| : {np.linalg.norm(w_h):.4f} | Check : {check_h}")
print(f"w_h : {w_h}")

w_s, b_s, c_s, check_s = soft_dual_svm(X, Y, 1.0, step_size)
print("\n================= SEPARABLE: SOFT SVM ====================")
print(f"Cycles : {c_s} | b_s : {b_s:.4f} | ||w_s|| : {np.linalg.norm(w_s):.4f} | Check : {check_s}")
print(f"w_s : {w_s}")

Cycles : 20000 | b_h : -0.3914 | ||w_h|| : 0.7285 | Check : [0.19559144 0.33752043 0.03408301 0.23637462 0.25636014 0.21720124
 0.11443116 0.21557696]
w_h : [-0.40783178  0.60362626]

Cycles : 19788 | b_s : -0.0000 | ||w_s|| : 0.2433 | Check : [-0.32609424 -0.32595289 -0.35884508 -0.33691695  1.00007068  0.93485171
  0.99992932  1.01089339]
w_s : [0.10822709 0.21786774]


In [6]:
X = np.array([
    [1.0, 1.0], [1.2, 0.9], [0.9, 1.2], [1.1, 1.0],
    [2.0, 2.0], # Conflicting outlier
    [3.0, 3.1], [2.8, 2.9], [3.2, 3.0]
]).T
Y = np.array([-1, -1, -1, 1, -1, 1, 1, 1])
step_size = 1e-3

w_h, b_h, c_h,check_h = hard_dual_svm(X, Y, step_size)
print("================= OVERLAPPING: HARD SVM ====================")
print(f"Cycles : {c_h} | b_h : {b_h:.4f} | ||w_h|| : {np.linalg.norm(w_h):.4f} | Check : {check_h}")
print(f"w_h : {w_h}")

w_s, b_s, c_s,check_s = soft_dual_svm(X, Y, 1.0, step_size)
print("\n================= OVERLAPPING: SOFT SVM ====================")
print(f"Cycles : {c_s} | b_s : {b_s:.4f} | ||w_s|| : {np.linalg.norm(w_s):.4f} | Check : {check_s}")
print(f"w_s : {w_s}")

Cycles : 20000 | b_h : -0.2668 | ||w_h|| : 0.4331 | Check : [ 0.12788679  0.03147024  0.21041144 -0.09111731 -0.0110322   0.12707361
  0.09928982  0.22349016]
w_h : [ 0.36769483 -0.22877583]

Cycles : 20000 | b_s : 0.0000 | ||w_s|| : 0.2311 | Check : [-0.32548289 -0.33720313 -0.34630606  0.34023744 -0.65091696  0.99413988
  0.92905307  1.00586012]
w_s : [0.14754548 0.17788858]
