# Polynomial Kernel SVM from Scratch for Student Performance
Kernelised SVMs allow non-linear decision boundaries by implicitly mapping the data into a higher-dimensional feature space. Instead of relying on a library implementation, we manually expand the original features with all degree-2 polynomial terms (squares and pairwise products). Fitting a linear SVM on this expanded space corresponds to using a degree-2 polynomial kernel (up to per-feature scaling and the constant term). We again optimise the hinge loss with stochastic sub-gradient descent and then map the ±1 predictions back to pass/fail labels.

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Toy student dataset: three numeric predictors and a binary pass/fail target.
student_records = {
    "study_time": [1, 2, 3, 4, 5, 2, 6, 7, 3, 8, 5, 4],
    "absences": [8, 6, 5, 4, 3, 9, 2, 1, 7, 1, 3, 2],
    "internal_score": [50, 55, 60, 65, 70, 48, 75, 80, 58, 85, 72, 68],
    "passed": [0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1],
}
data = pd.DataFrame(student_records)

# Reproducible 75/25 split: sample the training rows, keep the rest for testing.
train_df = data.sample(frac=0.75, random_state=7)
in_training = data.index.isin(train_df.index)
test_df = data[~in_training]

print("Training size:", len(train_df))
print("Testing size:", len(test_df))

In [None]:
def polynomial_features(X):
    """Return X augmented with its degree-2 terms.

    The output columns are, in order: the original features, their
    element-wise squares, and the product of every feature pair (i, j)
    with i < j. No constant/bias column is added.

    X is expected to be a 2-D array (samples x features).
    """
    n_features = X.shape[1]
    # One column per unordered feature pair, ordered (0,1), (0,2), ..., (1,2), ...
    pair_products = [
        (X[:, a] * X[:, b]).reshape(-1, 1)
        for a in range(n_features)
        for b in range(a + 1, n_features)
    ]
    # With a single feature there are no pairs; only linear and squared terms remain.
    return np.hstack([X, X ** 2, *pair_products])

feature_columns = ["study_time", "absences", "internal_score"]
X_train_raw = train_df[feature_columns].to_numpy().astype(float)
X_test_raw = test_df[feature_columns].to_numpy().astype(float)

# Standardise features using statistics of the training split only.
# The mean/std must be captured BEFORE X_train_raw is overwritten; otherwise
# the test split would be "scaled" with the statistics of the already
# standardised training data (mean ~0, std ~1) and stay effectively unscaled.
train_mean = X_train_raw.mean(axis=0)
train_std = X_train_raw.std(axis=0)
# A constant column has zero std; divide by 1 instead to avoid NaN/inf.
train_std[train_std == 0] = 1.0

X_train_raw = (X_train_raw - train_mean) / train_std
X_test_raw = (X_test_raw - train_mean) / train_std

# Explicit degree-2 expansion stands in for a polynomial kernel.
X_train = polynomial_features(X_train_raw)
X_test = polynomial_features(X_test_raw)

# The SVM works with labels in {-1, +1}: passed == 1 -> +1, anything else -> -1.
y_train = np.where(train_df["passed"].to_numpy() == 1, 1, -1)
y_test = np.where(test_df["passed"].to_numpy() == 1, 1, -1)

print("Expanded feature dimension:", X_train.shape[1])

In [None]:
def train_linear_svm(X, y, learning_rate=0.02, lambda_reg=0.02, epochs=60):
    """Fit a linear SVM with per-sample sub-gradient descent on the hinge loss.

    Samples are visited in the order given (no shuffling) for ``epochs``
    full passes. Labels in ``y`` are expected to be -1 or +1.

    Parameters:
        X: 2-D array of shape (samples, features).
        y: iterable of labels aligned with the rows of X.
        learning_rate: step size applied to every update.
        lambda_reg: L2 regularisation strength (gradient term 2 * lambda_reg * w).
        epochs: number of passes over the data.

    Returns:
        (weights, bias): the learned weight vector and intercept.
    """
    weights = np.zeros(X.shape[1])
    bias = 0.0

    for _ in range(epochs):
        for sample, label in zip(X, y):
            margin = label * (np.dot(sample, weights) + bias)
            shrink = 2 * lambda_reg * weights
            if margin >= 1:
                # Margin satisfied: hinge loss is zero, only the
                # regularisation term pulls the weights toward zero.
                weights -= learning_rate * shrink
                continue
            # Margin violated (misclassified or too close to the boundary):
            # the hinge sub-gradient moves the hyperplane toward classifying
            # this sample correctly. The bias is not regularised.
            weights -= learning_rate * (shrink - label * sample)
            bias += learning_rate * label
    return weights, bias

def predict_linear_svm(X, weights, bias):
    """Classify each row of X as +1 or -1 from the sign of its decision value.

    The decision value is ``X @ weights + bias``; a value of exactly zero
    is assigned to the positive class.
    """
    decision_values = np.matmul(X, weights) + bias
    on_positive_side = decision_values >= 0
    return np.where(on_positive_side, 1, -1)

# Train on the expanded training features and predict the held-out rows.
weights, bias = train_linear_svm(X_train, y_train)
y_pred = predict_linear_svm(X_test, weights, bias)

# Convert the SVM's {-1, +1} labels back to the dataset's {0, 1} encoding.
y_test_binary = (y_test == 1).astype(int)
y_pred_binary = (y_pred == 1).astype(int)

# The test split is tiny, so a class may be absent from it. Passing the label
# set explicitly keeps the confusion matrix 2x2 and prevents
# classification_report from raising when fewer than two classes are observed.
class_labels = [0, 1]

print("Confusion matrix:")
print(confusion_matrix(y_test_binary, y_pred_binary, labels=class_labels))

print("\nDetailed report:")
print(classification_report(
    y_test_binary,
    y_pred_binary,
    labels=class_labels,
    target_names=["fail", "pass"],
    zero_division=0,  # report 0 for undefined metrics instead of warning
))