# Polynomial Kernel SVM from Scratch for Student Performance
Kernelised SVMs allow non-linear decision boundaries by implicitly projecting data into a higher-dimensional feature space. Instead of relying on a library implementation, we compute a degree-2 polynomial kernel, K(x, y) = (x·y + 1)^2, ourselves. Using this kernel is equivalent to fitting a linear SVM on the original features expanded with all polynomial terms up to degree 2 (squares and pairwise products), without ever building the expanded features explicitly. We train one coefficient per training sample with simple hinge-loss-driven updates and then convert the resulting decision values back to pass/fail labels.

In [1]:
# ============================================================
# 🎓 PROBLEM STATEMENT 20:
# Implement an SVM model (from scratch) with a Polynomial Kernel
# to predict student performance (Pass/Fail)
# using study time, attendance, and internal scores as features.
# Evaluate model using Precision, Recall, and F1-score.
# ============================================================

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

# -------------------------------------------------------------
# 1. LOAD AND PREPARE DATASET
# -------------------------------------------------------------
# The CSV is read from ./datasets/student_performance_dataset_20.csv,
# resolved relative to the current working directory.
df = pd.read_csv("./datasets/student_performance_dataset_20.csv")

# Display first few rows for verification
print("üìò Dataset Preview:")
print(df.head(), "\n")

# Select the three numeric predictors used by the model
X = df[['Study_Hours_per_Week', 'Attendance_Rate', 'Internal_Scores']].values

# Encode the target: 'pass' (case-insensitive, surrounding whitespace ignored)
# becomes 1; every other value becomes 0.
y = df['Pass_Fail'].apply(lambda x: 1 if str(x).strip().lower() == 'pass' else 0).values

# Split into Training (70%) and Testing (30%) BEFORE scaling, so that the
# scaler's mean/variance are estimated from training rows only.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Scale features (important for SVMs). Fitting on the full dataset would leak
# test-set statistics into training, so fit on the training split and reuse
# those statistics for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Keep X standardised as well, using the train-fitted scaler, so any later
# code that reads X still sees scaled features.
X = scaler.transform(X)

print(f"‚úÖ Training Samples: {X_train.shape[0]}")
print(f"‚úÖ Testing Samples : {X_test.shape[0]}\n")

# -------------------------------------------------------------
# 2. DEFINE POLYNOMIAL KERNEL
# -------------------------------------------------------------
def polynomial_kernel(X1, X2, degree=2):
    """
    Evaluate the polynomial kernel K(x, y) = (x . y + 1) ** degree.

    The dot product is taken between X1 and the transpose of X2, so for
    two 2-D arrays entry [i, j] of the result is the kernel value between
    row i of X1 and row j of X2.
    """
    inner_products = np.dot(X1, X2.T)
    shifted = inner_products + 1
    return shifted ** degree

# -------------------------------------------------------------
# 3. SIMPLE SVM IMPLEMENTATION (From Scratch)
# -------------------------------------------------------------
class SimpleSVM:
    """
    Small kernel classifier trained with hinge-style updates.

    One coefficient (alpha) is kept per training sample. The decision value
    for a point x is sum_j alpha_j * y_j * K(x_j, x); a non-negative value is
    reported as class 1, a negative value as class 0.

    Parameters
    ----------
    lr : float
        Step size applied to each alpha update.
    epochs : int
        Number of full passes over the training samples.
    kernel : callable or None
        Function kernel(A, B) returning the matrix of kernel values between
        the rows of A and the rows of B. When None, a linear kernel
        (plain dot product) is used.
    """

    def __init__(self, lr=0.0001, epochs=300, kernel=None):
        self.lr = lr
        self.epochs = epochs
        self.kernel = kernel

    def _kernel_matrix(self, A, B):
        """Return kernel values between rows of A and rows of B."""
        if self.kernel is None:
            # Calling None would raise a TypeError; fall back to a linear kernel.
            return np.dot(A, B.T)
        return self.kernel(A, B)

    def fit(self, X, y):
        """
        Learn the alpha coefficients from training data.

        X holds one sample per row; y holds the labels, where 0 marks the
        negative class and any other value the positive class.
        """
        X = np.asarray(X)
        # Coerce first: comparing a plain list with 0 yields a single bool,
        # which would collapse every label into one scalar.
        # Internally labels are -1 / +1.
        y = np.where(np.asarray(y) == 0, -1, 1)
        n_samples = X.shape[0]

        # Pairwise kernel values between all training samples.
        K = self._kernel_matrix(X, X)

        self.alpha = np.zeros(n_samples)

        for _ in range(self.epochs):
            for i in range(n_samples):
                # Uses the alphas already updated earlier in this pass, so
                # the samples must be visited sequentially.
                margin = y[i] * np.sum(self.alpha * y * K[:, i])
                if margin < 1:
                    # Margin violated: grow alpha_i in proportion to the gap.
                    self.alpha[i] += self.lr * (1 - margin)

            # Clip alpha to prevent numeric explosion
            self.alpha = np.clip(self.alpha, -1e3, 1e3)

        # Prediction needs the training samples and their -1/+1 labels.
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """
        Return an array of 0/1 labels, one per sample in X.

        An empty X yields an empty integer array.
        """
        X = np.asarray(X)
        if X.shape[0] == 0:
            return np.array([], dtype=int)
        # One flattened row per sample.
        X = X.reshape(X.shape[0], -1)

        # Single kernel call for all samples: column j holds the kernel
        # values between every training sample and test sample j.
        K = self._kernel_matrix(self.X_train, X)
        scores = np.dot(self.alpha * self.y_train, K)
        return np.where(scores >= 0, 1, 0)

# -------------------------------------------------------------
# 4. TRAIN AND EVALUATE THE MODEL
# -------------------------------------------------------------
# Build the classifier around the polynomial kernel, fit it on the training
# split, and label the held-out samples.
svm = SimpleSVM(
    kernel=polynomial_kernel,
    epochs=300,
    lr=0.0001,
)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)

# -------------------------------------------------------------
# 5. EVALUATE MODEL PERFORMANCE
# -------------------------------------------------------------
# zero_division=0 is passed to each metric so an undefined score is
# reported as 0.
precision, recall, f1 = (
    metric(y_test, y_pred, zero_division=0)
    for metric in (precision_score, recall_score, f1_score)
)

print("üìä MODEL PERFORMANCE METRICS üìä")
print(f"Precision : {precision:.2f}")
print(f"Recall    : {recall:.2f}")
print(f"F1-Score  : {f1:.2f}")

üìò Dataset Preview:
  Student_ID  Gender  Study_Hours_per_Week  Attendance_Rate  Internal_Scores  \
0       S147    Male                    31        68.267841               86   
1       S136    Male                    16        78.222927               73   
2       S209  Female                    21        87.525096               74   
3       S458  Female                    27        92.076483               99   
4       S078  Female                    37        98.655517               63   

  Parental_Education_Level Internet_Access_at_Home Extracurricular_Activities  \
0              High School                     Yes                        Yes   
1                      PhD                      No                         No   
2                      PhD                     Yes                         No   
3                Bachelors                      No                         No   
4                  Masters                      No                        Yes   

   Final_E