<a href="https://colab.research.google.com/github/khadijaatiq/ML-Algos/blob/main/SVC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Each sample below is written as one row: (Feature_1, Feature_2, Target).
_COLUMNS = ('Feature_1', 'Feature_2', 'Target')


def _to_columns(rows):
    """Transpose (Feature_1, Feature_2, Target) rows into a dict of column lists."""
    return {name: list(column) for name, column in zip(_COLUMNS, zip(*rows))}


# --- DATASET 1: LINEARLY SEPARABLE (Simple clusters) ---
# Feature_1 ~ e.g. Blood Pressure, Feature_2 ~ e.g. Cholesterol.
# Target: 0 = Healthy, 1 = Risk.
data_linear = _to_columns([
    (1, 1, 0), (2, 1, 0), (2, 2, 0), (1, 2, 0),   # Group A: low BP, low cholesterol
    (8, 8, 1), (9, 8, 1), (8, 9, 1), (9, 9, 1),   # Group B: high BP, high cholesterol
])
df_linear = pd.DataFrame(data_linear)

# --- DATASET 2: NON-LINEAR (Complex/Circles) ---
# A condition that hits people with EXTREME values (very high OR very low) while
# average people stay safe: a "bullseye" layout that no single straight line can split.
# Target: 0 = Center (Safe), 1 = Corners (Risk).
data_nonlinear = _to_columns([
    (0, 0, 0), (0, 1, 0), (1, 0, 0), (-1, -1, 0),     # points near the origin
    (4, 4, 1), (-4, 4, 1), (4, -4, 1), (-4, -4, 1),   # the four far corners
])
df_nonlinear = pd.DataFrame(data_nonlinear)

print("Datasets Created Successfully.")
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# 1. Setup Data: the two feature columns are the inputs, 'Target' is the label
feature_columns = ['Feature_1', 'Feature_2']
X = df_linear[feature_columns]
y = df_linear['Target']

# 2 + 3. Create the Linear SVM and train it (find the best w and b) in one step;
#        fit() returns the fitted estimator, so the two calls can be chained.
#   kernel='linear': the decision function uses the Dot Product <x1, x2> directly
#   C=1.0: Hard Margin vs Soft Margin trade-off (1.0 is the standard balance)
model_linear = SVC(kernel='linear', C=1.0).fit(X, y)

# 4. Predict on the same points the model was trained on
predictions = model_linear.predict(X)

# 5. THEORY CHECK: inspect the learned hyperplane and its Support Vectors
print(f"Accuracy: {accuracy_score(y, predictions)}")
print("\n--- THEORY VALIDATION ---")
print(f"Weights (w): {model_linear.coef_}")
print(f"Bias (b): {model_linear.intercept_}")
print(f"Support Vectors (The points defining the line):\n{model_linear.support_vectors_}")
# Expect edge points such as [2,2] and [8,8]: they sit closest to the other class.

# 1. Setup Data (Non-linear dataset)
X_nl = df_nonlinear[['Feature_1', 'Feature_2']]
y_nl = df_nonlinear['Target']

# 2. Create Non-Linear SVM Model
# kernel='rbf': Activates the "Kernel Trick" (Gaussian transformation)
# gamma='scale': Controls the shape of the "bell curve" around points.
#   It is passed explicitly so the model really uses the setting described here
#   instead of depending on whatever the installed scikit-learn defaults to.
# High gamma = complex boundary (potential overfitting), Low gamma = smoother.
# C=1.0: same soft-margin penalty as used for the linear model.
model_rbf = SVC(kernel='rbf', C=1.0, gamma='scale')

# 3. Train
model_rbf.fit(X_nl, y_nl)

# 4. Predict (on the training points themselves, so this is training accuracy)
predictions_nl = model_rbf.predict(X_nl)

# 5. THEORY CHECK
print(f"Accuracy: {accuracy_score(y_nl, predictions_nl)}")
print("\n--- THEORY VALIDATION ---")
# Note: RBF SVM does NOT have simple 'coef_' (w) weights like Linear SVM
# because the data is now implicitly mapped to infinite dimensions!
# Accessing model_rbf.coef_ raises AttributeError for non-linear kernels, so the
# access is wrapped in try/except and the failure itself is the demonstration.
try:
    print(model_rbf.coef_)
except AttributeError:
    print("Theory Confirmed: RBF Kernel operates in high-dimensional space, so there is no simple 'w' vector to print.")

# n_support_ is an array with one count per class (not a single total),
# which is why the printed value looks like "[a b]".
print(f"Number of Support Vectors: {model_rbf.n_support_}")

Datasets Created Successfully.
Accuracy: 1.0

--- THEORY VALIDATION ---
Weights (w): [[0.16666667 0.16666667]]
Bias (b): [-1.66666667]
Support Vectors (The points defining the line):
[[2. 2.]
 [8. 8.]]
Accuracy: 1.0

--- THEORY VALIDATION ---
Theory Confirmed: RBF Kernel operates in high-dimensional space, so there is no simple 'w' vector to print.
Number of Support Vectors: [3 4]
