# AA1 lab 09 -- warm up towards SVMs

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import Perceptron, LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Build a small synthetic 2D classification problem (seeded, so reruns match)
X, y = make_classification(
    n_samples=60,
    n_features=2,
    n_redundant=0,
    n_clusters_per_class=1,
    class_sep=1.5,
    hypercube=True,
    random_state=42,
)

# Hold out half of the samples for validation
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.5, random_state=42
)

# Fit the three linear classifiers on the same training split.
# `fit` returns the estimator, so construction and training are chained.

# Perceptron with default settings
perceptron = Perceptron().fit(X_train, y_train)

# Linear SVM; the very large C makes the regularization negligible
svm = SVC(kernel="linear", C=10**10).fit(X_train, y_train)

# Logistic regression with default settings
log_reg = LogisticRegression().fit(X_train, y_train)


# Plot decision boundaries
def plot_decision_boundary(model, model_name, h=0.02):
    """Plot a fitted classifier's decision regions with the train/validation data.

    Draws a 1x2 figure. The left panel overlays the training points on the
    predicted regions; the right panel does the same for the validation
    points, drawn with square markers. Each panel title reports the accuracy
    of ``model`` on the corresponding split.

    The data is not passed in: it is read from the module-level names ``X``,
    ``X_train``, ``y_train``, ``X_val`` and ``y_val``.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``.
    model_name : str
        Text used as the first line of both panel titles.
    h : float, default=0.02
        Step of the mesh grid on which ``model`` is evaluated. Must be
        positive.

    Raises
    ------
    ValueError
        If ``h`` is not strictly positive.
    """
    if h <= 0:
        raise ValueError(f"h must be positive, got {h!r}")

    # Evaluation grid spanning all samples, padded by 1 on every side.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # (points, labels, split name, marker) for the left and right panel.
    # marker=None keeps matplotlib's default scatter marker for the training set.
    panels = (
        (X_train, y_train, "training", None),
        (X_val, y_val, "validation", "s"),
    )

    _, axs = plt.subplots(1, 2, figsize=(6, 3))
    for ax, (points, labels, split, marker) in zip(axs, panels):
        # Regions and points share one colormap so their colours correspond.
        ax.contourf(xx, yy, Z, alpha=0.6, cmap=plt.cm.Paired)
        ax.scatter(
            points[:, 0],
            points[:, 1],
            c=labels,
            edgecolors="k",
            marker=marker,
            cmap=plt.cm.Paired,
        )
        ax.set_title(
            f"{model_name}\n{split} accuracy: {accuracy_score(labels, model.predict(points)):.3f}"
        )

    # Adjust layout and display the plot
    plt.tight_layout()
    plt.show()


# Plot decision boundaries and scatter plot for each model
# (plain string literals: the titles contain no placeholders, so no f-prefix)
plot_decision_boundary(perceptron, "Perceptron")
plot_decision_boundary(svm, "Linear SVM")
plot_decision_boundary(log_reg, "Logistic Regression")

### Suggested exercises:

- Play around with the previous script, changing the generated data (for example the `n_samples`, `class_sep` or `random_state` arguments of `make_classification`), and see how this affects the decision boundaries of the three linear classifiers being compared

- Figure out the relationship between `Perceptron`, `Linear SVM`, and `Logistic regression` in the context of the [SGDClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html#sklearn.linear_model.SGDClassifier) of `scikit-learn`

- Rewrite the previous script using only the `SGDClassifier` with appropriate parameters to obtain the three models presented here