In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import ipywidgets as widgets
from IPython.display import display
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from sklearn.metrics import accuracy_score

In [3]:
# Set seed for reproducibility.
# NOTE: every dataset below draws from the same global NumPy random stream, so
# the ORDER of the np.random calls determines the generated values.
np.random.seed(42)

# --- Linearly separable data: two unit-variance Gaussian blobs ---------------
X1 = np.random.randn(100, 2) + [2, 2]
X2 = np.random.randn(100, 2) + [5, 5]
X_linear = np.vstack((X1, X2))
y_linear = np.array([0]*100 + [1]*100)
df_linear = pd.DataFrame(X_linear, columns=['Temperature', 'Humidity'])
df_linear['Pollution_Level'] = y_linear

# --- Non-linearly separable data: a noisy ring (class 0) around a blob (class 1)
length = 200
radius = 2
angle = np.linspace(0, 2 * np.pi, length)
X1_circular = np.vstack((np.sin(angle) * radius, np.cos(angle) * radius)).T + np.random.randn(length, 2) * 0.1
X2_circular = np.random.randn(length, 2) * 0.5
X_circular = np.vstack((X1_circular, X2_circular))
y_circular = np.array([0] * length + [1] * length)
df_circular = pd.DataFrame(X_circular, columns=['CO2_Emission', 'Water_Usage'])
df_circular['Area_Type'] = y_circular

# --- XOR-like data: four blobs on the corners of a square --------------------
X1_xor = np.random.randn(50, 2) + [2, 2]
X2_xor = np.random.randn(50, 2) + [2, 5]
X3_xor = np.random.randn(50, 2) + [5, 2]
X4_xor = np.random.randn(50, 2) + [5, 5]
X_xor = np.vstack((X1_xor, X2_xor, X3_xor, X4_xor))
# XOR pattern: the diagonal corners (2,2) and (5,5) share class 0, the
# off-diagonal corners (2,5) and (5,2) share class 1. Labelling the first 100
# rows 0 and the last 100 rows 1 would split the classes on the first feature
# alone, making the set linearly separable rather than XOR.
y_xor = np.repeat([0, 1, 1, 0], 50)
df_xor = pd.DataFrame(X_xor, columns=['Species_Count', 'Toxicity_Level'])
df_xor['Habitat_Damage'] = y_xor

# --- Overlapping data: two blobs whose centres are only one unit apart -------
X1_overlap = np.random.randn(100, 2) + [3, 3]
X2_overlap = np.random.randn(100, 2) + [4, 4]
X_overlap = np.vstack((X1_overlap, X2_overlap))
y_overlap = np.array([0]*100 + [1]*100)
df_overlap = pd.DataFrame(X_overlap, columns=['Air_Quality', 'Noise_Level'])
df_overlap['Health_Risk'] = y_overlap

# Collect all datasets in a dictionary for easy access.
# Each frame holds the two feature columns first and the target column last.
datasets = {
    'linear_data': df_linear,
    'circular_data': df_circular,
    'xor_data': df_xor,
    'overlapping_data': df_overlap
}

In [4]:
# Callback for the interactive controls: train an SVM on the chosen dataset and plot its decision boundary
def interactive_svm(kernel, C, gamma, data_key):
    """Train an SVC on one of the demo datasets and visualise the result.

    Parameters
    ----------
    kernel : str
        Passed to ``SVC(kernel=...)``.
    C : float
        Passed to ``SVC(C=...)``.
    gamma : str or float
        Passed to ``SVC(gamma=...)``.
    data_key : str
        Key into the module-level ``datasets`` dictionary. The selected frame's
        last column is used as the target, all other columns as features.

    Returns
    -------
    sklearn.svm.SVC
        The model fitted on the standardized training split.

    Side effects
    ------------
    Prints the test-set accuracy. When the dataset has exactly two features,
    also draws the decision regions together with all (standardized) samples.
    """
    # Separate features and target variable
    data = datasets[data_key]
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]

    feature_names = X.columns
    target_name = data.columns[-1]

    # Split first, then scale: fitting the scaler on the full dataset would let
    # test-set statistics leak into the preprocessing of the training data.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    scaler = StandardScaler().fit(X_train_raw)

    def _scale(frame):
        # Keep column names and index so the model always sees named features.
        return pd.DataFrame(scaler.transform(frame), columns=feature_names, index=frame.index)

    X_train = _scale(X_train_raw)
    X_test = _scale(X_test_raw)
    X_scaled = _scale(X)  # every sample in the model's feature space, for plotting

    # Train the SVM model and predict
    model = SVC(kernel=kernel, C=C, gamma=gamma)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # Report accuracy regardless of whether the data can be plotted
    print("Accuracy Score\n")
    print(accuracy_score(y_test, predictions))

    # Decision regions can only be drawn for two-dimensional feature spaces
    if X.shape[1] == 2:
        fig, ax = plt.subplots(figsize=(10, 6))

        # Mesh bounds cover ALL plotted points (not just the test split),
        # padded by one unit, so no sample lies outside the shaded regions.
        x1_min, x1_max = X_scaled.iloc[:, 0].min() - 1, X_scaled.iloc[:, 0].max() + 1
        x2_min, x2_max = X_scaled.iloc[:, 1].min() - 1, X_scaled.iloc[:, 1].max() + 1

        xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max, 100),
                               np.linspace(x2_min, x2_max, 100))

        # Predict the class of every mesh node, then restore the grid shape
        mesh_points = pd.DataFrame(np.c_[xx1.ravel(), xx2.ravel()], columns=feature_names)
        mesh_pred = model.predict(mesh_points).reshape(xx1.shape)

        # Filled contour = decision regions; scatter = the standardized samples
        ax.contourf(xx1, xx2, mesh_pred, alpha=0.4, cmap=plt.cm.RdYlBu)
        scatter = ax.scatter(X_scaled.iloc[:, 0], X_scaled.iloc[:, 1], c=y, cmap=plt.cm.RdYlBu)

        # Take legend colours from the scatter's own norm/cmap so each legend
        # marker matches the colour actually drawn for that class label.
        legend_handles = [
            Line2D([0], [0], marker='o', color='w', label=f'{label}',
                   markerfacecolor=scatter.cmap(scatter.norm(label)),
                   markersize=10, linestyle='none')
            for label in np.unique(y)
        ]

        # Add the custom legend to the plot
        ax.legend(handles=legend_handles, loc='upper right', title=f'{target_name}')

        # Add labels and title (axes show the standardized feature values)
        ax.set_xlabel(X.columns[0])
        ax.set_ylabel(X.columns[1])
        ax.set_title(f'SVM Decision Boundary (Kernel: {kernel})')
        plt.show()

    return model

# Kernel selector
kernel_widget = widgets.Dropdown(
    value = 'linear',
    options = ['linear', 'rbf', 'poly'],
    description = 'Kernel'
)

# Regularization parameter selector: log-scale slider from 10**-3 to 10**3
C_widget = widgets.FloatLogSlider(
    value = 1,
    base = 10,
    min = -3,
    max = 3,
    description = 'C (Regularization Parameter)',
    # The label is wider than the default description column and would be
    # truncated; 'initial' lets the label take the width it needs.
    style = {'description_width': 'initial'}
)

# Gamma selector
gamma_widget = widgets.Dropdown(
    options = ['scale', 'auto'],
    value = 'scale',
    description = 'Gamma'
)

# Dataset selector, populated from the keys of the `datasets` dictionary
dataset_widget = widgets.Dropdown(
    options = list(datasets.keys()),
    value = 'linear_data',
    description = 'Dataset:'
)

# Bind each control to the matching keyword argument of interactive_svm.
# This must stay the last expression of the cell so the notebook renders it.
widgets.interactive(
    interactive_svm,
    C = C_widget,
    gamma = gamma_widget,
    data_key = dataset_widget,
    kernel = kernel_widget
)

interactive(children=(Dropdown(description='Kernel', options=('linear', 'rbf', 'poly'), value='linear'), Float…