___

# Machine Learning in Geosciences
Department of Applied Geoinformatics and Cartography, Charles University

Lukas Brodsky lukas.brodsky@natur.cuni.cz


# Interactive Exploration of the Fundamental ML Algorithms with Scikit-learn

This code lets you select the dataset simulation method, change the number of samples and noise level, choose a model, and adjust its hyperparameters. The plot title updates to show both the model type and its hyperparameter values.

* k-Nearest Neighbors (KNN)

* Decision Trees

* Support Vector Machines (SVM)

* Artificial Neural Networks (ANN) 


#### KNN 
* `n_neighbors` int, default=5 

#### Decision Tree Classifier
* `max_depth` int, default=None

#### SVC
* `C` float, default=1.0 (Regularization parameter. The strength of the regularization is inversely proportional to C.)
* `gamma` {'scale', 'auto'} or float, default='scale' (Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.)

#### MLPClassifier
* `hidden_layer_sizes` array-like of shape (n_layers - 2,), default=(100,) (The ith element represents the number of neurons in the ith hidden layer.)

In [62]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.datasets import make_moons, make_classification, make_circles
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import ipywidgets as widgets
from IPython.display import display

### Update plot while generating synthetic data and fitting the model

In [63]:
# Function to generate dataset and train model
def update_plot(n_samples, noise, dataset_type, model_type, dt_max_depth, svm_C, svm_gamma, knn_neighbors, ann_depth):
    """Simulate a 2-D dataset, fit the selected classifier and plot its decision boundary.

    Parameters
    ----------
    n_samples : int
        Number of samples to simulate.
    noise : float
        Noise level. Passed as ``noise`` to ``make_moons`` / ``make_circles``
        and as ``flip_y`` to ``make_classification``.
    dataset_type : {'make_classification', 'make_moons', 'make_circles'}
        Name of the scikit-learn generator used to simulate the data.
    model_type : {'Decision Tree', 'SVM', 'KNN', 'ANN'}
        Classifier to fit.
    dt_max_depth : int
        ``max_depth`` of the decision tree (used only for 'Decision Tree').
    svm_C : float
        ``C`` of the RBF-kernel SVC (used only for 'SVM').
    svm_gamma : float
        ``gamma`` of the RBF-kernel SVC (used only for 'SVM').
    knn_neighbors : int
        ``n_neighbors`` of the KNN classifier (used only for 'KNN').
    ann_depth : int
        Number of hidden layers of the MLP, 50 units each (used only for 'ANN').

    Raises
    ------
    ValueError
        If ``dataset_type`` or ``model_type`` is not one of the supported options.
    """
    # Generate dataset based on the selected simulation method
    if dataset_type == 'make_classification':
        # This generator has no `noise` argument; the noise level is passed as flip_y.
        X, y = make_classification(n_samples=n_samples, n_features=2, n_redundant=0,
                                   n_informative=2, n_clusters_per_class=1, flip_y=noise, random_state=42)
    elif dataset_type == 'make_moons':
        X, y = make_moons(n_samples=n_samples, noise=noise, random_state=42)
    elif dataset_type == 'make_circles':
        X, y = make_circles(n_samples=n_samples, noise=noise, factor=0.5, random_state=42)
    else:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError(f"Unknown dataset_type: {dataset_type!r}")

    # Only the training portion (70 %) is fitted and plotted; the hold-out part is discarded here.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.3, random_state=42)

    # Standardise both features (fitted on the training portion only).
    X_train = StandardScaler().fit_transform(X_train)

    # Choose and configure model based on selection and hyperparameters
    if model_type == 'Decision Tree':
        hyper_info = f"max_depth = {dt_max_depth}"
        model = DecisionTreeClassifier(max_depth=dt_max_depth, random_state=42)
    elif model_type == 'SVM':
        hyper_info = f"C = {svm_C}, gamma = {svm_gamma}"
        model = SVC(kernel='rbf', C=svm_C, gamma=svm_gamma, random_state=42)
    elif model_type == 'KNN':
        hyper_info = f"n_neighbors = {knn_neighbors}"
        model = KNeighborsClassifier(n_neighbors=knn_neighbors)
    elif model_type == 'ANN':
        hyper_info = f"depth of layers = {ann_depth}"
        # ann_depth hidden layers with 50 units each
        model = MLPClassifier(hidden_layer_sizes=(50,)*ann_depth,
                              activation='relu', solver='adam', max_iter=1000, random_state=42)
    else:
        raise ValueError(f"Unknown model_type: {model_type!r}")

    model.fit(X_train, y_train)
    plot_decision_boundary(model, X_train, y_train, model_type, hyper_info)

### Plot decision boundary

In [64]:
# Function to plot decision boundary
def plot_decision_boundary(model, X, y, model_type, hyper_info, *, padding=0.5, grid_resolution=200):
    """Plot the class regions predicted by a fitted model together with the samples.

    Parameters
    ----------
    model : object
        Fitted classifier exposing ``predict`` for two-column inputs.
    X : ndarray
        Samples; column 0 is drawn on the x-axis and column 1 on the y-axis.
    y : array-like
        Class label of every sample, used to colour the scatter points.
    model_type : str
        Model name shown in the plot title.
    hyper_info : str
        Hyperparameter description shown in the plot title.
    padding : float, default=0.5
        Margin added around the data range on both axes.
    grid_resolution : int, default=200
        Number of grid points per axis at which the model is evaluated.
    """
    x_min, x_max = X[:, 0].min() - padding, X[:, 0].max() + padding
    y_min, y_max = X[:, 1].min() - padding, X[:, 1].max() + padding
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, grid_resolution),
                         np.linspace(y_min, y_max, grid_resolution))

    # Predict the class of every grid node, then restore the grid shape for contouring.
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    Z = model.predict(grid_points).reshape(xx.shape)

    plt.figure(figsize=(6, 4))
    # Same colormap for regions and points, so each class has one consistent colour.
    plt.contourf(xx, yy, Z, alpha=0.3, cmap=plt.cm.bwr)
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolor='k', cmap=plt.cm.bwr)
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.title(f"{model_type} Classification Boundary ({hyper_info})")
    plt.show()

In [65]:
# Controls for the data simulation (generator, sample count, noise level) and the model choice
dataset_selector = widgets.Dropdown(
    description='Dataset',
    options=['make_classification', 'make_moons', 'make_circles'],
    value='make_moons',
)
n_samples_slider = widgets.IntSlider(
    description='n_samples',
    value=200,
    min=50,
    max=500,
    step=10,
)
noise_slider = widgets.FloatSlider(
    description='noise',
    value=0.2,
    min=0.0,
    max=0.5,
    step=0.05,
)
model_selector = widgets.Dropdown(
    description='Model',
    options=['Decision Tree', 'SVM', 'KNN', 'ANN'],
    value='Decision Tree',
)

In [66]:
# One slider per model hyperparameter; each description is the name shown next to the slider
dt_max_depth_slider = widgets.IntSlider(
    description='max_depth', value=4, min=1, max=10, step=1,
)
svm_C_slider = widgets.FloatSlider(
    description='C', value=1.0, min=0.1, max=10.0, step=0.1,
)
svm_gamma_slider = widgets.FloatSlider(
    description='gamma', value=0.01, min=0.001, max=1.0, step=0.001,
)
knn_neighbors_slider = widgets.IntSlider(
    description='n_neighbors', value=5, min=1, max=20, step=1,
)
ann_depth_slider = widgets.IntSlider(
    description='depth of layers', value=2, min=1, max=10, step=1,
)

In [67]:
# Stack the controls vertically: data settings first, then the model and its hyperparameters
ui = widgets.VBox(children=[
    dataset_selector,
    n_samples_slider,
    noise_slider,
    model_selector,
    dt_max_depth_slider,
    svm_C_slider,
    svm_gamma_slider,
    knn_neighbors_slider,
    ann_depth_slider,
])

In [69]:
# Connect each update_plot argument to the widget that supplies its value
out = widgets.interactive_output(
    update_plot,
    dict(
        n_samples=n_samples_slider,
        noise=noise_slider,
        dataset_type=dataset_selector,
        model_type=model_selector,
        dt_max_depth=dt_max_depth_slider,
        svm_C=svm_C_slider,
        svm_gamma=svm_gamma_slider,
        knn_neighbors=knn_neighbors_slider,
        ann_depth=ann_depth_slider,
    ),
)

# Show the controls together with the output area
display(ui, out)

VBox(children=(Dropdown(description='Dataset', index=2, options=('make_classification', 'make_moons', 'make_ci…

Output()

In [32]:
# Create interactive plot
# interactive_plot = widgets.interactive(update_plot, n_samples=n_samples_slider, noise=noise_slider, max_depth=max_depth_slider, model_type=model_selector)

# Display widgets and plot
# display(n_samples_slider, noise_slider, max_depth_slider, model_selector, interactive_plot)