In [12]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.datasets import make_regression
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Record which scikit-learn release produced the outputs below, then confirm
# that the import cell ran to completion.
print(
    f"Scikit-learn version: {sklearn.__version__}",
    "Libraries imported successfully!",
    sep="\n",
)

Scikit-learn version: 1.6.1
Libraries imported successfully!


In [13]:
# Ask scikit-learn to use its diagram representation when an estimator is
# displayed in the notebook. This is a process-wide setting.
# NOTE(review): recent scikit-learn releases appear to use "diagram" as the
# default already, which would make this call a no-op there -- confirm against
# the set_config documentation. Keeping it explicit is harmless.
sklearn.set_config(display="diagram")

print("\n".join([
    "Interactive diagram display enabled!",
    "All sklearn estimators will now render as interactive diagrams.",
]))

Interactive diagram display enabled!
All sklearn estimators will now render as interactive diagrams.


In [14]:
# Create a simple estimator: a LinearRegression built with no arguments.
# It is only instantiated here; nothing in this cell fits it.
simple_estimator = LinearRegression()

# Display the estimator as an interactive diagram.
# Leaving the bare name as the cell's last expression hands the object to the
# notebook's rich display, so the diagram appears as this cell's output.
simple_estimator

In [15]:
# Numerical preprocessing pipeline. Steps run in the order listed:
#   scaler            -> standardize features
#   feature_selection -> keep the 5 best features as scored by f_regression
#   pca               -> reduce the selected features to 3 components
num_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('feature_selection', SelectKBest(score_func=f_regression, k=5)),
    ('pca', PCA(n_components=3)),
])

print("Numerical preprocessing pipeline created!")
print("Pipeline steps:")
# Each entry of .steps is a (name, estimator) pair; list the name next to the
# estimator's class name.
for name, estimator in num_pipeline.steps:
    print(f"  - {name}: {type(estimator).__name__}")

Numerical preprocessing pipeline created!
Pipeline steps:
  - scaler: StandardScaler
  - feature_selection: SelectKBest
  - pca: PCA


In [16]:
# Visualize the numerical preprocessing pipeline.
# The bare name as the cell's last expression makes the notebook render it as
# an interactive diagram. The pipeline has not been fitted at this point.
num_pipeline

In [17]:
# Create sample data: a synthetic regression problem with 1000 samples and
# 10 features, seeded so the split and the results are reproducible.
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)
# Hold out 20% of the samples for testing (800 train / 200 test).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a complete pipeline with preprocessing and model.
# The preprocessing stage is an unfitted clone of `num_pipeline` instead of a
# second hand-written copy of the same three steps, so the scaler / selector /
# PCA settings are defined in exactly one place. clone() returns a new,
# independent estimator with the same parameters, so fitting
# `complete_pipeline` never touches `num_pipeline` itself.
complete_pipeline = Pipeline([
    ('preprocessing', sklearn.clone(num_pipeline)),
    ('model', RandomForestRegressor(n_estimators=100, random_state=42))
])

print("Complete machine learning pipeline created!")
print(f"Training data shape: {X_train.shape}")
print(f"Test data shape: {X_test.shape}")

Complete machine learning pipeline created!
Training data shape: (800, 10)
Test data shape: (200, 10)


In [18]:
# Visualize the complete pipeline.
# This shows the nested structure: the 'preprocessing' sub-pipeline followed by
# the 'model' step. The pipeline is still unfitted here; fit() is called in a
# later cell.
complete_pipeline

In [19]:
# Fit the pipeline to see more details in the diagram.
# fit() trains every preprocessing step and then the model on the training split.
complete_pipeline.fit(X_train, y_train)

# Make predictions on the held-out split to show the pipeline in action.
predictions = complete_pipeline.predict(X_test)

print("Pipeline fitted successfully!")
print(f"First 5 predictions: {predictions[:5]}")
# Use the library metric rather than a hand-rolled mean of squared residuals.
# NOTE(review): the error is large relative to noise=0.1, presumably because the
# preprocessing keeps only 3 components derived from 5 of the 10 features --
# this cell demonstrates the diagram, not model quality.
print(f"Mean squared error: {mean_squared_error(y_test, predictions):.4f}")

# Display the fitted pipeline - now it will show more details about the fitted components.
complete_pipeline

Pipeline fitted successfully!
First 5 predictions: [  -9.35306926    6.6528124    48.8690044  -125.49113083 -125.49706126]
Mean squared error: 13593.2073


In [20]:
# A second, simpler pipeline for comparison: min-max scaling followed directly
# by a linear regression (no feature selection or PCA, no nesting).
alternative_pipeline = Pipeline(steps=[
    ('scaler', MinMaxScaler()),
    ('model', LinearRegression()),
])

print(
    "Alternative pipeline created!",
    "Try clicking on each component in the diagram below:",
    sep="\n",
)

# Last expression of the cell, so the notebook renders the (unfitted) pipeline
# diagram underneath the printed text.
alternative_pipeline

Alternative pipeline created!
Try clicking on each component in the diagram below:
