## Plot 2D Partial Dependence

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
import pandas as pd

# Fetch the California housing data and wrap its feature matrix in a
# DataFrame whose columns carry the dataset's own feature names.
data = fetch_california_housing()
df = pd.DataFrame(data=data.data, columns=data.feature_names)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    data.target,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Build the gradient-boosting regressor (Huber loss, seeded) and train it in
# one expression: scikit-learn's ``fit`` returns the estimator itself, so
# ``model`` ends up bound to the fitted regressor.
model = GradientBoostingRegressor(
    loss="huber",
    n_estimators=100,
    learning_rate=0.1,
    max_depth=4,
    random_state=42,
).fit(X_train, y_train)

In [None]:
from eda_toolkit import plot_2d_pdp

# Column names of the California housing feature matrix
names = data.feature_names

# Draw partial dependence plots for the fitted model on the training data.
plot_2d_pdp(
    # --- model and data ---
    model=model,
    X_train=X_train,
    feature_names=names,
    # Plain strings name a single feature; the tuple names a feature pair
    # (presumably plotted as a joint/interaction panel -- confirm against
    # the eda_toolkit docs).
    features=[
        "MedInc",
        "AveOccup",
        "HouseAge",
        "AveRooms",
        "Population",
        ("AveOccup", "HouseAge"),
    ],
    # --- layout ---
    plot_type="grid",
    grid_figsize=(14, 10),
    individual_figsize=(12, 4),
    # --- text ---
    title="PDP of house value on CA non-location features",
    text_wrap=120,
    label_fontsize=14,
    tick_fontsize=12,
    # --- optional: uncomment to save the figures ---
    # image_path_png="path/to/save/png",
    # save_plots="all",
)

## Plot 3D PDP for two features

In [None]:
from eda_toolkit import plot_3d_pdp

# Static 3D partial dependence surface for the HouseAge / AveOccup pair,
# evaluated with the fitted model on the held-out test features.
plot_3d_pdp(
    # --- model and data ---
    model=model,
    dataframe=X_test,
    feature_names_list=["HouseAge", "AveOccup"],
    grid_resolution=30,
    # --- axis labels and title ---
    x_label="House Age",
    y_label="Average Occupancy",
    z_label="Partial Dependence",
    title="3D Partial Dependence Plot of House Age vs. Average Occupancy",
    text_wrap=40,
    # --- rendering ---
    plot_type="static",
    figsize=[8, 5],
    wireframe_color="black",
    # --- output ---
    image_filename="3d_pdp",
    # NOTE(review): `image_path_png` is not defined in the cells shown here;
    # it presumably comes from an earlier setup cell. Running this cell on
    # its own raises NameError -- confirm the variable is set beforehand.
    image_path_png=image_path_png,
)