# Setup

In [1]:
# Third-party imports
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import RidgeCV, ElasticNetCV
from sklearn.model_selection import cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Local imports
from asboostreg import SparseAdditiveBoostingRegressor

In [None]:
# Route pandas ``.plot`` accessors through Plotly; later cells call
# ``update_layout`` on the returned figure.
pd.set_option("plotting.backend", "plotly")

# Loading the California housing dataset

In [None]:
# Fetch the dataset as pandas objects and split it into features and target.
housing = fetch_california_housing(as_frame=True)
X, y = housing.data, housing.target
X.head()

In [None]:
def evaluate(model):
    """Cross-validate ``model`` on the notebook-level ``X``/``y`` and summarise it.

    Runs 5-fold cross-validation scored with R² (train scores included) and
    reports every entry returned by ``cross_validate`` as ``mean ± std``
    across the folds, formatted with exactly two decimals.

    Parameters
    ----------
    model : estimator
        Object accepted by ``cross_validate`` that also exposes ``fit(X, y)``.

    Returns
    -------
    pandas.DataFrame
        One row per ``cross_validate`` result entry and a single
        ``"Mean ± SD"`` column holding the formatted summary strings.

    Notes
    -----
    Side effect: ``model`` is fitted in place on the full ``X``/``y`` before
    returning, so later cells can inspect the fitted estimator.
    """
    cv_results = pd.DataFrame(
        cross_validate(model, X, y, cv=5, scoring="r2", return_train_score=True)
    )

    def two_decimals(values):
        # Fixed-precision formatting: ``round(2).astype(str)`` would render
        # 0.60 as "0.6", giving the table ragged precision.
        return values.map("{:.2f}".format)

    intervals = two_decimals(cv_results.mean()) + " ± " + two_decimals(cv_results.std())
    # Fit on the full dataset so the caller's estimator is ready for inspection.
    model.fit(X, y)
    return pd.DataFrame({"Mean ± SD": intervals})

# Highly interpretable models

In [None]:
# Two-step pipeline: standardise the features, then fit RidgeCV on them.
ridge_steps = (StandardScaler(), RidgeCV())
ridge = make_pipeline(*ridge_steps)
evaluate(ridge)

In [None]:
# Coefficients of the second pipeline step (index 1), labelled by feature and
# sorted ascending. Relies on ``evaluate(ridge)`` having fitted the pipeline.
ridge_coefs = pd.Series(ridge[1].coef_, index=X.columns).sort_values()
fig = ridge_coefs.plot.barh()
fig.update_layout(
    title="Ridge model feature importances",
    xaxis_title="Importance",
    yaxis_title="Feature",
)

# Sparse interpretable models

In [None]:
# Two-step pipeline: standardise the features, then fit ElasticNetCV on them.
elasticnet_steps = (StandardScaler(), ElasticNetCV())
elasticnet = make_pipeline(*elasticnet_steps)
evaluate(elasticnet)

In [None]:
# Coefficients of the second pipeline step (index 1), labelled by feature and
# sorted ascending. Relies on ``evaluate(elasticnet)`` having fitted the pipeline.
elasticnet_coefs = pd.Series(elasticnet[1].coef_, index=X.columns).sort_values()
fig = elasticnet_coefs.plot.barh()
fig.update_layout(
    title="Elastic Net model feature importances",
    xaxis_title="Importance",
    yaxis_title="Feature",
)

In [None]:
# Hyper-parameters for the sparse additive boosting model, grouped in one place.
sparsereg_params = dict(
    learning_rate=0.8,
    n_estimators=100,
    l2_regularization=2.0,
    max_depth=6,
    random_state=0,
    n_iter_no_change=30,
)
sparsereg = SparseAdditiveBoostingRegressor(**sparsereg_params)
evaluate(sparsereg)

In [None]:
# Fit on the full dataset. ``evaluate(sparsereg)`` already does this as a side
# effect, so this cell mainly makes the fit explicit before the inspection
# cells that follow.
sparsereg.fit(X, y)

In [None]:
# Project-local helper from asboostreg; presumably plots a summary of the
# fitted model — TODO confirm against the library.
sparsereg.plot_model_information()

In [None]:
# Project-local helper from asboostreg; presumably visualises how the model
# explains its predictions on X — TODO confirm against the library.
sparsereg.explain(X)

In [None]:
# Preview the head of the contribution frame computed for X (presumably one
# row per sample with per-feature contributions — TODO confirm).
sparsereg.contribution_frame(X).head()