# Binscatter Examples

In [None]:
from pathlib import Path

import polars as pl
from binscatter import binscatter
import plotly.express as px

# Folder holding the parquet artifacts this notebook reads; fail fast with the
# resolved absolute path if the notebook is run from an unexpected directory.
data_dir = Path("..") / "artifacts"
assert data_dir.exists(), f"Data directory {data_dir.resolve()} does not exist."

# Cap polars table rendering at 6 rows to keep displayed frames compact.
pl.Config.set_tbl_rows(6)

In [None]:
# Load the three parquet artifacts from `data_dir`:
#   - LightGBM Optuna trials
#   - ElasticNet Optuna trials
#   - processed state-level panel
lightgbm_df = pl.read_parquet(data_dir / "optuna_lightgbm_trials.parquet")
elasticnet_df = pl.read_parquet(data_dir / "optuna_elasticnet_trials.parquet")
state_data = pl.read_parquet(data_dir / "state_data_processed.parquet")

State × year panel used in the README example.

In [None]:
# Preview the first rows of the state panel (5 is the polars default).
state_data.head(n=5)

In [None]:
# Covariates passed to the controlled binscatter below.
controls = [
    "top_corp_lag3",
    "real_gdp_pc",
    "population_density",
    "rd_credit_lag3",
    "statenum",
    "year",
]

# Binscatter of lnpat against mtr90_lag3 with no controls.
p_binscatter_bare = binscatter(
    state_data,
    x="mtr90_lag3",
    y="lnpat",
    title="No controls",
)

# Same x/y pair, this time with the covariate list supplied as controls.
p_binscatter_controls = binscatter(
    state_data,
    x="mtr90_lag3",
    y="lnpat",
    controls=controls,
    title="Controling for covariate totally changes shape of conditional mean",
)

# Render both figures, bare first, so they can be compared one above the other.
p_binscatter_bare.show()
p_binscatter_controls.show()

## Optuna ElasticNet trials 

Each row summarizes a single ElasticNet trial (alpha, l1_ratio, RMSE, runtime).

In [None]:
# Preview the first rows of the ElasticNet trial table (5 is the polars default).
elasticnet_df.head(n=5)

In [None]:
# Binscatter of RMSE against the ElasticNet alpha hyperparameter.
# NOTE(review): poly_line=1 presumably overlays a degree-1 polynomial fit —
# confirm against the binscatter documentation.
fig_elastic = binscatter(elasticnet_df, x="alpha", y="rmse", poly_line=1)

# Leave the figure as the cell's last expression so the notebook renders it.
fig_elastic

In [None]:
# Build each figure first and call `.show()` as a separate statement.
# `.show()` is used only for its rendering side effect; chaining it onto the
# assignment would bind its return value (None for a plotly figure) instead of
# the figure itself.

# Binscatter of RMSE against learning rate with no controls.
fig_lightgbm_bare = binscatter(
    lightgbm_df,
    x="learning_rate",
    y="rmse",
    title="Basic binscatter, learning rate vs rmse controls in lightgbm",
)
fig_lightgbm_bare.show()

# Same x/y pair with the other LightGBM hyperparameters passed as controls.
# `fig_lightgbm` stays bound to this figure so it can be reused afterwards.
fig_lightgbm = binscatter(
    lightgbm_df,
    x="learning_rate",
    y="rmse",
    controls=[
        "num_leaves",
        "min_child_samples",
        "feature_fraction",
        "lambda_l1",
    ],
    title="Here controls just add precision but dont change the shape<br>",
)
fig_lightgbm.show()

# Raw scatter of the same two columns, for comparison with the binned views.
px.scatter(
    lightgbm_df,
    x="learning_rate",
    y="rmse",
    title="Raw data - scatter",
).show()

# No trailing bare `fig_lightgbm` expression: the figure is already rendered by
# `.show()` above, and repeating it would display the same plot twice.