# Interactive Regression Explorer

Use the selector below to choose one or more predictors (e.g. health spending,
physicians per 1,000, smoking prevalence). An ordinary least squares (OLS) model
then estimates how these variables relate to **life expectancy**, and the
regression table updates automatically whenever the selection changes.

In [26]:
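# Make sure to install the libraries this notebook imports
# pip install pandas statsmodels ipywidgets plotly
# Older classic Jupyter Notebook installs may also need the widget extension enabled:
# jupyter nbextension enable --py widgetsnbextension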

In [27]:
# import libraries
import pandas as pd
import statsmodels.api as sm
from ipywidgets import interact, SelectMultiple, Dropdown
import plotly.express as px

In [28]:
# Outcome column that every model and plot in this notebook explains.
target_var = "life_expectancy"

# Columns offered as explanatory variables in the regression selector.
candidate_predictors = [
    "health_expenditure_usd",
    "physicians_per_1000",
    "nurses_per_1000",
    "smoking_prevalence",
    "diabetes_prevalence",
    "pollution_mortality_rate",
]

# Load dataset
df = pd.read_csv("data/df_enriched.csv")

In [29]:
#| echo: false
#| message: false
#| warning: false
#| interactive: true

# Same column settings and data load as the visible setup cell, repeated in
# this cell.
target_var = "life_expectancy"

# Options shown in the "Select predictors" widget.
candidate_predictors = [
    "health_expenditure_usd",
    "physicians_per_1000",
    "nurses_per_1000",
    "smoking_prevalence",
    "diabetes_prevalence",
    "pollution_mortality_rate",
]

df = pd.read_csv("data/df_enriched.csv")

@interact(
    predictors=SelectMultiple(
        options=candidate_predictors,
        value=("health_expenditure_usd",),
        description="Select predictors",
    )
)
def run_regression(predictors):
    """Fit an OLS model of ``target_var`` on the chosen columns and print it.

    Args:
        predictors: Iterable of column names picked in the multi-select
            widget; an empty selection only prints a warning.

    Returns:
        None. A short header and the statsmodels summary table are printed.
    """
    chosen = list(predictors)
    if not chosen:
        print("⚠ Please select at least ONE independent variable.")
        return

    # Keep only rows where the outcome and every chosen predictor are present.
    complete_rows = df[[target_var, *chosen]].dropna()
    if complete_rows.empty:
        print("⚠ No usable rows after filtering — try fewer predictors.")
        return

    # add_constant supplies the intercept column for the fit.
    design = sm.add_constant(complete_rows[chosen])
    fitted = sm.OLS(complete_rows[target_var], design).fit()

    rule = "================================"
    print(rule)
    print(f"Dependent variable: {target_var}")
    print(f"Predictors: {', '.join(chosen)}")
    print(f"Observations used: {len(complete_rows)}")
    print(f"R² = {fitted.rsquared:.3f}")
    print(rule)
    print(fitted.summary())


interactive(children=(SelectMultiple(description='Select predictors', index=(0,), options=('health_expenditure…

In [30]:
#| echo: false
#| message: false
#| warning: false
#| interactive: true

# The scatter plot offers the same columns as the regression selector, so reuse
# that list (copied, so later edits to one cannot silently alter the other)
# instead of maintaining a second hand-typed copy that can drift.
numeric_predictors = list(candidate_predictors)

@interact(
    x_var=Dropdown(
        options=numeric_predictors,
        value="health_expenditure_usd",
        description="X variable",
    )
)
def interactive_scatter(x_var):
    """Show a scatter plot of ``target_var`` against one predictor column.

    Args:
        x_var: Name of the column placed on the x-axis, as chosen in the
            dropdown.

    Returns:
        None. The Plotly figure is displayed via ``fig.show()``.
    """
    x_label = x_var.replace("_", " ").title()

    # Rows missing the predictor, the outcome, or the hover label are dropped.
    points = df[[x_var, target_var, "location_key"]].dropna()

    axis_labels = {
        x_var: x_label,
        target_var: "Life Expectancy (years)",
    }
    chart = px.scatter(
        points,
        x=x_var,
        y=target_var,
        hover_name="location_key",
        trendline="ols",
        labels=axis_labels,
        title=f"Life Expectancy vs {x_label}",
    )

    # Only the spending column gets a log-scaled x-axis.
    # NOTE(review): this changes the axis display only; the OLS trendline is
    # presumably still fitted on the untransformed values — confirm whether a
    # log-space fit is wanted here.
    if x_var == "health_expenditure_usd":
        chart.update_xaxes(type="log")

    chart.show()


interactive(children=(Dropdown(description='X variable', options=('health_expenditure_usd', 'physicians_per_10…

In [31]:
#| echo: false
#| message: false
#| warning: false

import pandas as pd
import numpy as np
import plotly.graph_objects as go

df = pd.read_csv("data/df_enriched.csv")

target_var = "life_expectancy"

predictors = [
    "health_expenditure_usd",
    "physicians_per_1000",
    "nurses_per_1000",
    "smoking_prevalence",
    "diabetes_prevalence",
    "pollution_mortality_rate",
]

y_axis_label = "Life Expectancy (years)"

# One figure carries two traces per predictor (points at index 2*i, fitted
# line at 2*i + 1); each dropdown button shows exactly one such pair.
fig = go.Figure()
buttons = []

for i, pred in enumerate(predictors):
    # Use only rows where both the predictor and the outcome are present.
    sub = df[[pred, target_var]].dropna()
    x = sub[pred]
    y = sub[target_var]

    fig.add_trace(
        go.Scatter(
            x=x,
            y=y,
            mode="markers",
            name=f"{pred}",
            marker=dict(size=7, opacity=0.7),
            visible=(i == 0),
        )
    )

    # np.polyfit raises on empty input and has no unique solution when every
    # x value is identical, so only fit a line when at least two distinct x
    # values remain after dropping missing rows.
    if x.nunique() >= 2:
        m, b = np.polyfit(x, y, 1)
        x_line = np.linspace(x.min(), x.max(), 100)
        y_line = m * x_line + b
    else:
        # An empty trend trace is still added below so the
        # two-traces-per-predictor indexing of the visibility masks holds.
        x_line, y_line = [], []

    fig.add_trace(
        go.Scatter(
            x=x_line,
            y=y_line,
            mode="lines",
            name=f"{pred} trend",
            line=dict(dash="dash"),
            visible=(i == 0),
        )
    )

    # Visibility mask: True only for this predictor's two traces.
    vis = [j // 2 == i for j in range(2 * len(predictors))]

    pretty_name = pred.replace("_", " ").title()

    buttons.append(
        dict(
            label=pretty_name,
            method="update",
            # First dict restyles the traces, second dict updates the layout.
            args=[
                {"visible": vis},
                {
                    "title": f"{pretty_name} vs Life Expectancy",
                    "xaxis": {"title": pretty_name},
                    "yaxis": {"title": y_axis_label},
                },
            ],
        )
    )

fig.update_layout(
    title="Interactive Regression Explorer",
    # The first predictor's traces start visible, so label the axis to match.
    xaxis_title=predictors[0].replace("_", " ").title(),
    yaxis_title=y_axis_label,
    updatemenus=[
        dict(
            type="dropdown",
            x=0.5,
            xanchor="center",
            y=1.15,
            yanchor="top",
            buttons=buttons,
            showactive=True,
        )
    ],
    margin=dict(l=50, r=20, t=80, b=60),
)

# Bare expression so the notebook renders the figure as the cell output.
fig
