# Introduction

- A spatial model is defined in the following way:
$$
y_{it} = \beta X_{it} + \rho \sum_{j=1}^{N} w_{ij} y_{jt} + \varepsilon_{it}
$$

In [None]:
import os 
# Move the working directory one level up before the `src.` import below.
# NOTE(review): presumably the notebook lives in a subfolder of the project
# root; re-running this cell moves up another level each time.
os.chdir("..")

In [None]:
import numpy as np
import pymc as pm
import arviz as az
import bambi as bmb
import statsmodels.api as sm

from src.data.data_reg import SpatialReg
import matplotlib.pyplot as plt
# Apply the ArviZ dark-grid theme to every figure drawn below.
az.style.use("arviz-darkgrid")

# Notebook-level constants and the helper object used by the later cells.
num, rho = 10, 0.8
sr = SpatialReg()

In [None]:
# Run the simulation helper of the SpatialReg instance and keep what it returns.
# NOTE(review): rho=0.7 here differs from the module-level `rho = .8`; the
# meaning of `time`, `simulations` and `start_seed` is defined in SpatialReg.
master = sr.spatial_simulation(time=10,rho=0.7, simulations=2, start_seed=787)

In [None]:
# Display the `results` attribute of the SpatialReg instance.
sr.results

In [None]:
# Display the value returned by sr.spatial_simulation(...).
master

In [None]:
# Fetch the spatial frame from the helper, record its row count and draw it.
gdf = sr.spatial_df()
n_obs = gdf.shape[0]
gdf.plot()

In [None]:
# Rebind `gdf` to the panel returned by SpatialReg.spatial_panel; later cells
# read its "time", "y_true", "X_*" and "w_*" columns.
gdf = sr.spatial_panel(rho=0.7, time=50, seed=787)

In [None]:
# Display the panel frame.
gdf

In [None]:
# Plot the rows with time == 0, using the "y_true" column as the plotted variable.
gdf[gdf["time"]==0].plot("y_true")

In [None]:
# Plot the rows with time == 1, using the "X_3" column as the plotted variable.
gdf[gdf["time"]==1].plot("X_3")

In [None]:
# OLS baseline: regress y_true on the three covariates and the "w_rook"
# column, with an intercept column prepended by statsmodels.
xb = gdf[["X_1", "X_2", "X_3", "w_rook"]].to_numpy().reshape(-1, 4)
y_true = gdf["y_true"].to_numpy().reshape(-1, 1)
X = sm.add_constant(xb, prepend=True)
results = sm.OLS(endog=y_true, exog=X).fit()
print(results.summary())

In [None]:
# Display the residuals of the fitted result object bound to `results`.
results.resid

In [None]:
# Work on a plain table: drop the geometry column, then pull out the arrays
# that enter the model.
df = gdf.drop("geometry", axis=1)
y_true = df["y_true"].values
X_1 = df["X_1"].values
X_2 = df["X_2"].values
X_3 = df["X_3"].values
w = df["w_queen"].values

with pm.Model() as model:
    # Priors: half-Cauchy on the noise scale, zero-centred Normals on the
    # intercept and on every coefficient.
    sigma = pm.HalfCauchy("sigma", beta=10)
    intercept = pm.Normal("intercept", 0, sigma=20)
    beta_1 = pm.Normal("X_1", 0, sigma=10)
    beta_2 = pm.Normal("X_2", 0, sigma=10)
    beta_3 = pm.Normal("X_3", 0, sigma=10)
    # Bound to `rho_coef` so the module-level constant `rho` is not replaced
    # by a random variable; the model variable itself is still named "rho".
    rho_coef = pm.Normal("rho", 0, sigma=10)

    # Likelihood: Normal around the linear predictor, which includes the
    # "w_queen" column scaled by the "rho" coefficient.
    linear_predictor = (
        intercept + beta_1 * X_1 + beta_2 * X_2 + beta_3 * X_3 + rho_coef * w
    )
    likelihood = pm.Normal(
        "y_true", mu=linear_predictor, sigma=sigma, observed=y_true
    )

    idata = pm.sample(draws=3000, chains=10, cores=10)

    # Keep only the posterior_predictive group of the returned object.
    posterior_predictive = pm.sample_posterior_predictive(idata).posterior_predictive



In [None]:
# Plot 20 posterior regression lines together with predicted averages.
# NOTE(review): this cell cannot run from the visible cells as written:
# `xr` is never imported, `x_pred` and `pred_mean` are never assigned, and the
# posterior keys "a" / "b" are not among the variables the model cell above
# declares ("sigma", "intercept", "X_1", "X_2", "X_3", "rho"). Confirm whether
# these names come from cells outside this view or the cell is stale.
fig, ax = plt.subplots(figsize=(7, 7))
posterior = az.extract(idata, num_samples=20)  # subset of 20 posterior draws
x_plot = xr.DataArray(np.linspace(1, 2, 100))  # 100 evenly spaced x values on [1, 2]
y_plot = posterior["b"] * x_plot + posterior["a"]  # one line per extracted draw
Line2 = ax.plot(x_plot, y_plot.transpose(), color="C1")
Line1 = ax.plot(x_pred, pred_mean, "x")
ax.set(title="Posterior predictive regression lines", xlabel="x", ylabel="y")
# Only the first artist of each plot call is used as a legend handle.
ax.legend(
    handles=[Line1[0], Line2[0]], labels=["predicted average", "inferred regression line"], loc=0
);

In [None]:
# NOTE(review): `y_hat_mean` is not assigned in any visible cell; this raises
# NameError unless it is defined elsewhere.
y_hat_mean

In [None]:
# Display the number of rows in the geometry-free table.
len(df)

In [None]:
# NOTE(review): in the visible cells `residuals` is only assigned by the
# posterior-predictive cell further down, so this display needs that cell to
# have been run first.
residuals

In [None]:
# Draw posterior predictive samples for the fitted model.
posterior_predictive = pm.sample_posterior_predictive(idata, model=model)

# Predicted values of the observed variable "y_true"; the array is laid out as
# (chain, draw, observation).
y_pred_samples = posterior_predictive.posterior_predictive['y_true'].values

# Residual = observed - predicted, for every observation under every draw.
# `y_true` is flattened first because other cells bind it as a column vector,
# which would not broadcast against the trailing observation axis.
residuals = np.ravel(y_true) - y_pred_samples

# Collapse BOTH sampling axes (chain and draw) so each observation gets one
# mean and one standard deviation. Reducing over axis 0 alone would only
# average across chains and leave a (draw, observation) array.
mean_residuals = np.mean(residuals, axis=(0, 1))
std_residuals = np.std(residuals, axis=(0, 1))

print("Mean Residuals: ", mean_residuals)
print("Standard Deviation of Residuals: ", std_residuals)

In [None]:
# Average of the residual means, i.e. a single overall mean residual.
mean_residuals.mean()

In [None]:
# Summary table of `idata` with 95% HDIs; keep the posterior means separately.
az_queen = az.summary(idata, hdi_prob=0.95)
bayes_queen = az_queen["mean"]  # posterior mean per summarised variable

In [None]:
# NOTE(review): `bayes_knn6` is not assigned in any visible cell (only
# `bayes_queen` is); this raises NameError unless it is defined elsewhere.
bayes_knn6["X_1"]

In [None]:
# Trace plots for `idata`, then its summary table as the cell output.
az.plot_trace(idata)
az.summary(idata)

In [None]:
# Fit the same linear specification through Bambi's formula interface, with
# a Normal(0, 2) prior on the "w_queen" coefficient.
# NOTE: this rebinds the notebook-level names `model` and `results`.
df = gdf.drop(columns="geometry")
priors = dict(w_queen=bmb.Prior("Normal", mu=0, sigma=2))
model = bmb.Model(
    "y_true ~ 1 + X_1 + X_2 + X_3 + w_queen",
    data=df,
    priors=priors,
    dropna=True,
)
results = model.fit()

In [None]:
# Trace plots for the Bambi fit, then its summary table as the cell output.
az.plot_trace(results)
az.summary(results)

In [None]:
# Summary of the Bambi fit with 95% HDIs; show the posterior means.
az_summary = az.summary(results, hdi_prob=0.95)
az_summary["mean"]

In [None]:
# Attach centroid coordinates to every row. For point geometries `.x` is the
# horizontal coordinate (longitude / easting) and `.y` the vertical one
# (latitude / northing), so "lat" must be read from `.y` and "lon" from `.x`.
gdf["centroid"] = gdf.centroid
gdf["lat"] = gdf["centroid"].y
gdf["lon"] = gdf["centroid"].x
# Only the "geometry" column is dropped; the "centroid" column stays in df.
df = gdf.drop("geometry", axis=1)
df

In [None]:
# Design matrix (covariate plus centroid coordinates) and the response as a
# column vector.
# NOTE(review): the column names "X1" and "y_d" differ from the "X_1" /
# "y_true" columns used by earlier cells; confirm which panel schema applies.
X = df[["X1", "lat", "lon"]].to_numpy().reshape(-1, 3)
y = df["y_d"].to_numpy()[:, np.newaxis]
X

In [None]:
# Order rows by period, then by unit, and renumber the index from zero.
gdf = gdf.sort_values(by=["time", "id"]).reset_index(drop=True)

# Integer code (0..N-1) for every spatial unit, used to index per-unit parameters.
gdf["unit_id"] = gdf["id"].astype("category").cat.codes
unit_idx = gdf["unit_id"].values

# Panel dimensions: number of distinct units and of distinct periods.
N = gdf["unit_id"].nunique()
T = gdf["time"].nunique()

# Response, covariate and the "w_d" column as flat arrays.
y, X1, Wy = (gdf[column].values for column in ("y_d", "X1", "w_d"))

In [None]:
# Model with one "rho" coefficient and one intercept per spatial unit, both
# drawn from zero-centred Normals whose scales are themselves estimated.
with pm.Model() as model:
    # Scales: observation noise and the spread of the per-unit effects.
    sigma = pm.HalfNormal("sigma", sigma=2.0)
    tau_rho = pm.HalfNormal("tau_rho", sigma=1.0)
    tau_mu = pm.HalfNormal("tau_mu", sigma=1.0)

    # Common slope, then per-unit coefficients (length N each).
    beta = pm.Normal("beta", mu=0, sigma=5)
    rho_i = pm.Normal("rho", mu=0, sigma=tau_rho, shape=N)
    mu_i = pm.Normal("mu", mu=0, sigma=tau_mu, shape=N)

    # Register the inputs as named data containers on the model.
    X_data = pm.Data("X1", X1)
    Wy_data = pm.Data("Wy", Wy)
    unit_idx_data = pm.Data("unit_idx", unit_idx)

    # Each row picks up the coefficient and intercept of its own unit.
    mu_y = (
        rho_i[unit_idx_data] * Wy_data
        + beta * X_data
        + mu_i[unit_idx_data]
    )

    y_obs = pm.Normal("y_obs", mu=mu_y, sigma=sigma, observed=y)

    trace = pm.sample(
        draws=1000,
        tune=1000,
        target_accept=0.9,
        return_inferencedata=True,
    )

In [None]:
# az.plot_trace(trace, var_names=["rho", "beta", "sigma"])
# az.summary(trace, var_names=["rho", "beta", "sigma"])

In [None]:
# Check, unit by unit, whether the 94% HDI of "rho" covers a reference value.
# NOTE(review): the panel built earlier in this notebook passes rho=0.7 to
# spatial_panel; confirm that 0.8 is the intended reference here.
rho_true = 0.8
summary = az.summary(trace, var_names=["rho"], hdi_prob=0.94)

# True where lower bound <= rho_true <= upper bound (one entry per unit).
within_hdi = summary["hdi_3%"].le(rho_true) & summary["hdi_97%"].ge(rho_true)

# Coverage counts.
num_total = len(within_hdi)
num_pass = within_hdi.sum()
all_contain = within_hdi.all()

print(f"True rho = {rho_true}")
print(f"{num_pass}/{num_total} HDIs contain true rho.")

# List the positions of the units whose interval misses the reference value.
if not all_contain:
    failed_units = np.where(~within_hdi)[0]
    print(f"Units failing HDI test: {failed_units}")