In [None]:
import os
# Move up to the parent directory so the `src` package imported below resolves.
# Guarded on the presence of `src/` so that re-running this cell (without a
# kernel restart) does not climb one more directory level each time.
# NOTE(review): assumes the project root is the directory that contains `src/`.
if not os.path.isdir("src"):
    os.chdir("..")

In [None]:
from src.data.data_process import DataReg
import polars as pl
import requests
import spreg
from linearmodels.panel import PanelOLS
import statsmodels.formula.api as smf
import bambi as bmb
import arviz as az 
import geopandas as gpd
from pysal.lib import weights
from shapely import wkt
import pandas as pd

import matplotlib.dates as mdates
import numpy as np
from pysal.lib import cg as geometry
import causalpy as cp
import matplotlib.pyplot as plt
# Project data helper; `dr.make_dataset()` supplies the working DataFrame.
dr = DataReg()

In [None]:
# Build the dataset and order it by period first, then zipcode, with a fresh
# 0..N-1 index so positional lookups follow the sorted order.
df = (
    dr.make_dataset()
    .sort_values(by=["year", "qtr", "zipcode"])
    .reset_index(drop=True)
)
df

In [None]:
# Alternative (disabled): distance-band weights with a threshold of 1609.344 * 20
# w = weights.distance.DistanceBand.from_dataframe(df[(df["year"]== 2012) & (df["qtr"]== 1)], 1609.344 * 20, binary=True)
# Queen contiguity weights built from the 2012 Q1 rows only.
w = weights.Queen.from_dataframe(
    df.loc[(df["year"] == 2012) & (df["qtr"] == 1)]
)

In [None]:
def calculate_spatial_lag(df, w, column):
    """Return the spatial lag of one column of ``df`` under the weights ``w``.

    Parameters
    ----------
    df : pandas.DataFrame
        Observations to lag. Values are handed to ``weights.lag_spatial`` by
        position, so the rows must line up with the observations of ``w``.
    w : spatial weights object
        Passed straight to ``weights.lag_spatial``; must expose ``n``, the
        number of observations it was built for.
    column : str
        Name of the column in ``df`` to lag.

    Returns
    -------
    The result of ``weights.lag_spatial`` applied to the column reshaped into
    a single-column 2-D array.

    Raises
    ------
    ValueError
        If ``df`` does not have exactly ``w.n`` rows.
    """
    # Fail early with an actionable message instead of an opaque matrix
    # dimension error when the frame and the weights disagree in size.
    if len(df) != w.n:
        raise ValueError(
            f"cannot lag '{column}': data has {len(df)} rows but the "
            f"weights describe {w.n} observations"
        )

    # lag_spatial is given a column vector (one row per observation)
    y = df[column].values.reshape(-1, 1)

    return weights.lag_spatial(w, y)

# One frame per (year, quarter) cross-section, each with its spatial lags attached
spatial_lag_results = []

# `df` carries 'year' and 'qtr' columns; lags are computed one period at a
# time because `w` describes a single cross-section.
for year in range(2012, 2019):
    for qtr in range(1, 5):
        group_df = df[(df["year"] == year) & (df["qtr"] == qtr)].reset_index(drop=True)

        # Every period must have exactly one row per observation in `w`;
        # name the offending period rather than failing inside the lag.
        if len(group_df) != w.n:
            raise ValueError(
                f"{year} Q{qtr} has {len(group_df)} rows but the spatial "
                f"weights describe {w.n} observations"
            )

        spatial_lag_y = calculate_spatial_lag(group_df, w, 'total_employment')
        spatial_lag_x = calculate_spatial_lag(group_df, w, 'k_index')

        # Lags come back as column vectors; flatten to 1-D for assignment
        group_df['w_employment'] = spatial_lag_y.flatten()
        group_df['W_k_index'] = spatial_lag_x.flatten()

        spatial_lag_results.append(group_df)

# Stack the periods back together. ignore_index gives the panel a unique
# 0..N-1 index instead of repeating each period's 0..n-1 labels.
reg = pd.concat(spatial_lag_results, ignore_index=True)
reg

In [None]:
# Work on a deep copy so the modelling cells never modify `reg` itself
data = reg.copy(deep=True)
data

In [None]:
# Pooled OLS without an intercept on k_index and the two spatial-lag columns
mod = smf.ols(
    formula="total_employment ~ 0 + k_index + W_k_index + w_employment",
    data=data,
).fit()
print(mod.summary())

In [None]:
# Column vectors (one row per observation) for the spreg estimators
y = data["total_employment"].to_numpy().reshape(-1, 1)
x = data["k_index"].to_numpy().reshape(-1, 1)

In [None]:
# Fit spreg's Panel_FE_Lag on the stacked arrays and print its summary
fe_lag = spreg.Panel_FE_Lag(y=y, x=x, w=w)
print(fe_lag.summary)

In [None]:
# Run spreg's `gets_sdm` specification search on the same arrays
# (presumably a general-to-specific search starting from a spatial Durbin
# model — confirm against the spreg documentation).
# NOTE(review): y and x stack every period, while w was built from a single
# period's rows — confirm gets_sdm accepts that shape combination.
spreg.spsearch.gets_sdm(y,x,w)

In [None]:
# Bayesian version of the regression: the same three regressors with no
# global intercept, plus a group-specific intercept per zipcode.
model = bmb.Model(
    "total_employment ~ 0 + k_index + W_k_index + w_employment + (1|zipcode)",
    data,
    dropna=True,
)
# Fixed seed so that Restart & Run All reproduces the same posterior draws;
# target_accept is raised above the sampler default.
results = model.fit(target_accept=0.95, random_seed=42)

In [None]:
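# Alternative specification kept for reference (not run): adds a date term, zipcode-level intercepts and k_index slopes, and income-bracket controls.
# model = bmb.Model("total_employment ~ k_index + date + (1 + k_index|zipcode) + w_employment + inc_less_10k + inc_10k_15k + inc_15k_25k + inc_25k_35k + inc_35k_50k +  inc_50k_75k + inc_75k_100k + inc_100k_150k + inc_150k_200k", data, dropna=True)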

In [None]:
# Plot the prior distributions of the Bambi model
model.plot_priors()

In [None]:
# Plot posteriors
az.plot_trace(
    results,
    compact=True,
)

In [None]:
# Tabulate posterior summary statistics for the fitted model
res = az.summary(data=results)
res

In [None]:
# Forest plot with chains combined and 94% HDI intervals
az.plot_forest(
    data=results,
    combined=True,
    hdi_prob=0.94,
)