In [None]:
import os
# Step up one directory (presumably to the project root) so that the `src`
# package imported in the next cell resolves. Guarded on `src` being visible so
# that re-running this cell does not climb one more directory every time.
if not os.path.isdir("src"):
    os.chdir("..")

In [None]:
import polars as pl
import pandas as pd
import geopandas as gpd
import altair as alt
from src.data.data_process import FoodDeseart
from shapely import wkt
from pysal.lib import weights
import spreg
# Shared project data helper: the cells below call its loaders (make_dataset,
# process_death, food_data, pull_dp03) and its calculate_spatial_lag method.
dc = FoodDeseart()

In [None]:
# Build the dataset and display it; the result is not stored here (a later cell
# assigns it to `df`). NOTE(review): possibly run only to warm caches — confirm.
dc.make_dataset()

In [None]:
# Death counts (polars frame) and the food-establishment GeoDataFrame.
death_df = dc.process_death()
gdf = dc.food_data()

# Every DP03 row is stamped as Q4 so it can be joined on (year, qtr, zipcode).
dp03_df = dc.pull_dp03()
dp03_df = dp03_df.with_columns(qtr=4)
# Left join (previously inner): DP03 only carries qtr == 4, so an inner join
# discarded every Q1-Q3 row. That left nothing for the interpolation below to
# fill and no Q1 rows for the weights matrix or the per-quarter lag loop.
gdf = gdf.merge(
    dp03_df.to_pandas(),
    on=["year", "qtr", "zipcode"],
    how="left",
    validate="1:1",
)
# Order each zipcode's rows chronologically so interpolation runs along time.
gdf = gdf.sort_values(by=["zipcode", "year", "qtr"]).reset_index(drop=True)

# DP03 columns observed only at Q4; the gaps between them are interpolated.
columns = [
    "total_population",
    "inc_25k_35k",
    "inc_35k_50k",
    "inc_50k_75k",
    "inc_75k_100k",
    "inc_100k_150k",
    "inc_150k_200k",
    "inc_more_200k",
]
for col in columns:
    # Cubic interpolation within each zipcode. Values before the first or after
    # the last observation are not extrapolated and stay NaN.
    # NOTE(review): cubic needs at least 4 observed points per zipcode — confirm
    # every zipcode has that many DP03 years.
    gdf[col] = gdf.groupby("zipcode")[col].transform(
        lambda group: group.interpolate(method="cubic")
    )

# Attach death counts; periods without a death record keep NaN.
gdf = gdf.merge(
    death_df.to_pandas(),
    on=["year", "qtr", "zipcode"],
    how="left",
    validate="1:1",
)

# Deaths per resident for each cause: "<cause>_disease" / total_population.
for disease in ("paracites", "cancer", "nervous", "respiratory", "circulatory"):
    gdf[f"{disease}_by_pop"] = gdf[f"{disease}_disease"] / gdf["total_population"]

# Period-major order so every (year, qtr) slice lists zipcodes in the same order.
gdf = gdf.sort_values(by=["year", "qtr", "zipcode"]).reset_index(drop=True)
# Queen-contiguity weights from the 2017 Q1 cross-section, reused for all periods.
# NOTE(review): assumes every period has the same zipcodes as 2017 Q1 — confirm.
w = weights.Queen.from_dataframe(gdf[(gdf["year"] == 2017) & (gdf["qtr"] == 1)])
spatial_lag_results = []

In [None]:
# Rows whose geometry is missing. `isna()` is the null check on a (Geo)Series;
# the previous `.isnan()` is not a Series method and raised AttributeError.
gdf[gdf["geometry"].isna()]

In [None]:
# 2015 Q1 cross-section of `gdf`, re-indexed from zero.
group_df = gdf.loc[gdf["year"].eq(2015) & gdf["qtr"].eq(1)].reset_index(drop=True)

In [None]:
# Distinct zipcodes that appear in the death records.
dc.process_death()["zipcode"].unique()

In [None]:
# 2016 Q1 cross-section of `gdf`, re-indexed from zero.
group_df = gdf.loc[gdf["year"].eq(2016) & gdf["qtr"].eq(1)].reset_index(drop=True)
# (A spatial-lag call on "paracites_by_pop" used to sit here; it stays disabled.)
# Non-null count per column, to see which fields are populated for this period.
group_df.count()

In [None]:
# Lag column to create -> source column it is computed from.
lag_columns = {
    "w_paracites": "paracites_by_pop",
    "w_cancer": "cancer_by_pop",
    "w_nervouse": "nervous_by_pop",
    "w_respiratory": "respiratory_by_pop",
    "w_circulatory": "circulatory_by_pop",
    "w_supermarkets_and_others_area": "supermarkets_and_others_area",
    "w_supermarkets_area": "supermarkets_area",
    "w_convenience_retailers_area": "convenience_retailers_area",
    "w_whole_foods_area": "whole_foods_area",
    "w_total_food_area": "total_food_area",
    "w_construction_area": "construction_area",
    "w_finance_area": "finance_area",
}

# Start from an empty list inside this cell: relying on the list created in an
# earlier cell meant a re-run appended every period a second time.
spatial_lag_results = []
for year in range(2016, 2020):
    for qtr in range(1, 5):
        print(f"{year}, {qtr}")
        group_df = gdf[(gdf["year"] == year) & (gdf["qtr"] == qtr)].reset_index(
            drop=True
        )
        # Source and lag column names never overlap, so writing each lag as soon
        # as it is computed gives the same values as computing them all first.
        for lag_col, source_col in lag_columns.items():
            group_df[lag_col] = dc.calculate_spatial_lag(
                group_df, w, source_col
            ).flatten()

        # Append the group to the results list
        spatial_lag_results.append(group_df)

# `gdf` is replaced by the 2016-2019 panel with lag columns attached.
# ignore_index avoids repeating each period's 0..n-1 index in the stacked frame.
gdf = pd.concat(spatial_lag_results, ignore_index=True)

In [None]:
# Keep the assembled dataset as `df` for the cells below, then display it.
df = dc.make_dataset()
df

In [None]:
def calculate_spatial_lag(df, w, column):
    """Return the spatial lag of one column of ``df`` under weights ``w``.

    Args:
        df: Frame holding the variable; one row per observation.
        w: Spatial weights passed straight to ``weights.lag_spatial``.
        column: Name of the column in ``df`` to lag.

    Returns:
        Whatever ``weights.lag_spatial`` yields for the column reshaped to a
        single-column 2-D array.
    """
    # lag_spatial is fed a column vector, i.e. shape (n_rows, 1).
    column_vector = df[column].values.reshape(-1, 1)
    return weights.lag_spatial(w, column_vector)

# One frame per (year, quarter) is collected here, each with its lag columns.
spatial_lag_results = []

# `df` is sliced on its "year" and "qtr" columns, one period at a time.
for year in range(2012, 2019):
    for qtr in range(1, 5):
        group_df = df.loc[df["year"].eq(year) & df["qtr"].eq(qtr)].reset_index(drop=True)

        # Lag each variable with the shared weights `w`; flatten to 1-D so the
        # result can be stored as an ordinary column.
        group_df["w_employment"] = calculate_spatial_lag(group_df, w, "total_employment").flatten()
        group_df["W_k_index"] = calculate_spatial_lag(group_df, w, "k_index").flatten()

        spatial_lag_results.append(group_df)

# Stack the per-period frames into the regression table and display it.
reg = pd.concat(spatial_lag_results)
reg

In [None]:
# Ensure that 'data' remains a DataFrame
data = df.copy()

# Sort the data by year, quarter, and zipcode.
# (This line used to be a stray `s`, which raised NameError and left the sort
# described above undone.)
data = data.sort_values(by=["year", "qtr", "zipcode"]).reset_index(drop=True)
# Spot-check the first rows for a single zipcode.
data[data["zipcode"] == "00682"].head(10)

In [None]:
# Queen-contiguity weights built from the 2017 Q1 rows of `data`.
# NOTE(review): this rebinds `w`, which an earlier cell already built from `gdf`
# and which the lag loops above consume; on a top-to-bottom run those loops use
# the earlier weights, not these.
w = weights.Queen.from_dataframe(data.loc[data["year"].eq(2017) & data["qtr"].eq(1)])