In [None]:
import os

# Step up from the notebook's folder to the project root, presumably so that the
# `src` package imported below and relative paths such as the database file resolve.
# The guard makes the cell idempotent: an unconditional `os.chdir("..")` climbs one
# more directory every time the cell is re-run, or when the kernel already starts
# in the project root.
# NOTE(review): assumes the notebook's own folder does not contain a `src` directory.
if not os.path.isdir("src"):
    os.chdir("..")

In [None]:
import polars as pl
import geopandas as gpd
import pandas as pd
import arviz as az
import numpy as np
import bambi as bmb
from src.data_pull import DataPull
import requests

import arviz as az

import causalpy as cp

# Data-access helper used by every pull in this notebook, opened on the
# database file given by a relative path.
dp = DataPull(database_file="data.ddb")

# Apply the ArviZ dark-grid theme to the figures drawn below.
az.style.use("arviz-darkgrid")

In [None]:
# Pull the minimum-wage table through the DataPull helper and preview it exactly
# as returned, before any cleaning is applied.
df_min = dp.pull_min_wage()
df_min

In [None]:
# --- Minimum wage by state and year ---------------------------------------
# Remove the literal "$" from `min_wage`.
# NOTE(review): the column is cleaned but never cast, so it presumably stays a
# string — confirm before using it as a numeric variable.
df_min = dp.pull_min_wage().with_columns(
    min_wage=pl.col("min_wage").str.replace("$", "", literal=True)
)

# State attributes without the geometry column, used as a lookup on `state_name`.
df_shpae = pl.from_pandas(dp.pull_states_shapes().drop(columns="geometry"))

# Many wage rows map to one state row. `year` is turned into a string,
# presumably to match the dtype of the QCEW `year` key joined on below.
df_min = df_min.join(
    df_shpae, on="state_name", how="inner", validate="m:1"
).with_columns(pl.col("year").cast(pl.String))

# --- QCEW rows at aggregation level 74 --------------------------------------
var = ",".join(
    [
        "area_fips",
        "year",
        "qtr",
        "industry_code",
        "agglvl_code",
        "month1_emplvl",
        "month2_emplvl",
        "month3_emplvl",
        "total_qtrly_wages",
        "avg_wkly_wage",
        "qtrly_estabs",
    ]
)
df = (
    dp.conn.sql(
        f"""
    SELECT {var} FROM 'QCEWTable' 
        WHERE agglvl_code=74;
    """
    )
    .pl()
    # Left-pad the area code to five characters ...
    .with_columns(area_fips=pl.col("area_fips").str.zfill(5))
    # ... so that its first two characters can serve as the `fips` join key.
    .with_columns(fips=pl.col("area_fips").str.slice(0, 2))
    # Each QCEW row picks up the single wage row for its (fips, year) pair;
    # rows without a match are dropped by the inner join.
    .join(df_min, on=["fips", "year"], how="inner", validate="m:1")
    # Integer time keys for the joins and sorting done in later cells.
    .with_columns(
        pl.col("qtr").cast(pl.Int32),
        pl.col("year").cast(pl.Int32),
    )
)

In [None]:
# Pull the DP03 table and add the keys needed to join it onto the QCEW frame:
# every row is stamped with quarter 4, and `geoid` is copied into `area_fips`.
df_dp03 = dp.pull_dp03().with_columns(
    pl.lit(4).alias("qtr"),
    pl.col("geoid").alias("area_fips"),
)
df_dp03

In [None]:
# Attach the DP03 columns to the QCEW rows and hand the result to pandas.
# The left join keeps every QCEW row; since the DP03 frame is stamped with
# qtr=4 when it is built, only fourth-quarter rows can find a match and the
# other quarters come back with nulls in the DP03 columns.
join_keys = ["area_fips", "year", "qtr"]
data = (
    df.join(df_dp03, on=join_keys, how="left", validate="m:1")
    .sort(by=join_keys)
    .to_pandas()
)

In [None]:
# Preview the joined QCEW + DP03 pandas frame before interpolation.
data

In [None]:
# DP03 columns whose gaps are filled county by county.
columns = [
    "total_population",
    "own_children6",
    "own_children17",
    "commute_car",
    "total_house",
    "with_social_security",
    "food_stamp",
]

# A cubic fit needs at least this many observed points.
MIN_CUBIC_POINTS = 4


def _interpolate_cubic(group):
    """Fill the gaps of one county's series with cubic interpolation.

    pandas hands `method="cubic"` to SciPy, which raises when fewer than four
    observed values are available. Series that sparse are returned unchanged
    (their gaps stay missing) so a single thin county cannot abort the cell.

    Parameters
    ----------
    group : pandas.Series
        Values of one column for a single `area_fips`, in row order.

    Returns
    -------
    pandas.Series
        A series with the same index as `group`.
    """
    if group.notna().sum() < MIN_CUBIC_POINTS:
        return group
    return group.interpolate(method="cubic")


for col in columns:
    data[col] = data.groupby("area_fips")[col].transform(_interpolate_cubic)

In [None]:
# Convert the interpolated pandas frame back into a polars DataFrame.
# NOTE(review): this rebinds `df`, which until now held the QCEW/min-wage frame;
# re-running the earlier join cell after this one would start from this frame instead.
df = pl.DataFrame(data)

In [None]:
# Show the rows of one state. `data[data["fips"]]` passed the column's string
# values as column labels, which raises a KeyError; selecting rows needs a
# boolean mask.
# NOTE(review): "56" is assumed to be the state of interest because the
# county-shape filter further down keeps `fips == "56"` — adjust if another
# state was meant.
data[data["fips"] == "56"]

In [None]:
from shapely import wkt
# County shapes come back with WKT text in `geometry`; parse it into shapely objects.
gdf = gpd.GeoDataFrame(dp.pull_county_shapes())
gdf["geometry"] = gdf["geometry"].apply(wkt.loads)

# Declare the source CRS (EPSG:4269), then reproject to EPSG:3395.
gdf = (
    gdf.set_geometry("geometry")
    .set_crs("EPSG:4269", allow_override=True)
    .to_crs("EPSG:3395")
)

# String keys: `area_fips` is the merge key used later, `fips` is filtered on below.
gdf = gdf.assign(
    area_fips=gdf["geo_id"].astype(str),
    fips=gdf["fips"].astype(str),
)

# Keep the single county "06081" plus every county whose `fips` is "56".
in_target_county = gdf["area_fips"] == "06081"
in_target_state = gdf["fips"] == "56"
gdf = gdf[in_target_county | in_target_state]

In [None]:
# Quick visual check of the county shapes kept by the filter.
gdf.plot()

In [None]:
# Attach the QCEW/DP03 rows to their county shape. One shape row fans out to
# many data rows (validated as 1:m); shapes without data are kept by the left join.
# NOTE(review): `gdf` is rebound, so re-running this cell merges onto the
# already-merged frame.
merge_options = {"on": ["area_fips"], "how": "left", "validate": "1:m"}
gdf = gdf.merge(data, **merge_options)

# "06081" is the single county kept alongside the `fips == "56"` counties by the shape filter.

In [None]:
# List the column names available after the merge.
gdf.columns

In [None]:
# Narrow the merged frame to identifiers, the wage/establishment measures,
# the minimum wage and population.
# NOTE(review): `geometry` is not in this list, so the result is presumably a
# plain pandas DataFrame rather than a GeoDataFrame; later cells also read
# columns dropped here (e.g. `geometry`, `total_qtrly_wages`,
# `with_social_security`) — confirm this is intended.
keep_columns = [
    "county_name",
    "area_fips",
    "year",
    "qtr",
    "industry_code",
    "agglvl_code",
    "avg_wkly_wage",
    "qtrly_estabs",
    "state_name",
    "min_wage",
    "total_population",
]
gdf = gdf[keep_columns]
# tmp[(tmp["industry_code"] == "72") & (tmp["year"] == 2016) & (tmp["qtr"] == 1)].plot("avg_wkly_wage",legend=True)


In [None]:
# Preview the narrowed frame.
gdf

In [None]:
# Encode the period as one sortable number: year * 10 + quarter
# (for example 2019 Q4 -> 20194).
# `.assign` builds a new frame instead of writing into `gdf` in place: `gdf` is
# a column subset of the merged frame, and in-place column assignment on such a
# subset can trigger pandas' SettingWithCopyWarning.
gdf = gdf.assign(date=gdf["year"] * 10 + gdf["qtr"])
gdf

In [None]:
# Rows for industry code "72" in 2019, converted to polars.
is_industry_72 = gdf["industry_code"] == "72"
is_2019 = gdf["year"] == 2019
data_master = pl.DataFrame(gdf[is_industry_72 & is_2019].reset_index(drop=True))

# Long format: one row per (year, qtr, area_fips) and unpivoted measure.
# NOTE(review): the only measure unpivoted is `avg_wkly_wage`, yet the output
# columns are labelled "value" (measure name) and "employment" (measure value) —
# the labels look carried over from another notebook; confirm.
data_master.unpivot(
    index=["year", "qtr", "area_fips"],  # identifier columns kept as-is
    on=["avg_wkly_wage"],  # measure column(s) to stack
    variable_name="value",  # output column holding the measure's name
    value_name="employment",  # output column holding the measure's value
)

In [None]:
# Measures summarised per county shape and industry.
# NOTE(review): several of these columns, and the `geometry` grouping key, are
# not in the column list `gdf` was narrowed to in an earlier cell — verify
# that `gdf` still carries them when this cell runs.
columns_to_aggregate = [
    "total_qtrly_wages",
    "avg_wkly_wage",
    "qtrly_estabs",
    "total_population",
    "in_labor_force",
    "unemployment",
    "own_children6",
    "own_children17",
    "commute_car",
    "commute_time",
    "total_house",
    "inc_less_10k",
    "inc_10k_15k",
    "inc_15k_25k",
    "inc_25k_35k",
    "inc_35k_50k",
    "inc_50k_75k",
    "inc_75k_100k",
    "inc_100k_150k",
    "inc_150k_200k",
    "inc_more_200k",
    "with_social_security",
    "food_stamp",
]

# Mean and standard deviation of every measure for each group; `reset_index`
# turns the group keys back into columns and leaves a fresh 0..n-1 index.
group_keys = ["geometry", "area_fips", "industry_code"]
aggregated_gdf = (
    gdf.groupby(group_keys)[columns_to_aggregate]
    .agg(["mean", "std"])
    .reset_index()
)

# Flatten the two-level (column, statistic) header into "column_statistic".
# The group keys have an empty statistic, so they end with a trailing
# underscore: "geometry_", "area_fips_", "industry_code_".
aggregated_gdf.columns = [
    f"{name}_{stat}".strip() for name, stat in aggregated_gdf.columns
]


In [None]:
# Wrap the aggregated table as a GeoDataFrame. The geometry column is named
# "geometry_" (trailing underscore) because of how the header was flattened.
aggregated_gdf = gpd.GeoDataFrame(aggregated_gdf, geometry="geometry_")

In [None]:
# Preview the per-county, per-industry summary.
aggregated_gdf

In [None]:
# Map the mean weekly wage for industry code "72".
# The filter uses the aggregated frame's own key column, which the header
# flattening names "industry_code_". The previous mask was built from `gdf`,
# whose length and index differ from `aggregated_gdf`, so it could not be
# aligned with the rows being filtered.
aggregated_gdf[aggregated_gdf["industry_code_"] == "72"].plot("avg_wkly_wage_mean", legend=True)

In [None]:
# NOTE(review): unfinished cell — grouping by the empty-string key "" will raise a
# KeyError unless `gdf` has a column with that name, and `temp` is not used by any
# later cell shown here. Fill in the intended grouping key(s) or delete the cell.
temp = gdf.groupby([""])

In [None]:
# Average weekly wage for industry code "11" in the first quarter of 2023.
# NOTE(review): if `gdf` is no longer a GeoDataFrame at this point (the earlier
# column narrowing leaves out `geometry`), this is a regular pandas plot rather
# than a map — confirm.
first_quarter_2023 = (gdf["year"] == 2023) & (gdf["qtr"] == 1)
industry_11 = gdf["industry_code"] == "11"
gdf[first_quarter_2023 & industry_11].plot("avg_wkly_wage")

In [None]:
# Same slice as the wage plot (2023, quarter 1, industry code "11"), drawn for
# `with_social_security`.
# NOTE(review): `with_social_security` is not in the column list `gdf` was
# narrowed to in an earlier cell — verify it is still present here.
selection = gdf.loc[
    (gdf["year"] == 2023) & (gdf["qtr"] == 1) & (gdf["industry_code"] == "11")
]
selection.plot("with_social_security")