In [4]:
import glob
import pandas as pd
import re

# Reads the SVI file from disk; the expected file name is SVI_<year>_US.csv.
# Open questions:
#   - How do we record and pass in the transformations from an old version to a new one,
#     and how do we check which version in the database is the most up to date?
#   - Where do we store those transformations, and how do we pass in the most recent data
#     in the database so we know which transformations are needed between versions?
# Plan: load 2022 first, then design the configuration required between versions.
def load_svi_from_local(local_path, year, **read_csv_kwargs):
    """Load the SVI CSV for one year from a local directory.

    The file is expected to be named ``SVI_<year>_US.csv`` and to sit directly
    inside ``local_path``.

    Args:
        local_path: Directory to search (``str`` or path-like). It is taken
            literally: glob metacharacters such as ``[``, ``*`` or ``?`` in the
            directory name are escaped before matching.
        year: Value substituted into the file name. It is not escaped, so a
            glob pattern passed here is still expanded.
        **read_csv_kwargs: Optional keyword arguments forwarded unchanged to
            ``pandas.read_csv`` (for example ``dtype`` to control how columns
            are parsed).

    Returns:
        pandas.DataFrame: The parsed contents of the single matching file.

    Raises:
        ValueError: If no file, or more than one file, matches.
    """
    # Escape only the directory part so a folder such as "svi[2022]" is not
    # misread as a character class; the file-name part is left as a pattern.
    pattern = f"{glob.escape(str(local_path))}/SVI_{year}_US.csv"
    files_to_load = glob.glob(pattern)

    if not files_to_load:
        raise ValueError(f"No file found for year {year} in {local_path}!")
    if len(files_to_load) > 1:
        raise ValueError(f"Multiple files found for {year} in {local_path}!\n{files_to_load}")

    return pd.read_csv(files_to_load[0], **read_csv_kwargs)

In [15]:
# Load the 2022 SVI file from the local SVI data directory.
df_svi = load_svi_from_local(local_path="../chosen_data/svi/", year=2022)

In [22]:
import glob
import pandas as pd
import re

# The Zillow file will always be loaded as an overwrite because of how the data appears in the columns
def load_zillow_from_local(local_path, zillow_file, **read_csv_kwargs):
    """Load a single Zillow CSV from a local directory.

    Args:
        local_path: Directory to search (``str`` or path-like). It is taken
            literally: glob metacharacters such as ``[``, ``*`` or ``?`` in the
            directory name are escaped before matching.
        zillow_file: File name to load. It is not escaped, so a glob pattern
            passed here is still expanded; it must resolve to exactly one file.
        **read_csv_kwargs: Optional keyword arguments forwarded unchanged to
            ``pandas.read_csv``.

    Returns:
        pandas.DataFrame: The parsed contents of the single matching file.

    Raises:
        ValueError: If no file, or more than one file, matches.
    """
    # Escape only the directory part so a folder such as "zillow[raw]" is not
    # misread as a character class; the file-name part is left as a pattern.
    pattern = f"{glob.escape(str(local_path))}/{zillow_file}"
    files_to_load = glob.glob(pattern)

    if not files_to_load:
        raise ValueError(f"No file found for {zillow_file} in {local_path}!")
    if len(files_to_load) > 1:
        # TODO: decide whether to pick the most recent match instead of failing.
        raise ValueError(f"Multiple files found for {zillow_file} in {local_path}!\n{files_to_load}")

    return pd.read_csv(files_to_load[0], **read_csv_kwargs)

In [23]:
# Read the chosen Zillow metro CSV from the local Zillow data directory.
df_zillow_mean_to_pending = load_zillow_from_local(
    local_path="../chosen_data/zillow/",
    zillow_file="Metro_mean_doz_pending_uc_sfrcondo_month.csv",
)

In [24]:
import duckdb

# Open a connection to the DuckDB database file '../test_database.db'.
con = duckdb.connect(database="../test_database.db")

# Rebuild the table from scratch on every run (CREATE OR REPLACE).
# NOTE(review): the FROM clause relies on DuckDB resolving the name
# df_zillow_mean_to_pending to the pandas DataFrame held in this notebook's
# namespace (replacement scan) - confirm against the DuckDB Python docs.
con.sql("""
CREATE OR REPLACE TABLE zillow_mean_to_pending AS
    SELECT * FROM df_zillow_mean_to_pending;
           """)

In [25]:
# List the tables in the connected database to confirm zillow_mean_to_pending exists.
con.sql("show tables")

┌────────────────────────┐
│          name          │
│        varchar         │
├────────────────────────┤
│ crosswalk_cbsa_zip     │
│ crosswalk_zillow       │
│ crosswalk_zip_tract    │
│ svi                    │
│ zillow_mean_to_pending │
└────────────────────────┘

In [26]:
# Preview the first 10 rows of the table as a quick check of the load.
con.sql("SELECT * FROM zillow_mean_to_pending  LIMIT 10")

┌──────────┬──────────┬──────────────────┬────────────┬───────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬──────────

In [27]:
# Commit any pending work, then close the connection opened on '../test_database.db'.
# After this cell `con` is closed; re-run the connect cell before issuing more queries.
con.commit()
con.close()

: 