In [3]:
# Metadata for each metropolitan statistical area (MSA), keyed by area code.
# Each entry repeats its own key under "code", and its "region" label is the
# "<name>, <state> MSA" string assembled from the same name/state pair.
AREA_CODES = {
    area_code: {
        "code": area_code,
        "name": metro_name,
        "state": state_abbr,
        "region": f"{metro_name}, {state_abbr} MSA",
        "type": "MSA",
    }
    for area_code, metro_name, state_abbr in (
        ("C4266", "Seattle-Tacoma-Bellevue", "WA"),
        ("C3890", "Portland-Vancouver-Hillsboro", "OR-WA"),
        ("C3346", "Minneapolis-St Paul-Bloomington", "MN-WI"),
        ("C1242", "Austin-Round Rock-Georgetown", "TX"),
        ("C4790", "Washington-Arlington-Alexandria", "DC-VA-MD-WV"),
    )
}

# Metadata for each CBSA, keyed by its five-digit code held as a string.
# Each entry repeats its key under "code"; "name" is the full
# "<metro>, <state> MSA" label.
CBSA_CODES = {
    cbsa_code: {
        "code": cbsa_code,
        "name": f"{metro_name}, {state_abbr} MSA",
        "state": state_abbr,
        "type": "MSA",
    }
    for cbsa_code, metro_name, state_abbr in (
        ("42660", "Seattle-Tacoma-Bellevue", "WA"),
        ("38900", "Portland-Vancouver-Hillsboro", "OR-WA"),
        ("33460", "Minneapolis-St Paul-Bloomington", "MN-WI"),
        ("12420", "Austin-Round Rock-Georgetown", "TX"),
        ("47900", "Washington-Arlington-Alexandria", "DC-VA-MD-WV"),
    )
}

# Quarter ("1".."4") -> its three calendar months as zero-padded strings.
# Quarter q covers months 3q-2 through 3q.
QUARTER_TO_MONTH = {
    str(quarter): [f"{month:02d}" for month in range(3 * quarter - 2, 3 * quarter + 1)]
    for quarter in range(1, 5)
}
# Zero-padded month ("01".."12") -> quarter ("1".."4").
# Months are grouped three at a time, so month m falls in quarter (m-1)//3 + 1.
MONTH_TO_QUARTER = {
    f"{month:02d}": str((month - 1) // 3 + 1)
    for month in range(1, 13)
}

In [6]:
from bls_housing import load_area_df #, get_cached_path , fetch_area_csv
from collections import defaultdict
import pandas as pd

# Collect one record per (area, year, quarter) holding the MSA total quarterly
# wages, then roll the quarters up into annual totals with the year-over-year
# percentage change per area.
years = ["2020", "2021", "2022", "2023", "2024"]
quarters = ["1", "2", "3", "4"]

# agglvl_code value of the row used as the MSA total.
# NOTE(review): presumably the BLS QCEW "MSA, Total Covered" level - confirm
# against the QCEW aggregation level code table.
MSA_TOTAL_AGGLVL = 40

data_list = []

for code, area_meta in AREA_CODES.items():
    for year in years:
        for qtr in quarters:
            # Load DataFrame (uses cache if available).
            df = load_area_df(code, year, qtr)

            # Index the column directly. df.get(...) returns None when the
            # column is missing, and `None == 40` would turn the filter into
            # df[False], which fails with a misleading `KeyError: False`.
            msa = df[df["agglvl_code"] == MSA_TOTAL_AGGLVL]
            if msa.empty:
                # Fail loudly: silently skipping a quarter would understate
                # the annual sum and distort the percentage change.
                raise ValueError(
                    f"No agglvl_code == {MSA_TOTAL_AGGLVL} row for area {code}, "
                    f"year {year}, quarter {qtr}"
                )

            data_list.append({
                "Area": area_meta["name"],
                "Code": code,
                "Year": year,
                "Quarter": qtr,
                "Total Wages": msa["total_qtrly_wages"].iloc[0],
            })

wages_df = pd.DataFrame(data_list)

# Calculate annual total wages and percentage change.
# groupby sorts its keys, so rows within each Code are in ascending Year
# order, which is what pct_change needs to compare each year with the prior one.
annual_df = (
    wages_df
    .groupby(["Area", "Code", "Year"])["Total Wages"]
    .sum()
    .reset_index()
)
annual_df["Change"] = annual_df.groupby("Code")["Total Wages"].pct_change() * 100
print(annual_df)

                               Area   Code  Year   Total Wages     Change
0      Austin-Round Rock-Georgetown  C1242  2020   74347045148        NaN
1      Austin-Round Rock-Georgetown  C1242  2021   86539243397  16.399035
2      Austin-Round Rock-Georgetown  C1242  2022   98564902276  13.896191
3      Austin-Round Rock-Georgetown  C1242  2023  105404165981   6.938843
4      Austin-Round Rock-Georgetown  C1242  2024  112635075220   6.860174
5   Minneapolis-St Paul-Bloomington  C3346  2020  125079670147        NaN
6   Minneapolis-St Paul-Bloomington  C3346  2021  132965536013   6.304674
7   Minneapolis-St Paul-Bloomington  C3346  2022  141354450541   6.309089
8   Minneapolis-St Paul-Bloomington  C3346  2023  146554465897   3.678706
9   Minneapolis-St Paul-Bloomington  C3346  2024  152303078421   3.922509
10     Portland-Vancouver-Hillsboro  C3890  2020   76498944463        NaN
11     Portland-Vancouver-Hillsboro  C3890  2021   83777884363   9.515085
12     Portland-Vancouver-Hillsboro  C

In [32]:
# Load cleaned CBSA CSV from cache instead of manual XLS parsing
from bls_housing.census_cache import load_cbsa_df

# For every CBSA, sum the monthly 'Total' permit counts into annual totals and
# print the year-over-year percentage change.
years = ["2020", "2021", "2022", "2023", "2024"]
quarters = ["1", "2", "3", "4"]

# Calendar months to sum for each year, in quarter order.
months = [month for qtr in quarters for month in QUARTER_TO_MONTH[qtr]]

# The monthly table does not depend on the area, so load each (year, month)
# table once up front instead of once per area inside the loop below.
monthly_frames = {
    (year, month): load_cbsa_df(year, month)
    for year in years
    for month in months
}

for code, cbsa_meta in CBSA_CODES.items():
    area = cbsa_meta["code"]
    print(f"Area: {cbsa_meta['name']} ({area})")

    # Fresh accumulator per area so no totals carry over between areas.
    total_permits = {}
    for year in years:
        total_permits[year] = 0
        for month in months:
            df = monthly_frames[(year, month)]
            # Keep only the row for the current CBSA; the 'CBSA' column is
            # compared against an int, hence the cast of the string code.
            df_current_area = df[df['CBSA'] == int(area)]
            if df_current_area.empty:
                # A missing month would silently understate the annual total.
                raise ValueError(
                    f"CBSA {area} not found in permits table for {year}-{month}"
                )
            total_permits[year] += df_current_area['Total'].iloc[0]

    # Compare each year with the one immediately before it.
    for prev_year, year in zip(years, years[1:]):
        change = ((total_permits[year] - total_permits[prev_year]) / total_permits[prev_year]) * 100
        print(f"Year: {year}, Total Permits: {total_permits[year]}, Change from {prev_year}: {change:.2f}%")

    

Area: Seattle-Tacoma-Bellevue, WA MSA (42660)
Year: 2021, Total Permits: 30074.0, Change from 2020: 26.51%
Year: 2022, Total Permits: 25933.0, Change from 2021: -13.77%
Year: 2023, Total Permits: 17619.0, Change from 2022: -32.06%
Year: 2024, Total Permits: 17681, Change from 2023: 0.35%
Area: Portland-Vancouver-Hillsboro, OR-WA MSA (38900)
Year: 2021, Total Permits: 15234.0, Change from 2020: 17.30%
Year: 2022, Total Permits: 12885.0, Change from 2021: -15.42%
Year: 2023, Total Permits: 11254.0, Change from 2022: -12.66%
Year: 2024, Total Permits: 9132, Change from 2023: -18.86%
Area: Minneapolis-St Paul-Bloomington, MN-WI MSA (33460)
Year: 2021, Total Permits: 25057.0, Change from 2020: 17.42%
Year: 2022, Total Permits: 24609.0, Change from 2021: -1.79%
Year: 2023, Total Permits: 17359.0, Change from 2022: -29.46%
Year: 2024, Total Permits: 15227, Change from 2023: -12.28%
Area: Austin-Round Rock-Georgetown, TX MSA (12420)
Year: 2021, Total Permits: 50297.0, Change from 2020: 23.80%


In [2]:
# Load one cleaned monthly CBSA table from the cache, list its column labels,
# and show the first ten rows as the cell's output.
from bls_housing.census_cache import load_cbsa_df
df = load_cbsa_df('2025', '1')
print(df.columns)
df.head(n=10)

Index(['CSA', 'CBSA', 'Name', 'Metro /Micro Code', 'Total', '1 Unit',
       '2 Units', '3 and 4 Units', '5 Units or More',
       'Num of Structures With 5 Units or More', 'nan', 'Total_year_to_date',
       '1 Unit_year_to_date', '2 Units_year_to_date',
       '3 and 4 Units_year_to_date', '5 Units or More_year_to_date',
       'Num of Structures With 5 Units or More_year_to_date'],
      dtype='object')


Unnamed: 0,CSA,CBSA,Name,Metro /Micro Code,Total,1 Unit,2 Units,3 and 4 Units,5 Units or More,Num of Structures With 5 Units or More,nan,Total_year_to_date,1 Unit_year_to_date,2 Units_year_to_date,3 and 4 Units_year_to_date,5 Units or More_year_to_date,Num of Structures With 5 Units or More_year_to_date
0,999,10100,"Aberdeen, SD ...",5,0,0,0,0,0,0,,0,0,0,0,0,0
1,999,10140,"Aberdeen, WA ...",5,24,24,0,0,0,0,,24,24,0,0,0,0
2,101,10180,"Abilene, TX ...",2,82,78,4,0,0,0,,82,78,4,0,0,0
3,999,10220,"Ada, OK ...",5,4,0,4,0,0,0,,4,0,4,0,0,0
4,220,10300,"Adrian, MI ...",5,8,8,0,0,0,0,,8,8,0,0,0,0
5,184,10420,"Akron, OH ...",2,56,42,0,0,14,2,,56,42,0,0,14,2
6,999,10460,"Alamogordo, NM ...",5,1,1,0,0,0,0,,1,1,0,0,0,0
7,999,10480,"Alamosa, CO ...",5,10,6,4,0,0,0,,10,6,4,0,0,0
8,999,10500,"Albany, GA ...",4,6,6,0,0,0,0,,6,6,0,0,0,0
9,440,10540,"Albany, OR ...",2,10,8,2,0,0,0,,10,8,2,0,0,0


In [4]:
# Look up where the cached CSV for the ('2025', '1') request is stored, then
# print that location.
from bls_housing.census_cache import get_cached_csv_path
cached_csv_path = get_cached_csv_path('2025', '1')
print(cached_csv_path)


/home/ekang/python_BLS_housing/data/cache/census/csv/CBSA_2025_01.csv


In [None]:
# Pull the Seattle MSA row (CBSA 42660) out of the monthly table held in `df`
# (loaded by an earlier cell) and report its permit total.
df_current_area = df[df['CBSA'] == 42660]
print(df_current_area)

if df_current_area.empty:
    # Without this guard, .iloc[0] below fails with an uninformative IndexError.
    raise ValueError("CBSA 42660 not found in df")

# 'Total' holds the permit count for the area, so name the scalar accordingly.
seattle_total_permits = df_current_area['Total'].iloc[0]
# Backward-compatible alias: the value used to be stored under this name even
# though it is a permit count (a scalar), not wages and not a DataFrame.
df_Seattle_total_wages = seattle_total_permits

print(f"[Total: {seattle_total_permits}]")


     CSA   CBSA                                               Name  \
753  500  42660  Seattle-Tacoma-Bellevue, WA                   ...   

     Metro /Micro Code  Total  1 Unit  2 Units  3 and 4 Units  \
753                  2   1203     528       68             78   

     5 Units or More  Num of Structures With 5 Units or More  nan  \
753              529                                      24  NaN   

     Total_year_to_date  1 Unit_year_to_date  2 Units_year_to_date  \
753                1203                  528                    68   

     3 and 4 Units_year_to_date  5 Units or More_year_to_date  \
753                          78                           529   

     Num of Structures With 5 Units or More_year_to_date  
753                                                 24    
[Total: 1203]
