In [10]:
# QCEW area metadata. Each record is indexed by its own "code" field, so the
# lookup key and the stored code can never drift apart.
AREA_CODES = {
    record["code"]: record
    for record in (
        {
            "code": "C4266",
            "name": "Seattle-Tacoma-Bellevue",
            "state": "WA",
            "region": "Seattle-Tacoma-Bellevue, WA MSA",
            "type": "MSA",
        },
        {
            "code": "C3890",
            "name": "Portland-Vancouver-Hillsboro",
            "state": "OR-WA",
            "region": "Portland-Vancouver-Hillsboro, OR-WA MSA",
            "type": "MSA",
        },
        {
            "code": "C3346",
            "name": "Minneapolis-St Paul-Bloomington",
            "state": "MN-WI",
            "region": "Minneapolis-St Paul-Bloomington, MN-WI MSA",
            "type": "MSA",
        },
    )
}

# CBSA metadata. Each record is indexed by its own "code" field (the CBSA code
# as a string), so the lookup key always matches the stored code.
CBSA_CODES = {
    record["code"]: record
    for record in (
        {
            "code": "42660",
            "name": "Seattle-Tacoma-Bellevue, WA MSA",
            "state": "WA",
            "type": "MSA",
        },
        {
            "code": "38900",
            "name": "Portland-Vancouver-Hillsboro, OR-WA MSA",
            "state": "OR-WA",
            "type": "MSA",
        },
        {
            "code": "33460",
            "name": "Minneapolis-St Paul-Bloomington, MN-WI MSA",
            "state": "MN-WI",
            "type": "MSA",
        },
    )
}

# Quarter number (as a string) -> list of the zero-padded month strings it covers.
QUARTER_TO_MONTH = dict(
    [
        ("1", ["01", "02", "03"]),
        ("2", ["04", "05", "06"]),
        ("3", ["07", "08", "09"]),
        ("4", ["10", "11", "12"]),
    ]
)
# Zero-padded month string -> quarter number (as a string), expanded from
# one (quarter, months) group per quarter.
MONTH_TO_QUARTER = {
    month: quarter
    for quarter, months in (
        ("1", ("01", "02", "03")),
        ("2", ("04", "05", "06")),
        ("3", ("07", "08", "09")),
        ("4", ("10", "11", "12")),
    )
    for month in months
}

In [11]:
# Example: load a QCEW area CSV using the cache utilities
from bls_housing import load_area_df, get_cached_path #, fetch_area_csv

# For every area in AREA_CODES: load the Q1 QCEW frame for each year, take the
# MSA-total row's quarterly wages, then print the year-over-year change.
years = ["2020", "2021", "2022", "2023", "2024", "2025"]
qtr = "1"
MSA_TOTAL_AGGLVL = 40  # agglvl_code of the "MSA total covered" row

for code in AREA_CODES:
    # Fresh dict per area so one area's figures can never leak into the next.
    total_wages = {}

    area = AREA_CODES[code]["code"]
    print(f"Area: {AREA_CODES[code]['name']} ({area})")

    for year in years:
        # Load DataFrame (uses cache if available)
        df = load_area_df(area, year, qtr)

        # Index the column directly: a missing column then fails with a clear
        # KeyError naming it, instead of the opaque df[False] lookup that
        # df.get(...) == 40 degrades to when the column is absent.
        msa = df[df["agglvl_code"] == MSA_TOTAL_AGGLVL]
        if msa.empty:
            raise ValueError(
                f"No agglvl_code == {MSA_TOTAL_AGGLVL} row for area {area}, "
                f"{year} Q{qtr}"
            )

        # Positional access: a filtered frame keeps its original index labels,
        # so the surviving row is not guaranteed to carry label 0.
        total_wages[year] = msa["total_qtrly_wages"].iloc[0]
        print(f"Year: {year}, Total Wages: {total_wages[year]:,.2f}")

    # Percentage change of each year against the year before it.
    for prev_year, year in zip(years, years[1:]):
        change = ((total_wages[year] - total_wages[prev_year]) / total_wages[prev_year]) * 100
        print(f"Year: {year}, Total Wages: {total_wages[year]:,.2f}, Change from {prev_year}: {change:.2f}%")
    print()
    


Area: Seattle-Tacoma-Bellevue (C4266)
Year: 2020, Total Wages: 45,200,515,733.00
Year: 2021, Total Wages: 46,310,344,060.00
Year: 2022, Total Wages: 50,669,607,121.00
Year: 2023, Total Wages: 55,513,395,217.00
Year: 2024, Total Wages: 58,911,332,927.00
Year: 2025, Total Wages: 62,262,553,595.00
Year: 2021, Total Wages: 46,310,344,060.00, Change from 2020: 2.46%
Year: 2022, Total Wages: 50,669,607,121.00, Change from 2021: 9.41%
Year: 2023, Total Wages: 55,513,395,217.00, Change from 2022: 9.56%
Year: 2024, Total Wages: 58,911,332,927.00, Change from 2023: 6.12%
Year: 2025, Total Wages: 62,262,553,595.00, Change from 2024: 5.69%

Area: Portland-Vancouver-Hillsboro (C3890)
Year: 2020, Total Wages: 19,780,516,586.00
Year: 2021, Total Wages: 19,545,962,532.00
Year: 2022, Total Wages: 21,956,438,929.00
Year: 2023, Total Wages: 23,674,348,879.00
Year: 2024, Total Wages: 24,212,290,614.00
Year: 2025, Total Wages: 24,657,399,584.00
Year: 2021, Total Wages: 19,545,962,532.00, Change from 2020: 

In [1]:
# Fetch and load cleaned CBSA CSV using cache utilities
from bls_housing.census_cache import fetch_cbsa_csv, load_cbsa_df
# Make sure the CSV is present in the cache (converted from XLS if needed),
# and report where it lives.
csv_path = fetch_cbsa_csv("2025", "1")
print("CSV cached at", csv_path)
# Read the cached CSV into a pandas DataFrame and preview its first five rows.
df = load_cbsa_df("2025", "1")
df.head(n=5)

CSV cached at /home/ekang/python_BLS_housing/data/cache/census/csv/CBSA_2025_01.csv


Unnamed: 0,CSA,CBSA,Name,Metro /Micro Code,Total,1 Unit,2 Units,3 and 4 Units,5 Units or More,Num of Structures With 5 Units or More,nan,Total_year_to_date,1 Unit_year_to_date,2 Units_year_to_date,3 and 4 Units_year_to_date,5 Units or More_year_to_date,Num of Structures With 5 Units or More_year_to_date
0,999,10100,"Aberdeen, SD ...",5,0,0,0,0,0,0,,0,0,0,0,0,0
1,999,10140,"Aberdeen, WA ...",5,24,24,0,0,0,0,,24,24,0,0,0,0
2,101,10180,"Abilene, TX ...",2,82,78,4,0,0,0,,82,78,4,0,0,0
3,999,10220,"Ada, OK ...",5,4,0,4,0,0,0,,4,0,4,0,0,0
4,220,10300,"Adrian, MI ...",5,8,8,0,0,0,0,,8,8,0,0,0,0


In [2]:
# Load cleaned CBSA CSV from cache instead of manual XLS parsing
from bls_housing.census_cache import load_cbsa_df
# Reload the cached CBSA frame, list its column labels, then preview ten rows.
df = load_cbsa_df("2025", "1")
print(df.columns)
df.head(n=10)

Index(['CSA', 'CBSA', 'Name', 'Metro /Micro Code', 'Total', '1 Unit',
       '2 Units', '3 and 4 Units', '5 Units or More',
       'Num of Structures With 5 Units or More', 'nan', 'Total_year_to_date',
       '1 Unit_year_to_date', '2 Units_year_to_date',
       '3 and 4 Units_year_to_date', '5 Units or More_year_to_date',
       'Num of Structures With 5 Units or More_year_to_date'],
      dtype='object')


Unnamed: 0,CSA,CBSA,Name,Metro /Micro Code,Total,1 Unit,2 Units,3 and 4 Units,5 Units or More,Num of Structures With 5 Units or More,nan,Total_year_to_date,1 Unit_year_to_date,2 Units_year_to_date,3 and 4 Units_year_to_date,5 Units or More_year_to_date,Num of Structures With 5 Units or More_year_to_date
0,999,10100,"Aberdeen, SD ...",5,0,0,0,0,0,0,,0,0,0,0,0,0
1,999,10140,"Aberdeen, WA ...",5,24,24,0,0,0,0,,24,24,0,0,0,0
2,101,10180,"Abilene, TX ...",2,82,78,4,0,0,0,,82,78,4,0,0,0
3,999,10220,"Ada, OK ...",5,4,0,4,0,0,0,,4,0,4,0,0,0
4,220,10300,"Adrian, MI ...",5,8,8,0,0,0,0,,8,8,0,0,0,0
5,184,10420,"Akron, OH ...",2,56,42,0,0,14,2,,56,42,0,0,14,2
6,999,10460,"Alamogordo, NM ...",5,1,1,0,0,0,0,,1,1,0,0,0,0
7,999,10480,"Alamosa, CO ...",5,10,6,4,0,0,0,,10,6,4,0,0,0
8,999,10500,"Albany, GA ...",4,6,6,0,0,0,0,,6,6,0,0,0,0
9,440,10540,"Albany, OR ...",2,10,8,2,0,0,0,,10,8,2,0,0,0


In [4]:
# Show cached CSV path
# Prints whatever get_cached_csv_path returns for ('2025', '1'), the same
# arguments the earlier cells pass to fetch_cbsa_csv / load_cbsa_df.
# NOTE(review): the second argument appears to be a month (the printed file
# name ends in _01), and the helper presumably only looks the path up without
# downloading anything — confirm both against bls_housing.census_cache.
from bls_housing.census_cache import get_cached_csv_path
print(get_cached_csv_path('2025', '1'))


/home/ekang/python_BLS_housing/data/cache/census/csv/CBSA_2025_01.csv


In [5]:
# Keep only the row(s) whose CBSA code is 42660 (the Seattle MSA) and show them.
df_Seattle = df.loc[df["CBSA"] == 42660]
print(df_Seattle)

# Permit total for the Seattle MSA: the 'Total' value of the first matching row.
df_Seattle_Total_Permits = df_Seattle["Total"].iloc[0]
print(f"[Total: {df_Seattle_Total_Permits}]")


     CSA   CBSA                                               Name  \
753  500  42660  Seattle-Tacoma-Bellevue, WA                   ...   

     Metro /Micro Code  Total  1 Unit  2 Units  3 and 4 Units  \
753                  2   1203     528       68             78   

     5 Units or More  Num of Structures With 5 Units or More  nan  \
753              529                                      24  NaN   

     Total_year_to_date  1 Unit_year_to_date  2 Units_year_to_date  \
753                1203                  528                    68   

     3 and 4 Units_year_to_date  5 Units or More_year_to_date  \
753                          78                           529   

     Num of Structures With 5 Units or More_year_to_date  
753                                                 24    
[Total: 1203]


In [12]:
# Load cleaned CBSA CSV from cache instead of manual XLS parsing
from bls_housing.census_cache import load_cbsa_df

# For every CBSA in CBSA_CODES: sum the monthly 'Total' permit counts over the
# Q1 months of each year, then print the year-over-year change.
years = ["2020", "2021", "2022", "2023", "2024", "2025"]

for code in CBSA_CODES:
    area = CBSA_CODES[code]["code"]
    print(f"Area: {CBSA_CODES[code]['name']} ({area})")

    # Fresh dict per area so one area's totals can never leak into the next.
    total_permits = {}

    for year in years:
        print(f"Year: {year}")
        total_permits[year] = 0
        for month in QUARTER_TO_MONTH['1']:  # months in Q1 (already strings)
            df = load_cbsa_df(year, month)

            # Row for the CBSA currently being processed; the 'CBSA' column is
            # compared as an integer, hence the int() on the string code.
            df_area = df[df['CBSA'] == int(area)]
            if df_area.empty:
                raise ValueError(
                    f"No row with CBSA {area} in the permit data for {year}-{month}"
                )

            total_permits_current_month = df_area['Total'].iloc[0]
            total_permits[year] += total_permits_current_month
            print(total_permits_current_month)
        print(f"[Total for {year} Q1: {total_permits[year]}]")

    # Percentage change of each year against the year before it.
    for prev_year, year in zip(years, years[1:]):
        change = ((total_permits[year] - total_permits[prev_year]) / total_permits[prev_year]) * 100
        print(f"Year: {year}, Total Permits: {total_permits[year]:,.2f}, Change from {prev_year}: {change:.2f}%")

    

Area: Seattle-Tacoma-Bellevue, WA MSA (42660)
Year: 2020
1421.0
1775.0
1897.0
[Total for 2020 Q1: 5093.0]
Year: 2021
2422.0
2455.0
2643.0
[Total for 2021 Q1: 7520.0]
Year: 2022
1904.0
2307.0
2806.0
[Total for 2022 Q1: 7017.0]
Year: 2023
1046.0
2135.0
1426.0
[Total for 2023 Q1: 4607.0]
Year: 2024
1083
1370
2346
[Total for 2024 Q1: 4799]
Year: 2025
1203
1078
926
[Total for 2025 Q1: 3207]
Year: 2021, Total Permits: 7,520.00, Change from 2020: 47.65%
Year: 2022, Total Permits: 7,017.00, Change from 2021: -6.69%
Year: 2023, Total Permits: 4,607.00, Change from 2022: -34.35%
Year: 2024, Total Permits: 4,799.00, Change from 2023: 4.17%
Year: 2025, Total Permits: 3,207.00, Change from 2024: -33.17%
Area: Portland-Vancouver-Hillsboro, OR-WA MSA (38900)
Year: 2020
1037.0
753.0
2047.0
[Total for 2020 Q1: 3837.0]
Year: 2021
887.0
885.0
1310.0
[Total for 2021 Q1: 3082.0]
Year: 2022
830.0
1274.0
929.0
[Total for 2022 Q1: 3033.0]
Year: 2023
1057.0
1154.0
917.0
[Total for 2023 Q1: 3128.0]
Year: 2024
5

In [None]:
# # Quick peek and filter to MSA totals (agglvl_code == 40)
# print(df.head())
# msa = df[df.get('agglvl_code') == 40]
# print(f"MSA rows: {len(msa)}")
# print(msa.head())

In [None]:
# import requests
#import json
#import pandas as pd
# import numpy as np
'''C4266 Seattle-Tacoma-Bellevue, WA MSA QCEW Area Code
C3890 Portland-Vancouver-Hillsboro, OR-WA MSA QCEW Area Code'''
# headers = {'Content-Type': 'application/json'}

# import urllib.request
# import urllib
# def qcewCreateDataRows(csv):
#     dataRows = []
#     try: dataLines = csv.decode().split('\r\n')
#     except er: dataLines = csv.split('\r\n')
#     for row in dataLines:
#         dataRows.append(row.split(','))
#     return dataRows

# def qcewGetAreaData(year,qtr,area):
#     urlPath = "http://data.bls.gov/cew/data/api/[YEAR]/[QTR]/area/[AREA].csv"
#     urlPath = urlPath.replace("[YEAR]",year)
#     urlPath = urlPath.replace("[QTR]",qtr.lower())
#     urlPath = urlPath.replace("[AREA]",area.upper())
#     httpStream = urllib.request.urlopen(urlPath)
#     csv = httpStream.read()
#     httpStream.close()
#     return qcewCreateDataRows(csv)

# def qcewGetAreaUrl(year,qtr,area):
#     urlPath = "https://data.bls.gov/cew/data/api/[YEAR]/[QTR]/area/[AREA].csv"
#     urlPath = urlPath.replace("[YEAR]",year)
#     urlPath = urlPath.replace("[QTR]",qtr.lower())
#     urlPath = urlPath.replace("[AREA]",area.upper())
#     return urlPath

# URLPATH = qcewGetAreaUrl("2025", "1", "C4266")
#httpStream = urllib.request.urlopen(URLPATH)
#httpStreamData = httpStream.read()



In [None]:
# print(httpStreamData)
# pdf = pd.read_csv(URLPATH)
# print(pdf[0])
# print(pdf)
# httpStreamDataRows = qcewCreateDataRows(httpStreamData)

# for row in httpStreamDataRows:
#     print(row)

In [None]:
# Key QCEW filter values: agglvl_code 40 = MSA total covered; own_code 0 = total covered; industry_code 10 = all industries; size_code 0 = all establishment sizes.
# filtered_df = pdf[pdf['agglvl_code'] == 40]
# print(filtered_df)

1
