In [None]:
# Area code metadata
AREA_CODES = {
    "C4266": {
        "code": "C4266",
        "name": "Seattle-Tacoma-Bellevue",
        "state": "WA",
        "region": "Seattle-Tacoma-Bellevue, WA MSA",
        "type": "MSA",
    },
    "C3890": {
        "code": "C3890",
        "name": "Portland-Vancouver-Hillsboro",
        "state": "OR-WA",
        "region": "Portland-Vancouver-Hillsboro, OR-WA MSA",
        "type": "MSA"
    }
}

# CBSA code metadata
CBSA_CODES = {
    "42660": {
        "code": "42660",
        "name": "Seattle-Tacoma-Bellevue, WA MSA",
        "state": "WA",
        "type": "MSA",
    },
    "38900": {
        "code": "38900",
        "name": "Portland-Vancouver-Hillsboro, OR-WA MSA",
        "state": "OR-WA",
        "type": "MSA"
    }
}

In [None]:
# Example: load a QCEW area CSV using the cache utilities
from bls_housing import load_area_df, get_cached_path #, fetch_area_csv

# areas = AREA_CODES["C4266"]["code"] # Seattle-Tacoma-Bellevue MSA
years = ["2020","2021","2022","2023","2024","2025"]
qtr = "1"
# Load DataFrame (uses cache if available)
# get total quarterly wages for MSA for each year and store in total_wages dict
for code in AREA_CODES:
    total_wages = {}

    area = AREA_CODES[code]["code"]
    print(f"Area: {AREA_CODES[code]['name']} ({area})")

    for year in years:
        df = load_area_df(area, year, qtr)
        # print(f"Loaded rows: {len(df)}")
        # Show cached path (or None)
        cached = get_cached_path(area, year, qtr)
        # print("Cached file:", cached)
        msa = df[df.get('agglvl_code') == 40]
        total_wages[year] = msa['total_qtrly_wages'][0]
        print(f"Year: {year}, Total Wages: {total_wages[year]:,.2f}")

    for i in range(1, len(years)):
        year = years[i]
        prev_year = years[i-1]
        change = ((total_wages[year] - total_wages[prev_year]) / total_wages[prev_year]) * 100
        print(f"Year: {year}, Total Wages: {total_wages[year]:,.2f}, Change from {prev_year}: {change:.2f}%")   
    print()
    


Area: Seattle-Tacoma-Bellevue (C4266)
Year: 2020, Total Wages: 45,200,515,733.00
Year: 2021, Total Wages: 46,310,344,060.00
Year: 2022, Total Wages: 50,669,607,121.00
Year: 2023, Total Wages: 55,513,395,217.00
Year: 2024, Total Wages: 58,911,332,927.00
Year: 2025, Total Wages: 62,262,553,595.00
Year: 2021, Total Wages: 46,310,344,060.00, Change from 2020: 2.46%
Year: 2022, Total Wages: 50,669,607,121.00, Change from 2021: 9.41%
Year: 2023, Total Wages: 55,513,395,217.00, Change from 2022: 9.56%
Year: 2024, Total Wages: 58,911,332,927.00, Change from 2023: 6.12%
Year: 2025, Total Wages: 62,262,553,595.00, Change from 2024: 5.69%

Area: Portland-Vancouver-Hillsboro (C3890)
Year: 2020, Total Wages: 19,780,516,586.00
Year: 2021, Total Wages: 19,545,962,532.00
Year: 2022, Total Wages: 21,956,438,929.00
Year: 2023, Total Wages: 23,674,348,879.00
Year: 2024, Total Wages: 24,212,290,614.00
Year: 2025, Total Wages: 24,657,399,584.00
Year: 2021, Total Wages: 19,545,962,532.00, Change from 2020: 

In [9]:
# link format https://www.census.gov/construction/bps/xls/cbsamonthly_202501.xls

# test fetch census housing permits for Seattle MSA Jan 2025
urlpath = "https://www.census.gov/construction/bps/xls/cbsamonthly_202501.xls"

import pandas as pd
import requests
from io import BytesIO

urlpath = "https://www.census.gov/construction/bps/xls/cbsamonthly_202501.xls"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36"
}
resp = requests.get(urlpath, headers=headers, timeout=20)
resp.raise_for_status()
df = pd.read_excel(BytesIO(resp.content))
df = df.tail(df.size - 5)  # remove header rows
df.head(5)
# rewards_df_sample[rewards_df_sample['City'] != 'G']

Unnamed: 0,New Privately Owned Housing Units Authorized,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16
0,Unadjusted Units by CBSA,,,,,,,,,,,,,,,,
1,,,,,,,,,,,,,,,,,
2,202501,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,,
4,,,,,,Current Month,,,,,,,Year to Date,,,,


In [63]:
# get local file version of census housing permits for Seattle MSA Jan 2025
# note this is the raw version downloaded from census.gov, not yet processed
# we drop the first several rows to get to the data table and then 
# set the header row appropriately
filepath = "/home/ekang/python_BLS_housing/data/cbsamonthly_202501.xls" 
import pandas as pd     
df = pd.read_excel(filepath)

df = df.iloc[6:]  # remove header rows
new_header = df.iloc[0]
df = df[1:]  # take the data less the header row
df.columns = new_header  # set the header row as the df header
print(df.keys())
df.head(10)
# rewards_df_sample[rewards_df_sample['City'] != 'G']

Index([                                   'CSA',
                                         'CBSA',
                                         'Name',
                            'Metro /Micro Code',
                                        'Total',
                                       '1 Unit',
                                      '2 Units',
                                '3 and 4 Units',
                              '5 Units or More',
       'Num of Structures With 5 Units or More',
                                            nan,
                                        'Total',
                                       '1 Unit',
                                      '2 Units',
                                '3 and 4 Units',
                              '5 Units or More',
       'Num of Structures With 5 Units or More'],
      dtype='object', name=6)


6,CSA,CBSA,Name,Metro /Micro Code,Total,1 Unit,2 Units,3 and 4 Units,5 Units or More,Num of Structures With 5 Units or More,NaN,Total.1,1 Unit.1,2 Units.1,3 and 4 Units.1,5 Units or More.1,Num of Structures With 5 Units or More.1
7,999,10100,"Aberdeen, SD ...",5,0,0,0,0,0,0,,0,0,0,0,0,0
8,999,10140,"Aberdeen, WA ...",5,24,24,0,0,0,0,,24,24,0,0,0,0
9,101,10180,"Abilene, TX ...",2,82,78,4,0,0,0,,82,78,4,0,0,0
10,999,10220,"Ada, OK ...",5,4,0,4,0,0,0,,4,0,4,0,0,0
11,220,10300,"Adrian, MI ...",5,8,8,0,0,0,0,,8,8,0,0,0,0
12,184,10420,"Akron, OH ...",2,56,42,0,0,14,2,,56,42,0,0,14,2
13,999,10460,"Alamogordo, NM ...",5,1,1,0,0,0,0,,1,1,0,0,0,0
14,999,10480,"Alamosa, CO ...",5,10,6,4,0,0,0,,10,6,4,0,0,0
15,999,10500,"Albany, GA ...",4,6,6,0,0,0,0,,6,6,0,0,0,0
16,440,10540,"Albany, OR ...",2,10,8,2,0,0,0,,10,8,2,0,0,0


In [64]:
# rename 2nd copies of columns by adding _structures to avoid duplicate column names
# build new column names without calling string methods on non-string labels
new_cols = []
seen = {}
for col in df.columns:
	key = col if isinstance(col, str) else str(col)
	if seen.get(key, 0) == 0:
		new_cols.append(key)
	else:
		new_cols.append(f"{key}_structures")
	seen[key] = seen.get(key, 0) + 1
df.columns = new_cols
df.keys()

Index(['CSA', 'CBSA', 'Name', 'Metro /Micro Code', 'Total', '1 Unit',
       '2 Units', '3 and 4 Units', '5 Units or More',
       'Num of Structures With 5 Units or More', 'nan', 'Total_structures',
       '1 Unit_structures', '2 Units_structures', '3 and 4 Units_structures',
       '5 Units or More_structures',
       'Num of Structures With 5 Units or More_structures'],
      dtype='object')

In [65]:
# export as csv 
df.to_csv("/home/ekang/python_BLS_housing/data/cbsamonthly_202501_cleaned.csv", index=False)


In [70]:
df_Seattle = df[df['CBSA'] == 42660]  # filter for Seattle MSA
print(df_Seattle)

# get total permits for Seattle MSA
df_Seattle_Total_Permits = df_Seattle['Total'].iloc[0]

#print(df_Seattle_Total_Permits.columns)
#print(df_Seattle.columns.value_counts())
print(f"[Total: {df_Seattle_Total_Permits}]")


     CSA   CBSA                                               Name  \
760  500  42660  Seattle-Tacoma-Bellevue, WA                   ...   

    Metro /Micro Code Total 1 Unit 2 Units 3 and 4 Units 5 Units or More  \
760                 2  1203    528      68            78             529   

    Num of Structures With 5 Units or More  nan Total_structures  \
760                                     24  NaN             1203   

    1 Unit_structures 2 Units_structures 3 and 4 Units_structures  \
760               528                 68                       78   

    5 Units or More_structures  \
760                        529   

    Num of Structures With 5 Units or More_structures  
760                                                24  
[Total: 1203]


In [40]:
# get year over year percent change in total wages    



In [None]:
# # Quick peek and filter to MSA totals (agglvl_code == 40)
# print(df.head())
# msa = df[df.get('agglvl_code') == 40]
# print(f"MSA rows: {len(msa)}")
# print(msa.head())

In [None]:
# import requests
#import json
#import pandas as pd
# import numpy as np
'''C4266 Seattle-Tacoma-Bellevue, WA MSA QCEW Area Code
C3890 Portland-Vancouver-Hillsboro, OR-WA MSA QCEW Area Code'''
# headers = {'Content-Type': 'application/json'}

# import urllib.request
# import urllib
# def qcewCreateDataRows(csv):
#     dataRows = []
#     try: dataLines = csv.decode().split('\r\n')
#     except er: dataLines = csv.split('\r\n')
#     for row in dataLines:
#         dataRows.append(row.split(','))
#     return dataRows

# def qcewGetAreaData(year,qtr,area):
#     urlPath = "http://data.bls.gov/cew/data/api/[YEAR]/[QTR]/area/[AREA].csv"
#     urlPath = urlPath.replace("[YEAR]",year)
#     urlPath = urlPath.replace("[QTR]",qtr.lower())
#     urlPath = urlPath.replace("[AREA]",area.upper())
#     httpStream = urllib.request.urlopen(urlPath)
#     csv = httpStream.read()
#     httpStream.close()
#     return qcewCreateDataRows(csv)

# def qcewGetAreaUrl(year,qtr,area):
#     urlPath = "https://data.bls.gov/cew/data/api/[YEAR]/[QTR]/area/[AREA].csv"
#     urlPath = urlPath.replace("[YEAR]",year)
#     urlPath = urlPath.replace("[QTR]",qtr.lower())
#     urlPath = urlPath.replace("[AREA]",area.upper())
#     return urlPath

# URLPATH = qcewGetAreaUrl("2025", "1", "C4266")
#httpStream = urllib.request.urlopen(URLPATH)
#httpStreamData = httpStream.read()



In [None]:
# print(httpStreamData)
# pdf = pd.read_csv(URLPATH)
# print(pdf[0])
# print(pdf)
# httpStreamDataRows = qcewCreateDataRows(httpStreamData)

# for row in httpStreamDataRows:
#     print(row)

In [None]:
# important values, agglvl_code 40 is MSA Total covered, own_code 0 is total covered, industry_code 10 is all industries, size_code 0 is All establishment sizes.
# filtered_df = pdf[pdf['agglvl_code'] == 40]
# print(filtered_df)

1
