In [1]:
import pandas as pd
import os

In [2]:
# 1. Load a sample LAUS file
BLS_PATH = "/Users/koacow/BOSTON UNIVERSITY Dropbox/Ngoc Duy Khoa Cao/GLOB~S/Data/U.S. County Data/county_controls/data/BLSLAUCN_1990_2022/raw"
sample_file = os.path.join(BLS_PATH, "laucnty99.xlsx")
sample_df = pd.read_excel(sample_file, sheet_name=None, skiprows=1)

In [3]:
# 2. Read all BLS LAUS files from 1990 to 2022 and concatenate them into a single DataFrame
merged_bls_df = None
for f in os.listdir(BLS_PATH):
    try:
        if f.startswith('laucnty') and f.endswith('.xlsx'):
            file_path = os.path.join(BLS_PATH, f)
            df = pd.read_excel(file_path, skiprows=1, dtype={
                "State FIPS Code": str,
                "County FIPS Code": str,
                "Unemployment Rate (%)": float,
            }, na_values=["N.A."])
            df = df[df['State FIPS Code'].notna() & df['County FIPS Code'].notna()]
            df['Year'] = df['Year'].astype(int)
            if merged_bls_df is None:
                merged_bls_df = df
            else:
                merged_bls_df = pd.concat([merged_bls_df, df], ignore_index=True)
    except Exception as e:
        print(f"Error processing file {f}: {e}")

merged_bls_df


Unnamed: 0,LAUS Code,State FIPS Code,County FIPS Code,County Name/State Abbreviation,Year,Labor Force,Employed,Unemployed,Unemployment Rate (%)
0,CN0100100000000,01,001,"Autauga County, AL",1995,19629.0,18521.0,1108.0,5.6
1,CN0100300000000,01,003,"Baldwin County, AL",1995,60785.0,57901.0,2884.0,4.7
2,CN0100500000000,01,005,"Barbour County, AL",1995,12345.0,11333.0,1012.0,8.2
3,CN0100700000000,01,007,"Bibb County, AL",1995,7986.0,7463.0,523.0,6.5
4,CN0100900000000,01,009,"Blount County, AL",1995,20982.0,20061.0,921.0,4.4
...,...,...,...,...,...,...,...,...,...
106214,CN7214500000000,72,145,"Vega Baja Municipio, PR",2002,20219.0,17614.0,2605.0,12.9
106215,CN7214700000000,72,147,"Vieques Municipio, PR",2002,2625.0,2159.0,466.0,17.8
106216,CN7214900000000,72,149,"Villalba Municipio, PR",2002,8100.0,6820.0,1280.0,15.8
106217,CN7215100000000,72,151,"Yabucoa Municipio, PR",2002,11187.0,8961.0,2226.0,19.9


In [4]:
# 3. Create 'FIPS5' column, rename columns, and drop unnecessary ones
merged_bls_df['FIPS5'] = merged_bls_df['State FIPS Code'] + merged_bls_df['County FIPS Code']
merged_bls_df = merged_bls_df.rename(columns={
    'Year': 'year',
    'Unemployment Rate (%)': 'unemployment_rate',
})
merged_bls_df = merged_bls_df.drop(columns=['State FIPS Code', 'County FIPS Code', 'LAUS Code', 'Labor Force', 'Employed', 'Unemployed', 'County Name/State Abbreviation'])
merged_bls_df

Unnamed: 0,year,unemployment_rate,FIPS5
0,1995,5.6,01001
1,1995,4.7,01003
2,1995,8.2,01005
3,1995,6.5,01007
4,1995,4.4,01009
...,...,...,...
106214,2002,12.9,72145
106215,2002,17.8,72147
106216,2002,15.8,72149
106217,2002,19.9,72151


In [5]:
merged_bls_df['year'].value_counts()

year
2021    3221
2020    3221
2022    3221
2019    3220
2012    3220
2017    3220
2010    3220
2015    3220
2011    3220
2013    3220
2016    3220
2018    3220
2014    3220
1997    3218
2003    3218
1996    3218
2008    3218
2004    3218
1995    3218
2009    3218
2005    3218
2006    3218
2007    3218
1994    3218
2000    3218
1998    3218
1999    3218
2001    3218
2002    3218
1990    3217
1991    3217
1992    3217
1993    3217
Name: count, dtype: int64

In [6]:
merged_bls_df.isna().sum()

year                  0
unemployment_rate    92
FIPS5                 0
dtype: int64

In [7]:
OUTPUT_PATH = "/Users/koacow/BOSTON UNIVERSITY Dropbox/Ngoc Duy Khoa Cao/GLOB~S/Data/U.S. County Data/county_controls/data/BLSLAUCN_1990_2022/bls_county_unemp_1990_2022.csv"
merged_bls_df.to_csv(OUTPUT_PATH, index=False)