# Loading LAUS and BRFSS CDC Data and Pivoting BRFSS for Michigan

In this notebook, we will be loading the BRFSS and LAUS datasets for Michigan and pivoting the BRFSS dataset from its current format (individual survey responses) into percentages that can be compared with the data for unemployment from LAUS, aligned to each county, for each year. We will also import weights for the BRFSS survey questions, for processing data further downstream into weighted and unweighted categories for regression, and apply a codebook.

In [1]:
# ===============================
# 1. Setup and Load BLS Data
# ===============================

import pandas as pd
import os
import re 
import numpy as np 

# Load the BLS CSV file from 01_processing_ladata_files
df = pd.read_csv("bls_mi/Michigan_county_employment.csv")

print("Shape of dataset:", df.shape)
print("Columns:", df.columns.tolist())
print("\nInfo:")
print(df.info())
display(df.head())

# Standardize column names
df = df.rename(columns={"county": "county_name", "year": "survey_year"})

# Collapse monthly values to yearly summaries
bls_summary = (
    df.groupby(["county_name", "survey_year"])
    .agg({
        "employment": ["mean", "min", "max"],
        "unemployment": ["mean", "min", "max"],
        "labor_force": ["mean", "min", "max"],
        "unemployment_rate": ["mean", "min", "max"]
    })
    .reset_index()
)

# Flatten hierarchical columns
bls_summary.columns = ["_".join(c).strip("_") for c in bls_summary.columns.values]

print("BLS summary shape:", bls_summary.shape)
print(bls_summary.columns)
allegan = bls_summary[bls_summary['county_name'] == 'Allegan County, MI']
display(allegan)

Shape of dataset: (37848, 7)
Columns: ['county', 'year', 'month', 'unemployment_rate', 'labor_force', 'employment', 'unemployment']

Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37848 entries, 0 to 37847
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   county             37848 non-null  object 
 1   year               37848 non-null  int64  
 2   month              37848 non-null  int64  
 3   unemployment_rate  37848 non-null  float64
 4   labor_force        37848 non-null  float64
 5   employment         37848 non-null  float64
 6   unemployment       37848 non-null  float64
dtypes: float64(4), int64(2), object(1)
memory usage: 2.0+ MB
None


Unnamed: 0,county,year,month,unemployment_rate,labor_force,employment,unemployment
0,"Alcona County, MI",1990,1,16.6,636.0,3192.0,3828.0
1,"Alcona County, MI",1990,2,16.2,611.0,3171.0,3782.0
2,"Alcona County, MI",1990,3,15.8,608.0,3246.0,3854.0
3,"Alcona County, MI",1990,4,14.7,579.0,3369.0,3948.0
4,"Alcona County, MI",1990,5,11.7,468.0,3524.0,3992.0


BLS summary shape: (2988, 14)
Index(['county_name', 'survey_year', 'employment_mean', 'employment_min',
       'employment_max', 'unemployment_mean', 'unemployment_min',
       'unemployment_max', 'labor_force_mean', 'labor_force_min',
       'labor_force_max', 'unemployment_rate_mean', 'unemployment_rate_min',
       'unemployment_rate_max'],
      dtype='object')


Unnamed: 0,county_name,survey_year,employment_mean,employment_min,employment_max,unemployment_mean,unemployment_min,unemployment_max,labor_force_mean,labor_force_min,labor_force_max,unemployment_rate_mean,unemployment_rate_min,unemployment_rate_max
72,"Allegan County, MI",1990,43189.692308,42306.0,44049.0,45989.307692,45390.0,46926.0,2799.615385,2480.0,3084.0,6.1,5.3,6.8
73,"Allegan County, MI",1991,42206.153846,41596.0,42727.0,45397.461538,45013.0,45963.0,3191.307692,2689.0,3893.0,7.007692,6.0,8.5
74,"Allegan County, MI",1992,43001.076923,41364.0,44003.0,46239.384615,45268.0,47053.0,3238.307692,2472.0,4118.0,7.007692,5.4,9.0
75,"Allegan County, MI",1993,45664.153846,43474.0,47167.0,48436.076923,46928.0,49708.0,2771.923077,2154.0,3503.0,5.746154,4.4,7.4
76,"Allegan County, MI",1994,48434.538462,46560.0,49838.0,50684.076923,49869.0,51427.0,2249.538462,1577.0,3309.0,4.438462,3.1,6.6
77,"Allegan County, MI",1995,49654.615385,48704.0,50249.0,51614.846154,51094.0,52231.0,1960.230769,1619.0,2390.0,3.807692,3.1,4.7
78,"Allegan County, MI",1996,50862.230769,49004.0,52280.0,52748.230769,51559.0,53779.0,1886.0,1272.0,2555.0,3.592308,2.4,5.0
79,"Allegan County, MI",1997,53459.153846,51654.0,54570.0,55118.692308,53834.0,56108.0,1659.538462,1225.0,2195.0,3.0,2.2,4.0
80,"Allegan County, MI",1998,54737.076923,53658.0,55581.0,56281.307692,55613.0,56958.0,1544.230769,1115.0,2104.0,2.746154,2.0,3.7
81,"Allegan County, MI",1999,56429.846154,55191.0,57710.0,57969.076923,57088.0,59107.0,1539.230769,1175.0,2021.0,2.661538,2.0,3.5


In [2]:
# ===============================
# 2. Load and Clean CDC Data - can select certain sheets (socioeconomic-expanded + socio-economic)
# ===============================

data_dir = "cdc_mi"
files = [f for f in os.listdir(data_dir) if f.endswith(".csv")] # change this to change selected sheets


dfs = []
for file in files:
    file_path = os.path.join(data_dir, file)
    df = pd.read_csv(file_path)
    
    # Keep only Michigan Rows
    if "state_name" in df.columns:
        df = df[df["state_name"].str.contains("Michigan", case=False, na=False)]
    elif "_state" in df.columns:
        df = df[df["_state"] == 26]  # Michigan Fips
    
    # Restrict years
    if "survey_year" in df.columns:
        df = df[(df["survey_year"] >= 1993) & (df["survey_year"] <= 2010)]
    
    dfs.append(df)

# Combine all years
health_core_data = pd.concat(dfs, ignore_index=True)

# Standardize county_name → "County, ST" format
health_core_data["county_name"] = (
    health_core_data["county_name"].str.strip() + ", " +
    health_core_data["state_name"].str.strip().str[:2].str.upper()
)

print("Health core shape:", health_core_data.shape)
print("Columns:", health_core_data.columns.tolist()[:12], "...")
print("Earliest year:", health_core_data["survey_year"].min())
print("Latest year:", health_core_data["survey_year"].max())


Health core shape: (301576, 68)
Columns: ['county_name', 'state_name', 'survey_year', 'final_weight', 'respondent_age', 'respondent_sex', 'self_reported_race', 'hispanic_ethnicity', 'marital_status', 'education_level', 'employment_status', 'num_adults_in_household'] ...
Earliest year: 1993
Latest year: 2010


In [3]:
# ===============================
# 3. Categorical Percent Breakdowns
# ===============================

cat_vars_mi = [
    "general_health_status",
    "smoked_100_cigarettes",
    "eats_fruit",
    "eats_other_vegetables",
    "cholesterol_checked_5yr",
    "currently_has_asthma",
    "ever_had_mammogram",
    "teeth_cleaned_by_dentist",
    "current_smoking_frequency",
    "last_dentist_visit",
    "smoking_status_recode",
    "ever_told_diabetes",
    "flu_shot_past_year",
    "ever_had_pap_smear",
    "ever_told_high_bp",
    "any_alcohol_past_month",
    "any_physical_activity",
    "ever_told_high_cholesterol",
    "ever_told_asthma",
    "ever_told_heart_attack",
    "ever_told_coronary_heart_disease",
    "ever_told_stroke",
    "ever_told_arthritis",
    "ever_had_sigmoidoscopy_colonoscopy",
    "high_blood_pressure_flag",
    "tobacco_use_flag",
    "obesity_flag",
    "overweight_or_obese_flag",
]

percent_dfs = []

for col in cat_vars_mi:
    # --- Unweighted counts ---
    unweighted = (
        health_core_data
        .groupby(["county_name", "survey_year", col], as_index=False)
        .size()
        .rename(columns={"size": "unweighted_count"})
    )

    # Add unweighted percent
    unweighted["unweighted_percent"] = (
        unweighted.groupby(["county_name", "survey_year"])["unweighted_count"]
        .transform(lambda x: 100 * x / x.sum())
    )

    # --- Weighted counts ---
    weighted = (
        health_core_data
        .groupby(["county_name", "survey_year", col], as_index=False)
        .agg(weighted_count=("final_weight", "sum"))
    )

    weighted["weighted_percent"] = (
        weighted.groupby(["county_name", "survey_year"])["weighted_count"]
        .transform(lambda x: 100 * x / x.sum())
    )

    # --- Merge weighted + unweighted ---
    temp = pd.merge(
        unweighted,
        weighted,
        on=["county_name", "survey_year", col],
        how="outer"
    )

    # Label variable and category
    temp["variable"] = col
    temp.rename(columns={col: "category"}, inplace=True)

    temp = temp[
        [
            "county_name", "survey_year", "variable", "category",
            "unweighted_count", "unweighted_percent",
            "weighted_count", "weighted_percent"
        ]
    ]

    percent_dfs.append(temp)

# Combine all variables
percent_breakdowns = pd.concat(percent_dfs, ignore_index=True)

print("Percent breakdowns shape:", percent_breakdowns.shape)
display(percent_breakdowns.head(20))



Percent breakdowns shape: (20265, 8)


Unnamed: 0,county_name,survey_year,variable,category,unweighted_count,unweighted_percent,weighted_count,weighted_percent
0,"Allegan County, MI",2002,general_health_status,1.0,28,20.895522,47991.354204,22.992329
1,"Allegan County, MI",2002,general_health_status,2.0,50,37.313433,68666.554369,32.897675
2,"Allegan County, MI",2002,general_health_status,3.0,40,29.850746,73683.278566,35.301153
3,"Allegan County, MI",2002,general_health_status,4.0,4,2.985075,3736.412065,1.790089
4,"Allegan County, MI",2002,general_health_status,5.0,10,7.462687,13064.349302,6.25904
5,"Allegan County, MI",2002,general_health_status,9.0,2,1.492537,1585.732815,0.759714
6,"Allegan County, MI",2004,general_health_status,1.0,20,15.384615,41302.771355,21.444954
7,"Allegan County, MI",2004,general_health_status,2.0,40,30.769231,53356.234115,27.703274
8,"Allegan County, MI",2004,general_health_status,3.0,50,38.461538,74055.416046,38.450568
9,"Allegan County, MI",2004,general_health_status,4.0,16,12.307692,17991.407097,9.341381


In [4]:
# ===============================
# 4. Pivot CDC Data to Wide Format
# ===============================

wide_codebook_map = {
    # ---- general health status ----
    "general_health_status_1.0_uw": "general_health_status_Excellent_uw",
    "general_health_status_2.0_uw": "general_health_status_Very good_uw",
    "general_health_status_3.0_uw": "general_health_status_Good_uw",
    "general_health_status_4.0_uw": "general_health_status_Fair_uw",
    "general_health_status_5.0_uw": "general_health_status_Poor_uw",
    "general_health_status_7.0_uw": "general_health_status_Dont know_uw",
    "general_health_status_9.0_uw": "general_health_status_Refused_uw",

    "general_health_status_1.0_w": "general_health_status_Excellent_w",
    "general_health_status_2.0_w": "general_health_status_Very good_w",
    "general_health_status_3.0_w": "general_health_status_Good_w",
    "general_health_status_4.0_w": "general_health_status_Fair_w",
    "general_health_status_5.0_w": "general_health_status_Poor_w",
    "general_health_status_7.0_w": "general_health_status_Dont know_w",
    "general_health_status_9.0_w": "general_health_status_Refused_w",

    # ---- ever told high BP ----
    "ever_told_high_bp_1.0_uw": "ever_told_high_bp_Yes_uw",
    "ever_told_high_bp_2.0_uw": "ever_told_high_bp_No_uw",
    "ever_told_high_bp_7.0_uw": "ever_told_high_bp_Dont know_uw",
    "ever_told_high_bp_9.0_uw": "ever_told_high_bp_Refused_uw",

    "ever_told_high_bp_1.0_w": "ever_told_high_bp_Yes_w",
    "ever_told_high_bp_2.0_w": "ever_told_high_bp_No_w",
    "ever_told_high_bp_7.0_w": "ever_told_high_bp_Dont know_w",
    "ever_told_high_bp_9.0_w": "ever_told_high_bp_Refused_w",

    # ---- ever told diabetes ----
    "ever_told_diabetes_1.0_uw": "ever_told_diabetes_Yes_uw",
    "ever_told_diabetes_2.0_uw": "ever_told_diabetes_No_uw",
    "ever_told_diabetes_3.0_uw": "ever_told_diabetes_Pregnancy_uw",
    "ever_told_diabetes_4.0_uw": "ever_told_diabetes_Prediabetes_uw",
    "ever_told_diabetes_7.0_uw": "ever_told_diabetes_Dont know_uw",
    "ever_told_diabetes_9.0_uw": "ever_told_diabetes_Refused_uw",

    "ever_told_diabetes_1.0_w": "ever_told_diabetes_Yes_w",
    "ever_told_diabetes_2.0_w": "ever_told_diabetes_No_w",
    "ever_told_diabetes_3.0_w": "ever_told_diabetes_Pregnancy_w",
    "ever_told_diabetes_4.0_w": "ever_told_diabetes_Prediabetes_w",
    "ever_told_diabetes_7.0_w": "ever_told_diabetes_Dont know_w",
    "ever_told_diabetes_9.0_w": "ever_told_diabetes_Refused_w",

    # ---- smoked 100 cigs ----
    "smoked_100_cigarettes_1.0_uw": "smoked_100_cigarettes_Yes_uw",
    "smoked_100_cigarettes_2.0_uw": "smoked_100_cigarettes_No_uw",
    "smoked_100_cigarettes_7.0_uw": "smoked_100_cigarettes_Dont know_uw",
    "smoked_100_cigarettes_9.0_uw": "smoked_100_cigarettes_Refused_uw",

    "smoked_100_cigarettes_1.0_w": "smoked_100_cigarettes_Yes_w",
    "smoked_100_cigarettes_2.0_w": "smoked_100_cigarettes_No_w",
    "smoked_100_cigarettes_7.0_w": "smoked_100_cigarettes_Dont know_w",
    "smoked_100_cigarettes_9.0_w": "smoked_100_cigarettes_Refused_w",

    # ---- current smoking frequency ----
    "current_smoking_frequency_1.0_uw": "current_smoking_frequency_Every day_uw",
    "current_smoking_frequency_2.0_uw": "current_smoking_frequency_Some days_uw",
    "current_smoking_frequency_3.0_uw": "current_smoking_frequency_Not at all_uw",
    "current_smoking_frequency_7.0_uw": "current_smoking_frequency_Dont know_uw",
    "current_smoking_frequency_9.0_uw": "current_smoking_frequency_Refused_uw",

    "current_smoking_frequency_1.0_w": "current_smoking_frequency_Every day_w",
    "current_smoking_frequency_2.0_w": "current_smoking_frequency_Some days_w",
    "current_smoking_frequency_3.0_w": "current_smoking_frequency_Not at all_w",
    "current_smoking_frequency_7.0_w": "current_smoking_frequency_Dont know_w",
    "current_smoking_frequency_9.0_w": "current_smoking_frequency_Refused_w",

    # ---- any alcohol past month ----
    "any_alcohol_past_month_1.0_uw": "any_alcohol_past_month_Yes_uw",
    "any_alcohol_past_month_2.0_uw": "any_alcohol_past_month_No_uw",
    "any_alcohol_past_month_7.0_uw": "any_alcohol_past_month_Dont know_uw",
    "any_alcohol_past_month_9.0_uw": "any_alcohol_past_month_Refused_uw",

    "any_alcohol_past_month_1.0_w": "any_alcohol_past_month_Yes_w",
    "any_alcohol_past_month_2.0_w": "any_alcohol_past_month_No_w",
    "any_alcohol_past_month_7.0_w": "any_alcohol_past_month_Dont know_w",
    "any_alcohol_past_month_9.0_w": "any_alcohol_past_month_Refused_w",

    # ---- eats fruit ----
    "eats_fruit_1.0_uw": "eats_fruit_Yes_uw",
    "eats_fruit_2.0_uw": "eats_fruit_No_uw",
    "eats_fruit_7.0_uw": "eats_fruit_Dont know_uw",
    "eats_fruit_9.0_uw": "eats_fruit_Refused_uw",

    "eats_fruit_1.0_w": "eats_fruit_Yes_w",
    "eats_fruit_2.0_w": "eats_fruit_No_w",
    "eats_fruit_7.0_w": "eats_fruit_Dont know_w",
    "eats_fruit_9.0_w": "eats_fruit_Refused_w",

    # ---- eats vegetables ----
    "eats_other_vegetables_1.0_uw": "eats_other_vegetables_Yes_uw",
    "eats_other_vegetables_2.0_uw": "eats_other_vegetables_No_uw",
    "eats_other_vegetables_7.0_uw": "eats_other_vegetables_Dont know_uw",
    "eats_other_vegetables_9.0_uw": "eats_other_vegetables_Refused_uw",

    "eats_other_vegetables_1.0_w": "eats_other_vegetables_Yes_w",
    "eats_other_vegetables_2.0_w": "eats_other_vegetables_No_w",
    "eats_other_vegetables_7.0_w": "eats_other_vegetables_Dont know_w",
    "eats_other_vegetables_9.0_w": "eats_other_vegetables_Refused_w",

    # ---- any physical activity ----
    "any_physical_activity_1.0_uw": "any_physical_activity_Yes_uw",
    "any_physical_activity_2.0_uw": "any_physical_activity_No_uw",
    "any_physical_activity_7.0_uw": "any_physical_activity_Dont know_uw",
    "any_physical_activity_9.0_uw": "any_physical_activity_Refused_uw",

    "any_physical_activity_1.0_w": "any_physical_activity_Yes_w",
    "any_physical_activity_2.0_w": "any_physical_activity_No_w",
    "any_physical_activity_7.0_w": "any_physical_activity_Dont know_w",
    "any_physical_activity_9.0_w": "any_physical_activity_Refused_w",
}

cdc_wide = (
    percent_breakdowns
    .pivot_table(
        index=["county_name", "survey_year"],
        columns=["variable", "category"],
        values=["unweighted_percent", "weighted_percent"],
        fill_value=0
    )
)

# Flatten MultiIndex into readable column names
cdc_wide.columns = [
    f"{var}_{cat}_uw" if val == "unweighted_percent" else f"{var}_{cat}_w"
    for val, var, cat in cdc_wide.columns
]

cdc_wide = cdc_wide.rename(columns=wide_codebook_map)

print("CDC wide shape:", cdc_wide.shape)
display(cdc_wide.head())


CDC wide shape: (521, 556)


Unnamed: 0_level_0,Unnamed: 1_level_0,any_alcohol_past_month_Yes_uw,any_alcohol_past_month_No_uw,any_alcohol_past_month_Dont know_uw,any_physical_activity_Yes_uw,any_physical_activity_No_uw,any_physical_activity_Dont know_uw,cholesterol_checked_5yr_1.0_uw,cholesterol_checked_5yr_2.0_uw,cholesterol_checked_5yr_3.0_uw,cholesterol_checked_5yr_4.0_uw,...,smoking_status_recode_6.0_w,smoking_status_recode_9.0_w,teeth_cleaned_by_dentist_1.0_w,teeth_cleaned_by_dentist_2.0_w,teeth_cleaned_by_dentist_3.0_w,teeth_cleaned_by_dentist_4.0_w,teeth_cleaned_by_dentist_7.0_w,teeth_cleaned_by_dentist_8.0_w,teeth_cleaned_by_dentist_9.0_w,tobacco_use_flag_9.0_w
county_name,survey_year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
"Allegan County, MI",2002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,83.29386,7.090809,3.30934,4.750868,1.555123,0.0,0.0,0.0
"Allegan County, MI",2004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,70.220249,12.017025,8.110495,9.652232,0.0,0.0,0.0,0.0
"Allegan County, MI",2005,0.0,0.0,0.0,0.0,0.0,0.0,68.333333,18.333333,7.5,4.166667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Allegan County, MI",2006,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,67.445812,2.942859,19.254181,9.137252,0.0,1.219896,0.0,0.0
"Allegan County, MI",2007,0.0,0.0,0.0,0.0,0.0,0.0,73.469388,12.244898,4.081633,8.163265,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# ===============================
# 5. Apply Readable Labels (Codebook)
# ===============================
cdc_wide = cdc_wide.rename(columns=wide_codebook_map)

print("CDC wide shape:", cdc_wide.shape)
display(cdc_wide.head())

CDC wide shape: (521, 556)


Unnamed: 0_level_0,Unnamed: 1_level_0,any_alcohol_past_month_Yes_uw,any_alcohol_past_month_No_uw,any_alcohol_past_month_Dont know_uw,any_physical_activity_Yes_uw,any_physical_activity_No_uw,any_physical_activity_Dont know_uw,cholesterol_checked_5yr_1.0_uw,cholesterol_checked_5yr_2.0_uw,cholesterol_checked_5yr_3.0_uw,cholesterol_checked_5yr_4.0_uw,...,smoking_status_recode_6.0_w,smoking_status_recode_9.0_w,teeth_cleaned_by_dentist_1.0_w,teeth_cleaned_by_dentist_2.0_w,teeth_cleaned_by_dentist_3.0_w,teeth_cleaned_by_dentist_4.0_w,teeth_cleaned_by_dentist_7.0_w,teeth_cleaned_by_dentist_8.0_w,teeth_cleaned_by_dentist_9.0_w,tobacco_use_flag_9.0_w
county_name,survey_year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
"Allegan County, MI",2002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,83.29386,7.090809,3.30934,4.750868,1.555123,0.0,0.0,0.0
"Allegan County, MI",2004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,70.220249,12.017025,8.110495,9.652232,0.0,0.0,0.0,0.0
"Allegan County, MI",2005,0.0,0.0,0.0,0.0,0.0,0.0,68.333333,18.333333,7.5,4.166667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Allegan County, MI",2006,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,67.445812,2.942859,19.254181,9.137252,0.0,1.219896,0.0,0.0
"Allegan County, MI",2007,0.0,0.0,0.0,0.0,0.0,0.0,73.469388,12.244898,4.081633,8.163265,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# ===============================
# 6. Merge CDC Health and BLS Labor Data
# ===============================

merged = pd.merge(
    cdc_wide,
    bls_summary,
    on=["county_name", "survey_year"],
    how="inner"   # inner join
)

print("Final merged shape:", merged.shape)
display(merged.head())

Final merged shape: (521, 570)


Unnamed: 0,county_name,survey_year,any_alcohol_past_month_Yes_uw,any_alcohol_past_month_No_uw,any_alcohol_past_month_Dont know_uw,any_physical_activity_Yes_uw,any_physical_activity_No_uw,any_physical_activity_Dont know_uw,cholesterol_checked_5yr_1.0_uw,cholesterol_checked_5yr_2.0_uw,...,employment_max,unemployment_mean,unemployment_min,unemployment_max,labor_force_mean,labor_force_min,labor_force_max,unemployment_rate_mean,unemployment_rate_min,unemployment_rate_max
0,"Allegan County, MI",2002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,53391.0,55116.384615,53223.0,56804.0,3098.615385,2456.0,3558.0,5.623077,4.5,6.5
1,"Allegan County, MI",2004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,51389.0,53465.230769,52608.0,55016.0,3430.846154,2994.0,3857.0,6.430769,5.7,7.3
2,"Allegan County, MI",2005,0.0,0.0,0.0,0.0,0.0,0.0,68.333333,18.333333,...,53006.0,55153.0,53297.0,56522.0,3445.384615,2929.0,4086.0,6.246154,5.3,7.6
3,"Allegan County, MI",2006,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,54224.0,56370.076923,55270.0,57793.0,3468.076923,2892.0,3883.0,6.161538,5.2,7.0
4,"Allegan County, MI",2007,0.0,0.0,0.0,0.0,0.0,0.0,73.469388,12.244898,...,53748.0,56008.615385,55096.0,57299.0,3568.384615,3119.0,3989.0,6.376923,5.6,7.2


In [7]:
# Save merged dataset to CSV
merged.to_csv("michigan_health_bls_merged.csv", index=False)

print("✅ Exported merged dataset to michigan_health_bls_merged.csv")

✅ Exported merged dataset to michigan_health_bls_merged.csv


## Summary

The result of this pivot table process is 