# Loading LAUS and BRFSS CDC Data and Pivoting BRFSS for California

In this notebook, we will be loading the BRFSS and LAUS datasets for California and pivoting the BRFSS dataset from its current format (individual survey responses) into percentages that can be compared with the data for unemployment from LAUS, aligned to each county, for each year. We will also import weights for the BRFSS survey questions, for processing data further downstream into weighted and unweighted categories for regression, and apply a codebook.

In [19]:
# ===============================
# 1. Setup and Load BLS Data
# ===============================

import pandas as pd
import os
import re
import numpy as np

# Load the BLS CSV file
df = pd.read_csv("bls_california/california_county_employment.csv")


print("Shape of dataset:", df.shape)
print("Columns:", df.columns.tolist())
print("\nInfo:")
print(df.info())
display(df.head())

# Standardize column names
df = df.rename(columns={"county": "county_name", "year": "survey_year"})

# Collapse monthly values to yearly summaries
bls_summary = (
    df.groupby(["county_name", "survey_year"])
    .agg({
        "employment": ["mean", "min", "max"],
        "unemployment": ["mean", "min", "max"],
        "labor_force": ["mean", "min", "max"],
        "unemployment_rate": ["mean", "min", "max"]
    })
    .reset_index()
)

# Flatten hierarchical columns
bls_summary.columns = ["_".join(c).strip("_") for c in bls_summary.columns.values]

print("BLS summary shape:", bls_summary.shape)
display(bls_summary.head())

Shape of dataset: (26449, 7)
Columns: ['county', 'year', 'month', 'employment', 'unemployment', 'labor_force', 'unemployment_rate']

Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26449 entries, 0 to 26448
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   county             26449 non-null  object 
 1   year               26449 non-null  int64  
 2   month              26449 non-null  int64  
 3   employment         26449 non-null  float64
 4   unemployment       26449 non-null  float64
 5   labor_force        26449 non-null  float64
 6   unemployment_rate  26449 non-null  float64
dtypes: float64(4), int64(2), object(1)
memory usage: 1.4+ MB
None


Unnamed: 0,county,year,month,employment,unemployment,labor_force,unemployment_rate
0,"Alameda County, CA",1990,1,657058.0,683503.0,26445.0,3.9
1,"Alameda County, CA",1990,2,650679.0,676039.0,25360.0,3.8
2,"Alameda County, CA",1990,3,649735.0,673971.0,24236.0,3.6
3,"Alameda County, CA",1990,4,645659.0,671250.0,25591.0,3.8
4,"Alameda County, CA",1990,5,646698.0,671949.0,25251.0,3.8


BLS summary shape: (2088, 14)


Unnamed: 0,county_name,survey_year,employment_mean,employment_min,employment_max,unemployment_mean,unemployment_min,unemployment_max,labor_force_mean,labor_force_min,labor_force_max,unemployment_rate_mean,unemployment_rate_min,unemployment_rate_max
0,"Alameda County, CA",1990,650569.538462,645537.0,657058.0,678021.153846,671250.0,683547.0,27451.615385,24236.0,30648.0,4.053846,3.6,4.5
1,"Alameda County, CA",1991,635627.307692,625365.0,641120.0,672547.230769,663918.0,682083.0,36919.923077,34344.0,40963.0,5.492308,5.1,6.0
2,"Alameda County, CA",1992,632270.230769,628172.0,638912.0,677020.615385,670513.0,681892.0,44750.384615,41567.0,49198.0,6.607692,6.1,7.2
3,"Alameda County, CA",1993,632021.076923,624134.0,639235.0,677319.923077,669864.0,681111.0,45298.846154,39404.0,49620.0,6.676923,5.8,7.3
4,"Alameda County, CA",1994,636908.769231,633433.0,643742.0,678967.769231,675462.0,684694.0,42059.0,32608.0,46901.0,6.192308,4.8,6.9


In [20]:
# ===============================
# 2. Load and Clean CDC Data - can select certain sheets (socioeconomic-expanded + socio-economic)
# ===============================

data_dir = "cdc_california"
files = [f for f in os.listdir(data_dir) if f.endswith(".csv")] # change this to change selected sheets


dfs = []
for file in files:
    file_path = os.path.join(data_dir, file)
    df = pd.read_csv(file_path)
    
    # Keep only California Rows
    if "state_name" in df.columns:
        df = df[df["state_name"].str.contains("California", case=False, na=False)]
    elif "_state" in df.columns:
        df = df[df["_state"] == 6]  # Cali FIPS
    
    # Restrict years
    if "survey_year" in df.columns:
        df = df[(df["survey_year"] >= 1993) & (df["survey_year"] <= 2010)]
    
    dfs.append(df)

# Combine all years
health_core_data = pd.concat(dfs, ignore_index=True)

# Standardize county_name → "County, ST" format
health_core_data["county_name"] = (
    health_core_data["county_name"].str.strip() + ", " +
    health_core_data["state_name"].str.strip().str[:2].str.upper()
)

print("Health core shape:", health_core_data.shape)
print("Columns:", health_core_data.columns.tolist()[:12], "...")
print("Earliest year:", health_core_data["survey_year"].min())
print("Latest year:", health_core_data["survey_year"].max())


Health core shape: (414720, 68)
Columns: ['county_name', 'state_name', 'survey_year', 'final_weight', 'respondent_age', 'respondent_sex', 'self_reported_race', 'hispanic_ethnicity', 'marital_status', 'education_level', 'employment_status', 'num_adults_in_household'] ...
Earliest year: 1993
Latest year: 2010


In [22]:
# ===============================
# 3. Categorical Percent Breakdowns
# ===============================

cat_vars_ca = [
    "general_health_status",
    "smoked_100_cigarettes",
    "eats_fruit",
    "eats_other_vegetables",
    "cholesterol_checked_5yr",
    "currently_has_asthma",
    "ever_had_mammogram",
    "teeth_cleaned_by_dentist",
    "current_smoking_frequency",
    "last_dentist_visit",
    "smoking_status_recode",
    "ever_told_diabetes",
    "flu_shot_past_year",
    "ever_had_pap_smear",
    "ever_told_high_bp",
    "any_alcohol_past_month",
    "any_physical_activity",
    "ever_told_high_cholesterol",
    "ever_told_asthma",
    "ever_told_heart_attack",
    "ever_told_coronary_heart_disease",
    "ever_told_stroke",
    "ever_told_arthritis",
    "ever_had_sigmoidoscopy_colonoscopy",
    "high_blood_pressure_flag",
    "tobacco_use_flag",
    "obesity_flag",
    "overweight_or_obese_flag",
]

percent_dfs = []

for col in cat_vars_ca:
    # --- Unweighted counts ---
    unweighted = (
        health_core_data
        .groupby(["county_name", "survey_year", col], as_index=False)
        .size()
        .rename(columns={"size": "unweighted_count"})
    )

    # Add unweighted percent
    unweighted["unweighted_percent"] = (
        unweighted.groupby(["county_name", "survey_year"])["unweighted_count"]
        .transform(lambda x: 100 * x / x.sum())
    )

    # --- Weighted counts ---
    weighted = (
        health_core_data
        .groupby(["county_name", "survey_year", col], as_index=False)
        .agg(weighted_count=("final_weight", "sum"))
    )

    weighted["weighted_percent"] = (
        weighted.groupby(["county_name", "survey_year"])["weighted_count"]
        .transform(lambda x: 100 * x / x.sum())
    )

    # --- Merge weighted + unweighted ---
    temp = pd.merge(
        unweighted,
        weighted,
        on=["county_name", "survey_year", col],
        how="outer"
    )

    # Label variable and category
    temp["variable"] = col
    temp.rename(columns={col: "category"}, inplace=True)

    temp = temp[
        [
            "county_name", "survey_year", "variable", "category",
            "unweighted_count", "unweighted_percent",
            "weighted_count", "weighted_percent"
        ]
    ]

    percent_dfs.append(temp)

# Combine all variables
percent_breakdowns = pd.concat(percent_dfs, ignore_index=True)

print("Percent breakdowns shape:", percent_breakdowns.shape)
display(percent_breakdowns.head(20))



Percent breakdowns shape: (21901, 8)


Unnamed: 0,county_name,survey_year,variable,category,unweighted_count,unweighted_percent,weighted_count,weighted_percent
0,"Alameda County, CA",1993,general_health_status,1.0,68,24.637681,388169.6,23.199992
1,"Alameda County, CA",1993,general_health_status,2.0,106,38.405797,711974.52,42.553056
2,"Alameda County, CA",1993,general_health_status,3.0,68,24.637681,388518.66,23.220854
3,"Alameda County, CA",1993,general_health_status,4.0,22,7.971014,107571.66,6.429307
4,"Alameda County, CA",1993,general_health_status,5.0,12,4.347826,76911.0,4.596791
5,"Alameda County, CA",1994,general_health_status,1.0,60,17.751479,289534.26,17.26533
6,"Alameda County, CA",1994,general_health_status,2.0,122,36.094675,668496.04,39.863347
7,"Alameda County, CA",1994,general_health_status,3.0,106,31.360947,496564.16,29.61081
8,"Alameda County, CA",1994,general_health_status,4.0,36,10.650888,150646.72,8.983273
9,"Alameda County, CA",1994,general_health_status,5.0,12,3.550296,64473.64,3.844653


In [23]:
# ===============================
# 4. Pivot CDC Data to Wide Format
# ===============================
cdc_wide = (
    percent_breakdowns
    .pivot_table(
        index=["county_name", "survey_year"],
        columns=["variable", "category"],
        values=["unweighted_percent", "weighted_percent"],  # <-- list of both
        fill_value=0
    )
)

# Flatten MultiIndex into readable column names
cdc_wide.columns = [
    f"{var}_{cat}_uw" if val == "unweighted_percent" else f"{var}_{cat}_w"
    for val, var, cat in cdc_wide.columns
]

cdc_wide = cdc_wide.reset_index()

print("CDC wide shape:", cdc_wide.shape)
display(cdc_wide.head())


CDC wide shape: (492, 588)


Unnamed: 0,county_name,survey_year,any_alcohol_past_month_1.0_uw,any_alcohol_past_month_2.0_uw,any_alcohol_past_month_7.0_uw,any_alcohol_past_month_9.0_uw,any_physical_activity_1.0_uw,any_physical_activity_2.0_uw,any_physical_activity_7.0_uw,cholesterol_checked_5yr_1.0_uw,...,smoking_status_recode_6.0_w,smoking_status_recode_9.0_w,teeth_cleaned_by_dentist_1.0_w,teeth_cleaned_by_dentist_2.0_w,teeth_cleaned_by_dentist_3.0_w,teeth_cleaned_by_dentist_4.0_w,teeth_cleaned_by_dentist_7.0_w,teeth_cleaned_by_dentist_8.0_w,teeth_cleaned_by_dentist_9.0_w,tobacco_use_flag_9.0_w
0,"Alameda County, CA",1993,62.318841,36.956522,0.724638,0.0,0.0,0.0,0.0,60.952381,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
1,"Alameda County, CA",1994,0.0,0.0,0.0,0.0,78.698225,21.301775,0.0,0.0,...,54.424647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
2,"Alameda County, CA",1995,68.27957,31.72043,0.0,0.0,81.182796,18.817204,0.0,67.741935,...,47.506844,0.699723,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
3,"Alameda County, CA",1996,63.190184,36.809816,0.0,0.0,84.04908,15.95092,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
4,"Alameda County, CA",1997,73.053892,26.946108,0.0,0.0,0.0,0.0,0.0,53.030303,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0


In [28]:
# ===============================
# 4. Pivot CDC Data to Wide Format
# ===============================

wide_codebook_map = {
    # ---- general health status ----
    "general_health_status_1.0_uw": "general_health_status_Excellent_uw",
    "general_health_status_2.0_uw": "general_health_status_Very good_uw",
    "general_health_status_3.0_uw": "general_health_status_Good_uw",
    "general_health_status_4.0_uw": "general_health_status_Fair_uw",
    "general_health_status_5.0_uw": "general_health_status_Poor_uw",
    "general_health_status_7.0_uw": "general_health_status_Dont know_uw",
    "general_health_status_9.0_uw": "general_health_status_Refused_uw",

    "general_health_status_1.0_w": "general_health_status_Excellent_w",
    "general_health_status_2.0_w": "general_health_status_Very good_w",
    "general_health_status_3.0_w": "general_health_status_Good_w",
    "general_health_status_4.0_w": "general_health_status_Fair_w",
    "general_health_status_5.0_w": "general_health_status_Poor_w",
    "general_health_status_7.0_w": "general_health_status_Dont know_w",
    "general_health_status_9.0_w": "general_health_status_Refused_w",

    # ---- ever told high BP ----
    "ever_told_high_bp_1.0_uw": "ever_told_high_bp_Yes_uw",
    "ever_told_high_bp_2.0_uw": "ever_told_high_bp_No_uw",
    "ever_told_high_bp_7.0_uw": "ever_told_high_bp_Dont know_uw",
    "ever_told_high_bp_9.0_uw": "ever_told_high_bp_Refused_uw",

    "ever_told_high_bp_1.0_w": "ever_told_high_bp_Yes_w",
    "ever_told_high_bp_2.0_w": "ever_told_high_bp_No_w",
    "ever_told_high_bp_7.0_w": "ever_told_high_bp_Dont know_w",
    "ever_told_high_bp_9.0_w": "ever_told_high_bp_Refused_w",

    # ---- ever told diabetes ----
    "ever_told_diabetes_1.0_uw": "ever_told_diabetes_Yes_uw",
    "ever_told_diabetes_2.0_uw": "ever_told_diabetes_No_uw",
    "ever_told_diabetes_3.0_uw": "ever_told_diabetes_Pregnancy_uw",
    "ever_told_diabetes_4.0_uw": "ever_told_diabetes_Prediabetes_uw",
    "ever_told_diabetes_7.0_uw": "ever_told_diabetes_Dont know_uw",
    "ever_told_diabetes_9.0_uw": "ever_told_diabetes_Refused_uw",

    "ever_told_diabetes_1.0_w": "ever_told_diabetes_Yes_w",
    "ever_told_diabetes_2.0_w": "ever_told_diabetes_No_w",
    "ever_told_diabetes_3.0_w": "ever_told_diabetes_Pregnancy_w",
    "ever_told_diabetes_4.0_w": "ever_told_diabetes_Prediabetes_w",
    "ever_told_diabetes_7.0_w": "ever_told_diabetes_Dont know_w",
    "ever_told_diabetes_9.0_w": "ever_told_diabetes_Refused_w",

    # ---- smoked 100 cigs ----
    "smoked_100_cigarettes_1.0_uw": "smoked_100_cigarettes_Yes_uw",
    "smoked_100_cigarettes_2.0_uw": "smoked_100_cigarettes_No_uw",
    "smoked_100_cigarettes_7.0_uw": "smoked_100_cigarettes_Dont know_uw",
    "smoked_100_cigarettes_9.0_uw": "smoked_100_cigarettes_Refused_uw",

    "smoked_100_cigarettes_1.0_w": "smoked_100_cigarettes_Yes_w",
    "smoked_100_cigarettes_2.0_w": "smoked_100_cigarettes_No_w",
    "smoked_100_cigarettes_7.0_w": "smoked_100_cigarettes_Dont know_w",
    "smoked_100_cigarettes_9.0_w": "smoked_100_cigarettes_Refused_w",

    # ---- current smoking frequency ----
    "current_smoking_frequency_1.0_uw": "current_smoking_frequency_Every day_uw",
    "current_smoking_frequency_2.0_uw": "current_smoking_frequency_Some days_uw",
    "current_smoking_frequency_3.0_uw": "current_smoking_frequency_Not at all_uw",
    "current_smoking_frequency_7.0_uw": "current_smoking_frequency_Dont know_uw",
    "current_smoking_frequency_9.0_uw": "current_smoking_frequency_Refused_uw",

    "current_smoking_frequency_1.0_w": "current_smoking_frequency_Every day_w",
    "current_smoking_frequency_2.0_w": "current_smoking_frequency_Some days_w",
    "current_smoking_frequency_3.0_w": "current_smoking_frequency_Not at all_w",
    "current_smoking_frequency_7.0_w": "current_smoking_frequency_Dont know_w",
    "current_smoking_frequency_9.0_w": "current_smoking_frequency_Refused_w",

    # ---- any alcohol past month ----
    "any_alcohol_past_month_1.0_uw": "any_alcohol_past_month_Yes_uw",
    "any_alcohol_past_month_2.0_uw": "any_alcohol_past_month_No_uw",
    "any_alcohol_past_month_7.0_uw": "any_alcohol_past_month_Dont know_uw",
    "any_alcohol_past_month_9.0_uw": "any_alcohol_past_month_Refused_uw",

    "any_alcohol_past_month_1.0_w": "any_alcohol_past_month_Yes_w",
    "any_alcohol_past_month_2.0_w": "any_alcohol_past_month_No_w",
    "any_alcohol_past_month_7.0_w": "any_alcohol_past_month_Dont know_w",
    "any_alcohol_past_month_9.0_w": "any_alcohol_past_month_Refused_w",

    # ---- eats fruit ----
    "eats_fruit_1.0_uw": "eats_fruit_Yes_uw",
    "eats_fruit_2.0_uw": "eats_fruit_No_uw",
    "eats_fruit_7.0_uw": "eats_fruit_Dont know_uw",
    "eats_fruit_9.0_uw": "eats_fruit_Refused_uw",

    "eats_fruit_1.0_w": "eats_fruit_Yes_w",
    "eats_fruit_2.0_w": "eats_fruit_No_w",
    "eats_fruit_7.0_w": "eats_fruit_Dont know_w",
    "eats_fruit_9.0_w": "eats_fruit_Refused_w",

    # ---- eats vegetables ----
    "eats_other_vegetables_1.0_uw": "eats_other_vegetables_Yes_uw",
    "eats_other_vegetables_2.0_uw": "eats_other_vegetables_No_uw",
    "eats_other_vegetables_7.0_uw": "eats_other_vegetables_Dont know_uw",
    "eats_other_vegetables_9.0_uw": "eats_other_vegetables_Refused_uw",

    "eats_other_vegetables_1.0_w": "eats_other_vegetables_Yes_w",
    "eats_other_vegetables_2.0_w": "eats_other_vegetables_No_w",
    "eats_other_vegetables_7.0_w": "eats_other_vegetables_Dont know_w",
    "eats_other_vegetables_9.0_w": "eats_other_vegetables_Refused_w",

    # ---- any physical activity ----
    "any_physical_activity_1.0_uw": "any_physical_activity_Yes_uw",
    "any_physical_activity_2.0_uw": "any_physical_activity_No_uw",
    "any_physical_activity_7.0_uw": "any_physical_activity_Dont know_uw",
    "any_physical_activity_9.0_uw": "any_physical_activity_Refused_uw",

    "any_physical_activity_1.0_w": "any_physical_activity_Yes_w",
    "any_physical_activity_2.0_w": "any_physical_activity_No_w",
    "any_physical_activity_7.0_w": "any_physical_activity_Dont know_w",
    "any_physical_activity_9.0_w": "any_physical_activity_Refused_w",
}

cdc_wide = (
    percent_breakdowns
    .pivot_table(
        index=["county_name", "survey_year"],
        columns=["variable", "category"],
        values=["unweighted_percent", "weighted_percent"],  # <-- list of both
        fill_value=0
    )
)

# Flatten MultiIndex into readable column names
cdc_wide.columns = [
    f"{var}_{cat}_uw" if val == "unweighted_percent" else f"{var}_{cat}_w"
    for val, var, cat in cdc_wide.columns
]


CDC wide shape: (492, 586)


Unnamed: 0_level_0,Unnamed: 1_level_0,any_alcohol_past_month_Yes_uw,any_alcohol_past_month_No_uw,any_alcohol_past_month_Dont know_uw,any_alcohol_past_month_Refused_uw,any_physical_activity_Yes_uw,any_physical_activity_No_uw,any_physical_activity_Dont know_uw,cholesterol_checked_5yr_1.0_uw,cholesterol_checked_5yr_2.0_uw,cholesterol_checked_5yr_3.0_uw,...,smoking_status_recode_6.0_w,smoking_status_recode_9.0_w,teeth_cleaned_by_dentist_1.0_w,teeth_cleaned_by_dentist_2.0_w,teeth_cleaned_by_dentist_3.0_w,teeth_cleaned_by_dentist_4.0_w,teeth_cleaned_by_dentist_7.0_w,teeth_cleaned_by_dentist_8.0_w,teeth_cleaned_by_dentist_9.0_w,tobacco_use_flag_9.0_w
county_name,survey_year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
"Alameda County, CA",1993,62.318841,36.956522,0.724638,0.0,0.0,0.0,0.0,60.952381,19.047619,17.142857,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
"Alameda County, CA",1994,0.0,0.0,0.0,0.0,78.698225,21.301775,0.0,0.0,0.0,0.0,...,54.424647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
"Alameda County, CA",1995,68.27957,31.72043,0.0,0.0,81.182796,18.817204,0.0,67.741935,12.096774,14.516129,...,47.506844,0.699723,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
"Alameda County, CA",1996,63.190184,36.809816,0.0,0.0,84.04908,15.95092,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
"Alameda County, CA",1997,73.053892,26.946108,0.0,0.0,0.0,0.0,0.0,53.030303,23.484848,18.939394,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0


In [None]:
# ===============================
# 5. Apply Readable Labels (Codebook)
# ===============================
cdc_wide = cdc_wide.rename(columns=wide_codebook_map)

print("CDC wide shape:", cdc_wide.shape)
display(cdc_wide.head())


## Summary

With the LA.data files processed successfully, you should have 50 CSVs within a subfolder called state_csv. These correspond to each state, named with the state name from the La.data file.

- In future notebooks, we will process the BRFSS data into the same format using a pivot table and a renaming map constructed from the BRFSS codebook.

In [29]:
# ===============================
# 5. Merge CDC Health and BLS Labor Data
# ===============================

merged = pd.merge(
    cdc_wide,
    bls_summary,
    on=["county_name", "survey_year"],
    how="inner"   # inner join: only keep counties/years that appear in both
)

print("Final merged shape:", merged.shape)
display(merged.head())

Final merged shape: (474, 600)


Unnamed: 0,county_name,survey_year,any_alcohol_past_month_Yes_uw,any_alcohol_past_month_No_uw,any_alcohol_past_month_Dont know_uw,any_alcohol_past_month_Refused_uw,any_physical_activity_Yes_uw,any_physical_activity_No_uw,any_physical_activity_Dont know_uw,cholesterol_checked_5yr_1.0_uw,...,employment_max,unemployment_mean,unemployment_min,unemployment_max,labor_force_mean,labor_force_min,labor_force_max,unemployment_rate_mean,unemployment_rate_min,unemployment_rate_max
0,"Alameda County, CA",1993,62.318841,36.956522,0.724638,0.0,0.0,0.0,0.0,60.952381,...,639235.0,677319.923077,669864.0,681111.0,45298.846154,39404.0,49620.0,6.676923,5.8,7.3
1,"Alameda County, CA",1994,0.0,0.0,0.0,0.0,78.698225,21.301775,0.0,0.0,...,643742.0,678967.769231,675462.0,684694.0,42059.0,32608.0,46901.0,6.192308,4.8,6.9
2,"Alameda County, CA",1995,68.27957,31.72043,0.0,0.0,81.182796,18.817204,0.0,67.741935,...,642535.0,676034.846154,668883.0,684352.0,38880.307692,32278.0,44668.0,5.776923,4.8,6.5
3,"Alameda County, CA",1996,63.190184,36.809816,0.0,0.0,84.04908,15.95092,0.0,0.0,...,659950.0,677688.461538,665654.0,689632.0,34591.769231,28610.0,39006.0,5.107692,4.2,5.7
4,"Alameda County, CA",1997,73.053892,26.946108,0.0,0.0,0.0,0.0,0.0,53.030303,...,678307.0,693262.461538,683088.0,703102.0,31132.692308,24795.0,34815.0,4.5,3.5,5.0


In [30]:
# Save merged dataset to CSV
merged.to_csv("california_health_bls_merged.csv", index=False)

print("✅ Exported merged dataset to california_health_bls_merged.csv")

✅ Exported merged dataset to california_health_bls_merged.csv


## Summary

The result of this pivot table process is a merged dataset that enables comparisons between LAUS and BRFSS. This dataset is output into the same directory that this notebook is running.