- CDC PLACES: Local Data for Better Health County Data  2023, 2024 release
  - Source: https://data.cdc.gov/500-Cities-Places/PLACES-Local-Data-for-Better-Health-County-Data-20/swc5-untb/
  - Includes county-level health measures including diabetes prevalence from BRFSS for 2021 and 2022
  - Data includes confidence intervals and population denominators
  - Contains multiple health indicators and measures across different categories

In [2]:
import os
from pathlib import Path
import pandas as pd

#pandas settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
# Get the absolute path to the project root
project_root = Path.cwd().parent.parent

# Construct the path to the PLACES data files
places_path = project_root / '02_data' / '02.01_raw'

# Read the PLACES data files with correct filenames

places_2023 = pd.read_csv(places_path / 'PLACES__Local_Data_for_Better_Health__County_Data_2023.csv')
places_2024 = pd.read_csv(places_path / 'PLACES__Local_Data_for_Better_Health__County_Data_2024.csv')

#undercase all column names
places_2023.columns = places_2023.columns.str.lower()
places_2024.columns = places_2024.columns.str.lower()

# Display the first few rows where measure includes diabetes of the 2023 PLACES data files
print("\n2023 Data:")
display(places_2023[places_2023['measure'].str.contains('diabetes')].head())
print("\n2024 Data:")
display(places_2024[places_2024['measure'].str.contains('diabetes')].head())


2023 Data:


  places_2024 = pd.read_csv(places_path / 'PLACES__Local_Data_for_Better_Health__County_Data_2024.csv')


Unnamed: 0,year,stateabbr,statedesc,locationname,datasource,category,measure,data_value_unit,data_value_type,data_value,data_value_footnote_symbol,data_value_footnote,low_confidence_limit,high_confidence_limit,totalpopulation,locationid,categoryid,measureid,datavaluetypeid,short_question_text,geolocation
38,2021,WA,Washington,King,BRFSS,Health Outcomes,Diagnosed diabetes among adults aged >=18 years,%,Crude prevalence,7.3,,,6.4,8.2,2252305,53033,HLTHOUT,DIABETES,CrdPrv,Diabetes,POINT (-121.8339765 47.4905518)
63,2021,AK,Alaska,Skagway,BRFSS,Health Outcomes,Diagnosed diabetes among adults aged >=18 years,%,Crude prevalence,8.3,,,6.9,9.8,1132,2230,HLTHOUT,DIABETES,CrdPrv,Diabetes,POINT (-135.3382787 59.5603794)
86,2021,GA,Georgia,Cobb,BRFSS,Health Outcomes,Diagnosed diabetes among adults aged >=18 years,%,Crude prevalence,10.0,,,8.7,11.6,766802,13067,HLTHOUT,DIABETES,CrdPrv,Diabetes,POINT (-84.5741213 33.9399286)
92,2021,GA,Georgia,Richmond,BRFSS,Health Outcomes,Diagnosed diabetes among adults aged >=18 years,%,Crude prevalence,15.3,,,13.3,17.4,205673,13245,HLTHOUT,DIABETES,CrdPrv,Diabetes,POINT (-82.0749982 33.3614863)
110,2021,ID,Idaho,Power,BRFSS,Health Outcomes,Diagnosed diabetes among adults aged >=18 years,%,Crude prevalence,12.6,,,10.6,14.7,7950,16077,HLTHOUT,DIABETES,CrdPrv,Diabetes,POINT (-112.844407 42.694126)



2024 Data:


Unnamed: 0,year,stateabbr,statedesc,locationname,datasource,category,measure,data_value_unit,data_value_type,data_value,data_value_footnote_symbol,data_value_footnote,low_confidence_limit,high_confidence_limit,totalpopulation,totalpop18plus,locationid,categoryid,measureid,datavaluetypeid,short_question_text,geolocation
0,2022,US,United States,,BRFSS,Health Outcomes,Diagnosed diabetes among adults,%,Crude prevalence,12.0,,,11.8,12.2,333287557,260836730,59,HLTHOUT,DIABETES,CrdPrv,Diabetes,
17,2022,GA,Georgia,Bryan,BRFSS,Health Outcomes,Diagnosed diabetes among adults,%,Crude prevalence,9.8,,,8.5,11.2,48225,34280,13029,HLTHOUT,DIABETES,CrdPrv,Diabetes,POINT (-81.4437379812069 32.0137075119901)
19,2022,GA,Georgia,Clay,BRFSS,Health Outcomes,Diagnosed diabetes among adults,%,Crude prevalence,19.4,,,17.0,21.9,2845,2272,13061,HLTHOUT,DIABETES,CrdPrv,Diabetes,POINT (-84.9792463725169 31.6261945164247)
35,2022,ID,Idaho,Benewah,BRFSS,Health Outcomes,Diagnosed diabetes among adults,%,Crude prevalence,13.0,,,11.2,14.9,10370,8035,16009,HLTHOUT,DIABETES,CrdPrv,Diabetes,POINT (-116.658809779797 47.216881555053)
55,2022,IA,Iowa,Clay,BRFSS,Health Outcomes,Diagnosed diabetes among adults,%,Age-adjusted prevalence,9.2,,,8.0,10.6,16475,12760,19041,HLTHOUT,DIABETES,AgeAdjPrv,Diabetes,POINT (-95.1511161069588 43.0825667543454)


In [4]:
locationid_19041_2023_diabetes = places_2023[(places_2023['locationid'] == 19041) & (places_2023['measure'].str.contains('diabetes', case=False))]
locationid_19041_2024_diabetes = places_2024[(places_2024['locationid'] == 19041) & (places_2024['measure'].str.contains('diabetes', case=False))]

print("\n2023 Data for locationid 19041 with 'diabetes' in measure:")
display(locationid_19041_2023_diabetes)
print("\n2024 Data for locationid 19041 with 'diabetes' in measure:")
display(locationid_19041_2024_diabetes)




2023 Data for locationid 19041 with 'diabetes' in measure:


Unnamed: 0,year,stateabbr,statedesc,locationname,datasource,category,measure,data_value_unit,data_value_type,data_value,data_value_footnote_symbol,data_value_footnote,low_confidence_limit,high_confidence_limit,totalpopulation,locationid,categoryid,measureid,datavaluetypeid,short_question_text,geolocation
4596,2021,IA,Iowa,Clay,BRFSS,Health Outcomes,Diagnosed diabetes among adults aged >=18 years,%,Age-adjusted prevalence,8.4,,,7.0,9.8,16440,19041,HLTHOUT,DIABETES,AgeAdjPrv,Diabetes,POINT (-95.1498753 43.0812244)
58073,2021,IA,Iowa,Clay,BRFSS,Health Outcomes,Diagnosed diabetes among adults aged >=18 years,%,Crude prevalence,10.7,,,9.0,12.5,16440,19041,HLTHOUT,DIABETES,CrdPrv,Diabetes,POINT (-95.1498753 43.0812244)



2024 Data for locationid 19041 with 'diabetes' in measure:


Unnamed: 0,year,stateabbr,statedesc,locationname,datasource,category,measure,data_value_unit,data_value_type,data_value,data_value_footnote_symbol,data_value_footnote,low_confidence_limit,high_confidence_limit,totalpopulation,totalpop18plus,locationid,categoryid,measureid,datavaluetypeid,short_question_text,geolocation
55,2022,IA,Iowa,Clay,BRFSS,Health Outcomes,Diagnosed diabetes among adults,%,Age-adjusted prevalence,9.2,,,8.0,10.6,16475,12760,19041,HLTHOUT,DIABETES,AgeAdjPrv,Diabetes,POINT (-95.1511161069588 43.0825667543454)
68794,2022,IA,Iowa,Clay,BRFSS,Health Outcomes,Diagnosed diabetes among adults,%,Crude prevalence,11.7,,,10.2,13.4,16475,12760,19041,HLTHOUT,DIABETES,CrdPrv,Diabetes,POINT (-95.1511161069588 43.0825667543454)


In [5]:
#filter each dataset to only include the year that matches with ACS 2021 and 2022
places_2023 = places_2023[places_2023['year'] == 2021]
places_2024 = places_2024[places_2024['year'] == 2022]

In [10]:
# Extract the relevant columns
places_2023 = places_2023[places_2023['measureid'].isin(['DIABETES', 'GHLTH','CSMOKING', 'OBESITY', 'CHECKUP', 'DEPRESSION', 'ACCESS2', 'MHLTH'])] 
places_2024 = places_2024[places_2024['measureid'].isin(['DIABETES', 'GHLTH','CSMOKING', 'OBESITY', 'CHECKUP', 'DEPRESSION', 'ACCESS2', 'MHLTH'])] 

display(places_2023.head(3))
display(places_2024.head(3))
print(places_2023['measure'].value_counts())
print(places_2024['measure'].value_counts())

Unnamed: 0,year,stateabbr,statedesc,locationname,datasource,category,measure,data_value_unit,data_value_type,data_value,data_value_footnote_symbol,data_value_footnote,low_confidence_limit,high_confidence_limit,totalpopulation,locationid,categoryid,measureid,datavaluetypeid,short_question_text,geolocation
9,2021,VA,Virginia,Montgomery,BRFSS,Health Outcomes,Obesity among adults aged >=18 years,%,Crude prevalence,28.6,,,23.5,34.4,98473,51121,HLTHOUT,OBESITY,CrdPrv,Obesity,POINT (-80.3877942 37.1755376)
10,2021,VA,Virginia,Sussex,BRFSS,Health Outcomes,Obesity among adults aged >=18 years,%,Crude prevalence,42.2,,,34.3,50.6,10763,51183,HLTHOUT,OBESITY,CrdPrv,Obesity,POINT (-77.2597319 36.9266454)
13,2021,UT,Utah,Piute,BRFSS,Health Outcomes,Depression among adults aged >=18 years,%,Crude prevalence,23.1,,,19.0,27.5,1487,49031,HLTHOUT,DEPRESSION,CrdPrv,Depression,POINT (-112.1293755 38.3358805)


Unnamed: 0,year,stateabbr,statedesc,locationname,datasource,category,measure,data_value_unit,data_value_type,data_value,data_value_footnote_symbol,data_value_footnote,low_confidence_limit,high_confidence_limit,totalpopulation,totalpop18plus,locationid,categoryid,measureid,datavaluetypeid,short_question_text,geolocation
0,2022,US,United States,,BRFSS,Health Outcomes,Diagnosed diabetes among adults,%,Crude prevalence,12.0,,,11.8,12.2,333287557,260836730,59,HLTHOUT,DIABETES,CrdPrv,Diabetes,
17,2022,GA,Georgia,Bryan,BRFSS,Health Outcomes,Diagnosed diabetes among adults,%,Crude prevalence,9.8,,,8.5,11.2,48225,34280,13029,HLTHOUT,DIABETES,CrdPrv,Diabetes,POINT (-81.4437379812069 32.0137075119901)
19,2022,GA,Georgia,Clay,BRFSS,Health Outcomes,Diagnosed diabetes among adults,%,Crude prevalence,19.4,,,17.0,21.9,2845,2272,13061,HLTHOUT,DIABETES,CrdPrv,Diabetes,POINT (-84.9792463725169 31.6261945164247)


measure
Obesity among adults aged >=18 years                                                      6154
Depression among adults aged >=18 years                                                   6154
Diagnosed diabetes among adults aged >=18 years                                           6154
Current lack of health insurance among adults aged 18-64 years                            6154
Fair or poor self-rated health status among adults aged >=18 years                        6154
Mental health not good for >=14 days among adults aged >=18 years                         6154
Visits to doctor for routine checkup within the past year among adults aged >=18 years    6154
Current smoking among adults aged >=18 years                                              6154
Name: count, dtype: int64
measure
Diagnosed diabetes among adults                                           6290
Obesity among adults                                                      6290
Depression among adults                 

In [16]:
# Filter for age-adjusted prevalence 
places_2023_crude = places_2023[places_2023['data_value_type'] == 'Age-adjusted prevalence'].copy()
places_2024_crude = places_2024[places_2024['data_value_type'] == 'Age-adjusted prevalence'].copy()


places_2023_wide = places_2023_crude.pivot_table(
    index=['locationid', 'year', 'stateabbr', 'statedesc', 'locationname', 'totalpopulation'], 
    columns='measureid', 
    values='data_value'
)

places_2024_wide = places_2024_crude.pivot_table(
    index=['locationid', 'year', 'stateabbr', 'statedesc', 'locationname', 'totalpopulation', 'totalpop18plus'], 
    columns='measureid', 
    values='data_value'
)

# Reset index to make the index columns regular columns
places_2023_wide = places_2023_wide.reset_index()
places_2024_wide = places_2024_wide.reset_index()

places_2023_wide.columns.name = None 
places_2024_wide.columns.name = None

places_2023_wide.columns = places_2023_wide.columns.str.lower()
places_2024_wide.columns = places_2024_wide.columns.str.lower()

places_2024_wide = places_2024_wide.drop(columns=['totalpop18plus'])

places_2023_wide = places_2023_wide.rename(columns={'locationid': 'fips'})
places_2024_wide = places_2024_wide.rename(columns={'locationid': 'fips'})
places_2023_wide['fips'] = places_2023_wide['fips'].apply(lambda x: str(x).zfill(5))
places_2024_wide['fips'] = places_2024_wide['fips'].apply(lambda x: str(x).zfill(5))


print("Wide Format 2023 Data (Age-adjusted prevalence):")
display(places_2023_wide.head())

print("\nWide Format 2024 Data (Age-adjusted prevalence):")
display(places_2024_wide.head())

Wide Format 2023 Data (Age-adjusted prevalence):


Unnamed: 0,fips,year,stateabbr,statedesc,locationname,totalpopulation,access2,checkup,csmoking,depression,diabetes,ghlth,mhlth,obesity
0,1001,2021,AL,Alabama,Autauga,59095,10.4,76.0,16.9,22.7,10.7,17.3,18.0,38.9
1,1003,2021,AL,Alabama,Baldwin,239294,9.5,72.6,15.0,23.2,9.8,15.2,17.3,37.2
2,1005,2021,AL,Alabama,Barbour,24964,17.2,76.6,25.0,22.6,15.6,27.3,20.5,43.4
3,1007,2021,AL,Alabama,Bibb,22477,14.3,72.7,22.0,23.3,12.4,22.0,19.4,39.6
4,1009,2021,AL,Alabama,Blount,59041,13.1,72.7,19.6,24.2,10.5,18.6,18.8,37.7



Wide Format 2024 Data (Age-adjusted prevalence):


Unnamed: 0,fips,year,stateabbr,statedesc,locationname,totalpopulation,access2,checkup,csmoking,depression,diabetes,ghlth,mhlth,obesity
0,1001,2022,AL,Alabama,Autauga,59759,9.0,77.9,15.4,24.3,11.3,17.7,18.0,38.4
1,1003,2022,AL,Alabama,Baldwin,246435,9.6,76.8,14.7,25.1,10.2,17.0,18.2,36.8
2,1005,2022,AL,Alabama,Barbour,24706,16.6,79.1,21.9,23.5,16.0,28.4,21.1,43.8
3,1007,2022,AL,Alabama,Bibb,22005,13.1,76.3,21.8,26.6,13.3,25.1,21.3,41.4
4,1009,2022,AL,Alabama,Blount,59512,16.1,75.2,19.5,28.0,11.6,22.6,21.1,37.3


In [17]:
# Get the current working directory
current_dir = os.getcwd()

# Save the files 
output_dir = os.path.join(current_dir, '..', '..', '02_data', '02.01_raw')
os.makedirs(output_dir, exist_ok=True)
places_2023_wide.to_csv(os.path.join(output_dir, 'brfss_data_2021.csv'), index=False)
places_2024_wide.to_csv(os.path.join(output_dir, 'brfss_data_2022.csv'), index=False)