In [1]:
import pandas as pd

In [2]:
# Raw input: CDC PLACES "Local Data for Better Health", county data, 2024 release.
#   Dataset page:        https://chronicdata.cdc.gov/500-Cities-Places/PLACES-Local-Data-for-Better-Health-County-Data-20/swc5-untb/about_data
#   Measure definitions: https://www.cdc.gov/places/measure-definitions/index.html
# low_memory=False makes pandas read the file in one pass instead of chunked type inference.
df = pd.read_csv(
    '../00_data/01_raw/PLACES__Local_Data_for_Better_Health,_County_Data_2024_release_20250828.csv',
    low_memory=False,
)

In [4]:
# List every distinct measure label present in the raw file.
df.loc[:, 'Short_Question_Text'].unique()

array(['Current Asthma', 'Arthritis', 'Stroke', 'Obesity',
       'Any Disability', 'Binge Drinking', 'High Blood Pressure',
       'Diabetes', 'Depression', 'Cognitive Disability',
       'Frequent Mental Distress', 'All Teeth Lost', 'Hearing Disability',
       'COPD', 'Vision Disability', 'Mobility Disability', 'Mammography',
       'Self-care Disability', 'Cholesterol Screening',
       'Coronary Heart Disease', 'Food Insecurity',
       'Independent Living Disability', 'Physical Inactivity',
       'Utility Services Threat', 'High Cholesterol',
       'Current Cigarette Smoking', 'Short Sleep Duration',
       'Health Insurance', 'Frequent Physical Distress', 'General Health',
       'Annual Checkup', 'Colorectal Cancer Screening', 'Dental Visit',
       'Social Isolation', 'Food Stamps',
       'High Blood Pressure Medication',
       'Lack of Social/Emotional Support', 'Housing Insecurity',
       'Cancer (non-skin) or Melanoma', 'Transportation Barriers'],
      dtype=object)

In [5]:
# Inspect the rows of a single measure to see how one county/measure record is laid out.
df[df['Short_Question_Text'].eq('Any Disability')]

Unnamed: 0,Year,StateAbbr,StateDesc,LocationName,DataSource,Category,Measure,Data_Value_Unit,Data_Value_Type,Data_Value,...,Low_Confidence_Limit,High_Confidence_Limit,TotalPopulation,TotalPop18plus,LocationID,CategoryID,MeasureId,DataValueTypeID,Short_Question_Text,Geolocation
4,2022,AL,Alabama,Lawrence,BRFSS,Disability,Any disability among adults,%,Crude prevalence,40.1,...,35.5,44.7,33214,26022,1079,DISABLT,DISABILITY,CrdPrv,Any Disability,POINT (-87.3108851040374 34.5216735395968)
20,2022,AR,Arkansas,Cross,BRFSS,Disability,Any disability among adults,%,Crude prevalence,41.9,...,37.3,46.4,16601,12703,5037,DISABLT,DISABILITY,CrdPrv,Any Disability,POINT (-90.7714945147511 35.2958747021642)
54,2022,AL,Alabama,Covington,BRFSS,Disability,Any disability among adults,%,Crude prevalence,36.3,...,32.0,40.7,37602,29340,1039,DISABLT,DISABILITY,CrdPrv,Any Disability,POINT (-86.451444907521 31.2486200676573)
77,2022,AR,Arkansas,Perry,BRFSS,Disability,Any disability among adults,%,Crude prevalence,40.5,...,35.8,45.4,10063,7925,5105,DISABLT,DISABILITY,CrdPrv,Any Disability,POINT (-92.9308792596259 34.9473919842415)
80,2022,CA,California,Inyo,BRFSS,Disability,Any disability among adults,%,Age-adjusted prevalence,28.3,...,24.4,32.4,18718,14942,6027,DISABLT,DISABILITY,AgeAdjPrv,Any Disability,POINT (-117.411112581295 36.5114226868129)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
240731,2022,WY,Wyoming,Albany,BRFSS,Disability,Any disability among adults,%,Age-adjusted prevalence,28.9,...,25.2,32.7,38031,32250,56001,DISABLT,DISABILITY,AgeAdjPrv,Any Disability,POINT (-105.723439465307 41.6544509650776)
240754,2022,WI,Wisconsin,Jefferson,BRFSS,Disability,Any disability among adults,%,Age-adjusted prevalence,28.0,...,24.1,32.2,85784,69136,55055,DISABLT,DISABILITY,AgeAdjPrv,Any Disability,POINT (-88.7759315522696 43.0208620651204)
240797,2022,WI,Wisconsin,Walworth,BRFSS,Disability,Any disability among adults,%,Crude prevalence,27.0,...,23.6,30.8,105380,85160,55127,DISABLT,DISABILITY,CrdPrv,Any Disability,POINT (-88.5418387513161 42.6685116518019)
240838,2022,WY,Wyoming,Sweetwater,BRFSS,Disability,Any disability among adults,%,Crude prevalence,31.1,...,27.2,35.0,41345,31117,56037,DISABLT,DISABILITY,CrdPrv,Any Disability,POINT (-108.879886957441 41.6596981373813)


In [6]:
# MeasureId codes kept for the analysis (matched against df['MeasureId'] downstream).
# Deliberately left out because about 23% of values are missing:
#   'ISOLATION'  - adults who report always/usually/sometimes feeling socially isolated.
#   'EMOTIONSPT' - adults who report sometimes, rarely, or never getting the social and emotional support needed.
factor_list = [
    'DEPRESSION',
    'DIABETES',
    'OBESITY',
    # Adults who report having ≥5 drinks (men) or ≥4 drinks (women) on ≥1 occasion during the previous 30 days.
    'BINGE',
    # Adults who report having smoked ≥ 100 cigarettes in their lifetime and currently smoke every day or some days.
    'CSMOKING',
    # Having no leisure-time physical activity.
    'LPA',
    'CANCER',
    # Vision disability.
    'VISION',
    'MOBILITY',
    'SELFCARE',
    'DISABILITY',
]

In [7]:
# Keep only the crude-prevalence rows ('CrdPrv') of the measures listed in factor_list.
wanted_rows = df['MeasureId'].isin(factor_list) & (df['DataValueTypeID'] == 'CrdPrv')
df_select = df.loc[wanted_rows].copy()

# Parse TotalPopulation from the selected rows themselves rather than from the full
# raw frame: only the rows we keep are cast, so a missing value in an unselected row
# cannot abort the cell, and no implicit index alignment between df and df_select is
# needed. The comma is a literal thousands separator, hence regex=False.
df_select['TotalPopulation'] = (
    df_select['TotalPopulation']
    .str.replace(',', '', regex=False)
    .astype('int')
)

# Data_Value is a percentage, so population * percent * 0.01 gives an estimated head count.
# NOTE(review): the measures are described as "among adults" while this multiplies by
# TotalPopulation (all ages); TotalPop18plus may be the intended denominator - confirm.
df_select['Value_N'] = df_select['TotalPopulation'] * df_select['Data_Value'] * 0.01

In [8]:
# Reshape long -> wide: one row per (state, county name, location id, population) key,
# one column per MeasureId holding Value_N. Rows containing any missing value are dropped.
df_clean = (
    pd.pivot(
        df_select,
        index=['StateAbbr', 'LocationName', 'LocationID', 'TotalPopulation'],
        columns='MeasureId',
        values='Value_N',
    )
    .reset_index()
    .dropna()
)

In [9]:
# Persist the wide county table for later pipeline stages; the row index is not written.
df_clean.to_csv(
    path_or_buf='../00_data/02_intermediate/place.csv',
    index=False,
)