# Merging Census and Health Data
Health data is saved as "health.csv" so change that if you have it labeled as something else.

To change variables extracted in census data, change codes under the "variables of interest section" and update the change name section.

To change variables extracted in census data, change "columns to keep" section.

In [2]:
import pandas as pd
import censusdata

## Census Data

In [67]:
# --- Variables of interest ---
variables = [
    'B19013_001E',  # Median income
    'B01002_001E',  # Median age
    'B17001_002E',  # Poverty
    'B17001_001E',  # Poverty population
    'B15003_017E',  # HS grad
    'B15003_022E',  # Bachelor's degree
    'B15003_001E',  # Education population
    'B23025_005E',  # Unemployed
    'B23025_001E'   # Unemployed population
]

# --- Valid state FIPS codes (50 states + DC) ---
valid_states = [
    '01','02','04','05','06','08','09','10','11','12','13','15','16','17','18','19',
    '20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35',
    '36','37','38','39','40','41','42','44','45','46','47','48','49','50','51','53',
    '54','55','56'
]

# --- Function to parse censusgeo objects ---
def split_geo(cgeo):
    codes = dict(cgeo.geo)
    name_parts = cgeo.name.split(", ")
    tract_name = name_parts[0]
    county_name = name_parts[1]
    state_name = name_parts[2]
    return pd.Series([
        codes.get('state'), codes.get('county'), codes.get('tract'),
        state_name, county_name, tract_name
    ])

# --- Download and process data for all states ---
all_data = []

for state in valid_states:
    print(f"Downloading data for state FIPS: {state}")
    data = censusdata.download(
        'acs5', 2015,
        censusdata.censusgeo([('state', state), ('county', '*'), ('tract', '*')]),
        variables
    )
    
    # Extract geo info
    geo_cols = data.index.to_series().apply(split_geo)
    geo_cols.columns = [
        'State_FIPS', 'County_FIPS', 'Tract_FIPS',
        'State_Name', 'County_Name', 'Tract_Name'
    ]
    
    # Combine geo info with data
    data_clean = pd.concat([geo_cols, data], axis=1).reset_index(drop=True)
    all_data.append(data_clean)

# --- Combine all states ---
census_df = pd.concat(all_data, ignore_index=True)

# --- Create 11-digit tract FIPS ---
census_df["Tract_FIPS_full"] = (
    census_df["State_FIPS"].astype(str).str.zfill(2) +
    census_df["County_FIPS"].astype(str).str.zfill(3) +
    census_df["Tract_FIPS"].astype(str).str.zfill(6)
)

#rename variables
census_df.rename(columns={
    'B19013_001E': 'Median_Income',
    'B01002_001E': 'Median_Age',
    'B17001_002E': 'Poverty_Count',
    'B17001_001E': 'Poverty_Pop',
    'B15003_017E': 'HS_Grad_Count',
    'B15003_022E': 'Bachelors_Count',
    'B15003_001E': 'Education_Pop',
    'B23025_005E': 'Unemployed_Count',
    'B23025_001E': 'Unemployed_Pop'
}, inplace=True)


print("ACS tract-level data ready with full 11-digit FIPS!")


Downloading data for state FIPS: 01
Downloading data for state FIPS: 02
Downloading data for state FIPS: 04
Downloading data for state FIPS: 05
Downloading data for state FIPS: 06
Downloading data for state FIPS: 08
Downloading data for state FIPS: 09
Downloading data for state FIPS: 10
Downloading data for state FIPS: 11
Downloading data for state FIPS: 12
Downloading data for state FIPS: 13
Downloading data for state FIPS: 15
Downloading data for state FIPS: 16
Downloading data for state FIPS: 17
Downloading data for state FIPS: 18
Downloading data for state FIPS: 19
Downloading data for state FIPS: 20
Downloading data for state FIPS: 21
Downloading data for state FIPS: 22
Downloading data for state FIPS: 23
Downloading data for state FIPS: 24
Downloading data for state FIPS: 25
Downloading data for state FIPS: 26
Downloading data for state FIPS: 27
Downloading data for state FIPS: 28
Downloading data for state FIPS: 29
Downloading data for state FIPS: 30
Downloading data for state F

In [69]:
census_df.head()

Unnamed: 0,State_FIPS,County_FIPS,Tract_FIPS,State_Name,County_Name,Tract_Name,Median_Income,Median_Age,Poverty_Count,Poverty_Pop,HS_Grad_Count,Bachelors_Count,Education_Pop,Unemployed_Count,Unemployed_Pop,Tract_FIPS_full
0,1,103,5109,Alabama,Morgan County,Census Tract 51.09,29644.0,29.6,1476,4792,695,290,2710,303,3424,1103005109
1,1,103,5106,Alabama,Morgan County,Census Tract 51.06,35864.0,43.3,1186,5723,899,924,4064,293,4796,1103005106
2,1,103,5107,Alabama,Morgan County,Census Tract 51.07,66739.0,43.5,137,4853,632,805,3382,126,4032,1103005107
3,1,103,5108,Alabama,Morgan County,Census Tract 51.08,64632.0,45.8,566,3787,589,535,2769,166,3110,1103005108
4,1,103,5701,Alabama,Morgan County,Census Tract 57.01,46306.0,38.4,571,2784,295,205,1783,52,2022,1103005701


In [71]:
census_df.columns

Index(['State_FIPS', 'County_FIPS', 'Tract_FIPS', 'State_Name', 'County_Name',
       'Tract_Name', 'Median_Income', 'Median_Age', 'Poverty_Count',
       'Poverty_Pop', 'HS_Grad_Count', 'Bachelors_Count', 'Education_Pop',
       'Unemployed_Count', 'Unemployed_Pop', 'Tract_FIPS_full'],
      dtype='object')

## Health Data

In [74]:
health_df = pd.read_csv("health.csv")

In [76]:
health_df.head()

Unnamed: 0,StateAbbr,PlaceName,PlaceFIPS,TractFIPS,Place_TractID,Population2010,ACCESS2_CrudePrev,ACCESS2_Crude95CI,ARTHRITIS_CrudePrev,ARTHRITIS_Crude95CI,...,PAPTEST_Crude95CI,PHLTH_CrudePrev,PHLTH_Crude95CI,SLEEP_CrudePrev,SLEEP_Crude95CI,STROKE_CrudePrev,STROKE_Crude95CI,TEETHLOST_CrudePrev,TEETHLOST_Crude95CI,Geolocation
0,CA,Los Angeles,644000,6037206032,0644000-06037206032,5275,28.0,"(22.7, 33.2)",19.9,"(18.8, 20.9)",...,"(81.5, 86.0)",17.3,"(15.3, 19.2)",38.9,"(37.2, 40.4)",4.0,"( 3.5, 4.5)",18.8,"(12.6, 25.6)",POINT (-118.224698433 34.0470512474)
1,CA,Pasadena,656000,6037462001,0656000-06037462001,3974,23.3,"(19.2, 27.8)",16.7,"(16.0, 17.5)",...,"(84.1, 87.5)",15.3,"(13.9, 16.8)",40.2,"(38.8, 41.5)",3.2,"( 2.9, 3.5)",18.1,"(13.7, 22.8)",POINT (-118.143832177 34.1633689905)
2,CA,Fullerton,628000,6059011504,0628000-06059011504,5473,11.7,"( 9.6, 14.5)",8.3,"( 7.9, 8.7)",...,"(75.4, 80.9)",8.5,"( 7.7, 9.5)",33.4,"(31.6, 35.1)",1.2,"( 1.1, 1.3)",11.9,"( 8.8, 16.0)",POINT (-117.883112998 33.8820402343)
3,TX,Longview,4843888,48183000401,4843888-48183000401,3371,19.3,"(16.8, 21.9)",21.1,"(20.2, 21.9)",...,"(76.5, 81.9)",11.8,"(10.8, 12.8)",34.9,"(33.4, 36.3)",2.7,"( 2.5, 3.0)",15.1,"(10.8, 20.4)",POINT (-94.7523262965 32.5544549842)
4,WI,Madison,5548000,55025002200,5548000-55025002200,4254,10.5,"( 8.7, 12.6)",23.3,"(21.9, 24.6)",...,"(83.4, 88.0)",11.3,"( 9.8, 12.7)",30.8,"(29.3, 32.2)",2.9,"( 2.5, 3.3)",12.9,"( 7.6, 19.8)",POINT (-89.3623704161 43.1211036947)


In [78]:
#extract the 11-digit tract FIPS after the dash
health_df["Tract_FIPS_full"] = health_df["Place_TractID"].str.split("-").str[-1]

In [80]:
health_df.head()

Unnamed: 0,StateAbbr,PlaceName,PlaceFIPS,TractFIPS,Place_TractID,Population2010,ACCESS2_CrudePrev,ACCESS2_Crude95CI,ARTHRITIS_CrudePrev,ARTHRITIS_Crude95CI,...,PHLTH_CrudePrev,PHLTH_Crude95CI,SLEEP_CrudePrev,SLEEP_Crude95CI,STROKE_CrudePrev,STROKE_Crude95CI,TEETHLOST_CrudePrev,TEETHLOST_Crude95CI,Geolocation,Tract_FIPS_full
0,CA,Los Angeles,644000,6037206032,0644000-06037206032,5275,28.0,"(22.7, 33.2)",19.9,"(18.8, 20.9)",...,17.3,"(15.3, 19.2)",38.9,"(37.2, 40.4)",4.0,"( 3.5, 4.5)",18.8,"(12.6, 25.6)",POINT (-118.224698433 34.0470512474),6037206032
1,CA,Pasadena,656000,6037462001,0656000-06037462001,3974,23.3,"(19.2, 27.8)",16.7,"(16.0, 17.5)",...,15.3,"(13.9, 16.8)",40.2,"(38.8, 41.5)",3.2,"( 2.9, 3.5)",18.1,"(13.7, 22.8)",POINT (-118.143832177 34.1633689905),6037462001
2,CA,Fullerton,628000,6059011504,0628000-06059011504,5473,11.7,"( 9.6, 14.5)",8.3,"( 7.9, 8.7)",...,8.5,"( 7.7, 9.5)",33.4,"(31.6, 35.1)",1.2,"( 1.1, 1.3)",11.9,"( 8.8, 16.0)",POINT (-117.883112998 33.8820402343),6059011504
3,TX,Longview,4843888,48183000401,4843888-48183000401,3371,19.3,"(16.8, 21.9)",21.1,"(20.2, 21.9)",...,11.8,"(10.8, 12.8)",34.9,"(33.4, 36.3)",2.7,"( 2.5, 3.0)",15.1,"(10.8, 20.4)",POINT (-94.7523262965 32.5544549842),48183000401
4,WI,Madison,5548000,55025002200,5548000-55025002200,4254,10.5,"( 8.7, 12.6)",23.3,"(21.9, 24.6)",...,11.3,"( 9.8, 12.7)",30.8,"(29.3, 32.2)",2.9,"( 2.5, 3.3)",12.9,"( 7.6, 19.8)",POINT (-89.3623704161 43.1211036947),55025002200


In [82]:
health_df.columns

Index(['StateAbbr', 'PlaceName', 'PlaceFIPS', 'TractFIPS', 'Place_TractID',
       'Population2010', 'ACCESS2_CrudePrev', 'ACCESS2_Crude95CI',
       'ARTHRITIS_CrudePrev', 'ARTHRITIS_Crude95CI', 'BINGE_CrudePrev',
       'BINGE_Crude95CI', 'BPHIGH_CrudePrev', 'BPHIGH_Crude95CI',
       'BPMED_CrudePrev', 'BPMED_Crude95CI', 'CANCER_CrudePrev',
       'CANCER_Crude95CI', 'CASTHMA_CrudePrev', 'CASTHMA_Crude95CI',
       'CHD_CrudePrev', 'CHD_Crude95CI', 'CHECKUP_CrudePrev',
       'CHECKUP_Crude95CI', 'CHOLSCREEN_CrudePrev', 'CHOLSCREEN_Crude95CI',
       'COLON_SCREEN_CrudePrev', 'COLON_SCREEN_Crude95CI', 'COPD_CrudePrev',
       'COPD_Crude95CI', 'COREM_CrudePrev', 'COREM_Crude95CI',
       'COREW_CrudePrev', 'COREW_Crude95CI', 'CSMOKING_CrudePrev',
       'CSMOKING_Crude95CI', 'DENTAL_CrudePrev', 'DENTAL_Crude95CI',
       'DIABETES_CrudePrev', 'DIABETES_Crude95CI', 'HIGHCHOL_CrudePrev',
       'HIGHCHOL_Crude95CI', 'KIDNEY_CrudePrev', 'KIDNEY_Crude95CI',
       'LPA_CrudePrev', 'LPA_

In [84]:
#filter for columns we want
columns_to_keep = [
    "StateAbbr",
    "PlaceName",
    "PlaceFIPS",
    "TractFIPS",
    "Place_TractID",
    "Tract_FIPS_full",
    "Population2010",
    "OBESITY_CrudePrev",
    "DIABETES_CrudePrev",
    "HIGHCHOL_CrudePrev",
    "BPHIGH_CrudePrev",
    "STROKE_CrudePrev",
    "SLEEP_CrudePrev",
    "MHLTH_CrudePrev",
    "CASTHMA_CrudePrev",
    "CHD_CrudePrev",
    "CSMOKING_CrudePrev"
]

# Filter merged dataframe
health_filtered = health_df[columns_to_keep]

## Merge

In [87]:
merged_df = health_filtered.merge(census_df, on="Tract_FIPS_full", how="inner")

In [89]:
merged_df.columns

Index(['StateAbbr', 'PlaceName', 'PlaceFIPS', 'TractFIPS', 'Place_TractID',
       'Tract_FIPS_full', 'Population2010', 'OBESITY_CrudePrev',
       'DIABETES_CrudePrev', 'HIGHCHOL_CrudePrev', 'BPHIGH_CrudePrev',
       'STROKE_CrudePrev', 'SLEEP_CrudePrev', 'MHLTH_CrudePrev',
       'CASTHMA_CrudePrev', 'CHD_CrudePrev', 'CSMOKING_CrudePrev',
       'State_FIPS', 'County_FIPS', 'Tract_FIPS', 'State_Name', 'County_Name',
       'Tract_Name', 'Median_Income', 'Median_Age', 'Poverty_Count',
       'Poverty_Pop', 'HS_Grad_Count', 'Bachelors_Count', 'Education_Pop',
       'Unemployed_Count', 'Unemployed_Pop'],
      dtype='object')

In [91]:
merged_df.head()

Unnamed: 0,StateAbbr,PlaceName,PlaceFIPS,TractFIPS,Place_TractID,Tract_FIPS_full,Population2010,OBESITY_CrudePrev,DIABETES_CrudePrev,HIGHCHOL_CrudePrev,...,Tract_Name,Median_Income,Median_Age,Poverty_Count,Poverty_Pop,HS_Grad_Count,Bachelors_Count,Education_Pop,Unemployed_Count,Unemployed_Pop
0,CA,Los Angeles,644000,6037206032,0644000-06037206032,6037206032,5275,29.5,15.5,38.3,...,Census Tract 2060.32,26094.0,29.5,1829,5104,675,309,2920,185,3835
1,CA,Pasadena,656000,6037462001,0656000-06037462001,6037462001,3974,32.0,11.6,31.8,...,Census Tract 4620.01,40192.0,29.4,947,3893,475,367,2419,167,2903
2,CA,Fullerton,628000,6059011504,0628000-06059011504,6059011504,5473,18.7,4.6,21.1,...,Census Tract 115.04,31793.0,22.8,1740,4768,225,873,2481,379,5547
3,TX,Longview,4843888,48183000401,4843888-48183000401,48183000401,3371,33.9,9.2,33.9,...,Census Tract 4.01,52846.0,34.0,454,3170,637,201,2261,123,2678
4,WI,Madison,5548000,55025002200,5548000-55025002200,55025002200,4254,29.5,8.7,34.6,...,Census Tract 22,42621.0,42.1,491,4026,523,594,2902,339,3429


In [93]:
merged_df.shape

(27209, 32)

## Cleaning

In [98]:
#rename Crude Columns
merged_df = merged_df.rename(columns={
    "Population2010" : "Health_Pop",
    "OBESITY_CrudePrev": "Obesity_Pct",
    "DIABETES_CrudePrev": "Diabetes_Pct",
    "HIGHCHOL_CrudePrev": "HighChol_Pct",
    "BPHIGH_CrudePrev": "HighBP_Pct",
    "STROKE_CrudePrev": "Stroke_Pct",
    "SLEEP_CrudePrev": "Sleep_Pct",
    "MHLTH_CrudePrev": "MentalHealth_Pct",
    "CASTHMA_CrudePrev": "Asthma_Pct",
    "CHD_CrudePrev": "HeartDisease_Pct",
    "CSMOKING_CrudePrev": "Smoking_Pct"
})

In [100]:
merged_df.head()

Unnamed: 0,StateAbbr,PlaceName,PlaceFIPS,TractFIPS,Place_TractID,Tract_FIPS_full,Health_Pop,Obesity_Pct,Diabetes_Pct,HighChol_Pct,...,Tract_Name,Median_Income,Median_Age,Poverty_Count,Poverty_Pop,HS_Grad_Count,Bachelors_Count,Education_Pop,Unemployed_Count,Unemployed_Pop
0,CA,Los Angeles,644000,6037206032,0644000-06037206032,6037206032,5275,29.5,15.5,38.3,...,Census Tract 2060.32,26094.0,29.5,1829,5104,675,309,2920,185,3835
1,CA,Pasadena,656000,6037462001,0656000-06037462001,6037462001,3974,32.0,11.6,31.8,...,Census Tract 4620.01,40192.0,29.4,947,3893,475,367,2419,167,2903
2,CA,Fullerton,628000,6059011504,0628000-06059011504,6059011504,5473,18.7,4.6,21.1,...,Census Tract 115.04,31793.0,22.8,1740,4768,225,873,2481,379,5547
3,TX,Longview,4843888,48183000401,4843888-48183000401,48183000401,3371,33.9,9.2,33.9,...,Census Tract 4.01,52846.0,34.0,454,3170,637,201,2261,123,2678
4,WI,Madison,5548000,55025002200,5548000-55025002200,55025002200,4254,29.5,8.7,34.6,...,Census Tract 22,42621.0,42.1,491,4026,523,594,2902,339,3429


In [102]:
merged_df.columns

Index(['StateAbbr', 'PlaceName', 'PlaceFIPS', 'TractFIPS', 'Place_TractID',
       'Tract_FIPS_full', 'Health_Pop', 'Obesity_Pct', 'Diabetes_Pct',
       'HighChol_Pct', 'HighBP_Pct', 'Stroke_Pct', 'Sleep_Pct',
       'MentalHealth_Pct', 'Asthma_Pct', 'HeartDisease_Pct', 'Smoking_Pct',
       'State_FIPS', 'County_FIPS', 'Tract_FIPS', 'State_Name', 'County_Name',
       'Tract_Name', 'Median_Income', 'Median_Age', 'Poverty_Count',
       'Poverty_Pop', 'HS_Grad_Count', 'Bachelors_Count', 'Education_Pop',
       'Unemployed_Count', 'Unemployed_Pop'],
      dtype='object')

In [104]:
merged_df.shape

(27209, 32)

In [106]:
#make new variables for raw count (health data)
#using the Health_Pop (from the health data)
for col in ["Obesity_Pct", "Diabetes_Pct", "HighChol_Pct", "HighBP_Pct",
            "Stroke_Pct", "Sleep_Pct", "MentalHealth_Pct",
            "Asthma_Pct", "HeartDisease_Pct", "Smoking_Pct"]:
    new_col = col.replace("_Pct", "_Count")
    merged_df[new_col] = round((merged_df[col] / 100) * merged_df["Health_Pop"])

In [108]:
#check the raw numbers
merged_df[["Health_Pop", "Obesity_Pct", "Obesity_Count"]].head()

Unnamed: 0,Health_Pop,Obesity_Pct,Obesity_Count
0,5275,29.5,1556.0
1,3974,32.0,1272.0
2,5473,18.7,1023.0
3,3371,33.9,1143.0
4,4254,29.5,1255.0


In [110]:
#make new variables for percentages (census data)
#using the population from each universe
merged_df["Poverty_Pct"] = (merged_df["Poverty_Count"] / merged_df["Poverty_Pop"]) * 100
merged_df["HS_Grad_Pct"] = (merged_df["HS_Grad_Count"] / merged_df["Education_Pop"]) * 100
merged_df["Bachelors_Pct"] = (merged_df["Bachelors_Count"] / merged_df["Education_Pop"]) * 100
merged_df["Unemployed_Pct"] = (merged_df["Unemployed_Count"] / merged_df["Unemployed_Pop"]) * 100

In [114]:
#check the raw numbers
merged_df[["Education_Pop", "HS_Grad_Pct", "HS_Grad_Count"]].head()

Unnamed: 0,Education_Pop,HS_Grad_Pct,HS_Grad_Count
0,2920,23.116438,675
1,2419,19.636213,475
2,2481,9.068924,225
3,2261,28.173375,637
4,2902,18.022054,523


In [116]:
merged_df.head()

Unnamed: 0,StateAbbr,PlaceName,PlaceFIPS,TractFIPS,Place_TractID,Tract_FIPS_full,Health_Pop,Obesity_Pct,Diabetes_Pct,HighChol_Pct,...,Stroke_Count,Sleep_Count,MentalHealth_Count,Asthma_Count,HeartDisease_Count,Smoking_Count,Poverty_Pct,HS_Grad_Pct,Bachelors_Pct,Unemployed_Pct
0,CA,Los Angeles,644000,6037206032,0644000-06037206032,6037206032,5275,29.5,15.5,38.3,...,211.0,2052.0,791.0,448.0,364.0,823.0,35.834639,23.116438,10.582192,4.82399
1,CA,Pasadena,656000,6037462001,0656000-06037462001,6037462001,3974,32.0,11.6,31.8,...,127.0,1598.0,624.0,374.0,191.0,707.0,24.325713,19.636213,15.171558,5.75267
2,CA,Fullerton,628000,6059011504,0628000-06059011504,6059011504,5473,18.7,4.6,21.1,...,66.0,1828.0,772.0,504.0,115.0,728.0,36.493289,9.068924,35.187424,6.832522
3,TX,Longview,4843888,48183000401,4843888-48183000401,48183000401,3371,33.9,9.2,33.9,...,91.0,1176.0,455.0,314.0,182.0,691.0,14.321767,28.173375,8.889872,4.59298
4,WI,Madison,5548000,55025002200,5548000-55025002200,55025002200,4254,29.5,8.7,34.6,...,123.0,1310.0,502.0,391.0,255.0,706.0,12.195728,18.022054,20.468642,9.886264


In [118]:
#check % of missing data
merged_df.isna().mean().sort_values(ascending=False).head(10)

Poverty_Pct         0.002132
Median_Income       0.001323
Bachelors_Pct       0.000294
HS_Grad_Pct         0.000294
Unemployed_Pct      0.000147
Tract_FIPS_full     0.000000
Poverty_Pop         0.000000
Bachelors_Count     0.000000
Education_Pop       0.000000
Unemployed_Count    0.000000
dtype: float64

In [120]:
#shape without NAs (less than 100 rows lost)
merged_df = merged_df.dropna()
merged_df.shape

(27113, 46)

In [122]:
#check % of missing data (should be 0s)
merged_df.isna().mean().sort_values(ascending=False).head(10)

StateAbbr           0.0
HighChol_Count      0.0
Poverty_Count       0.0
Poverty_Pop         0.0
HS_Grad_Count       0.0
Bachelors_Count     0.0
Education_Pop       0.0
Unemployed_Count    0.0
Unemployed_Pop      0.0
Obesity_Count       0.0
dtype: float64

In [124]:
#reorder columns for better readability
# List the columns in the order you want
cols = [
    'StateAbbr', 'PlaceName', 'PlaceFIPS', 'TractFIPS', 'Place_TractID',
    'Tract_FIPS_full', 'Health_Pop',
    'Obesity_Pct', 'Obesity_Count',
    'Diabetes_Pct', 'Diabetes_Count',
    'HighChol_Pct', 'HighChol_Count',
    'HighBP_Pct', 'HighBP_Count',
    'Stroke_Pct', 'Stroke_Count',
    'Sleep_Pct', 'Sleep_Count',
    'MentalHealth_Pct', 'MentalHealth_Count',
    'Asthma_Pct', 'Asthma_Count',
    'HeartDisease_Pct', 'HeartDisease_Count',
    'Smoking_Pct', 'Smoking_Count',
    'State_FIPS', 'County_FIPS', 'Tract_FIPS', 'State_Name', 'County_Name',
    'Tract_Name', 'Median_Income', 'Median_Age',
    'Poverty_Pct', 'Poverty_Count',
    'HS_Grad_Pct', 'HS_Grad_Count',
    'Bachelors_Pct', 'Bachelors_Count',
    'Unemployed_Pct', 'Unemployed_Count'
]

# Reorder the DataFrame
merged_df = merged_df[cols]

In [126]:
merged_df.columns

Index(['StateAbbr', 'PlaceName', 'PlaceFIPS', 'TractFIPS', 'Place_TractID',
       'Tract_FIPS_full', 'Health_Pop', 'Obesity_Pct', 'Obesity_Count',
       'Diabetes_Pct', 'Diabetes_Count', 'HighChol_Pct', 'HighChol_Count',
       'HighBP_Pct', 'HighBP_Count', 'Stroke_Pct', 'Stroke_Count', 'Sleep_Pct',
       'Sleep_Count', 'MentalHealth_Pct', 'MentalHealth_Count', 'Asthma_Pct',
       'Asthma_Count', 'HeartDisease_Pct', 'HeartDisease_Count', 'Smoking_Pct',
       'Smoking_Count', 'State_FIPS', 'County_FIPS', 'Tract_FIPS',
       'State_Name', 'County_Name', 'Tract_Name', 'Median_Income',
       'Median_Age', 'Poverty_Pct', 'Poverty_Count', 'HS_Grad_Pct',
       'HS_Grad_Count', 'Bachelors_Pct', 'Bachelors_Count', 'Unemployed_Pct',
       'Unemployed_Count'],
      dtype='object')

## Save as CSV

In [129]:
#save as csv (won't have to run merging and cleaning again)
merged_df.to_csv("merged_health_census_clean.csv", index=False)