# Clean the PLACES Data

In [1]:
import pandas as pd
import geopandas as gpd


### Read the Data

In [31]:
# Define the file path templates
file_path_template_2017_2019 = r"C:\Users\kaavy\OneDrive\Documents\Sci4GA Internship\PLACES data\500_Cities__Census_Tract-level_Data__GIS_Friendly_Format___{}_release_20250209.csv"
file_path_template_2020_2024 = r"C:\Users\kaavy\OneDrive\Documents\Sci4GA Internship\PLACES data\PLACES__Census_Tract_Data__GIS_Friendly_Format___{}_release_20250209.csv"

# Define individual dataframes for each year
places_data_2017 = pd.read_csv(file_path_template_2017_2019.format(2017))
places_data_2018 = pd.read_csv(file_path_template_2017_2019.format(2018))
places_data_2019 = pd.read_csv(file_path_template_2017_2019.format(2019))
places_data_2020 = pd.read_csv(file_path_template_2020_2024.format(2020))
places_data_2021 = pd.read_csv(file_path_template_2020_2024.format(2021))
places_data_2022 = pd.read_csv(file_path_template_2020_2024.format(2022))
places_data_2023 = pd.read_csv(file_path_template_2020_2024.format(2023))
places_data_2024 = pd.read_csv(file_path_template_2020_2024.format(2024))


# Loop through the years 2017 to 2019 and read the corresponding files
for year in range(2017, 2020):
    file_path = file_path_template_2017_2019.format(year)
    places_data[year] = pd.read_csv(file_path)

# Loop through the years 2020 to 2024 and read the corresponding files
for year in range(2020, 2025):
    file_path = file_path_template_2020_2024.format(year)
    places_data[year] = pd.read_csv(file_path)

### Check the format of data

In [26]:
print(places_data_2017['Geolocation'].apply(type).value_counts())
print(places_data_2017['Geolocation'].apply(type).value_counts())
print(places_data_2018['Geolocation'].apply(type).value_counts())
print(places_data_2019['Geolocation'].apply(type).value_counts())
print(places_data_2020['Geolocation'].apply(type).value_counts())
print(places_data_2021['Geolocation'].apply(type).value_counts())
print(places_data_2022['Geolocation'].apply(type).value_counts())
print(places_data_2023['Geolocation'].apply(type).value_counts())
print(places_data_2024['Geolocation'].apply(type).value_counts())


<class 'str'>    444
Name: Geolocation, dtype: int64
<class 'str'>    444
Name: Geolocation, dtype: int64
<class 'str'>    444
Name: Geolocation, dtype: int64
<class 'str'>    444
Name: Geolocation, dtype: int64
<class 'str'>    1955
Name: Geolocation, dtype: int64
<class 'str'>    1955
Name: Geolocation, dtype: int64
<class 'str'>    1955
Name: Geolocation, dtype: int64
<class 'str'>    1955
Name: Geolocation, dtype: int64
<class 'str'>    2784
Name: Geolocation, dtype: int64


In [27]:
print(places_data_2017['Geolocation'].head())
print(places_data_2018['Geolocation'].head())
print(places_data_2019['Geolocation'].head())
print(places_data_2020['Geolocation'].head())
print(places_data_2021['Geolocation'].head())
print(places_data_2022['Geolocation'].head())
print(places_data_2023['Geolocation'].head())
print(places_data_2024['Geolocation'].head())


0    (31.57179200820, -84.0916661878)
1    (31.57904097040, -84.1296291540)
2    (31.61484136300, -84.1973866421)
3    (31.59865853510, -84.2282377893)
4    (31.59499555440, -84.2063318482)
Name: Geolocation, dtype: object
0     POINT (-84.302143031 33.7468473577)
1    POINT (-82.0487386714 33.3827681525)
2    POINT (-83.7042441207 32.8084122826)
3    POINT (-84.3731348097 33.8902362582)
4    POINT (-84.3708874971 33.7321403119)
Name: Geolocation, dtype: object
0    (31.57179200820, -84.0916661878)
1    (31.57904097040, -84.1296291540)
2    (31.61484136300, -84.1973866421)
3    (31.59865853510, -84.2282377893)
4    (31.59499555440, -84.2063318482)
Name: Geolocation, dtype: object
0    POINT (-82.27028582 31.87801856)
1    POINT (-82.31341795 31.81026967)
2    POINT (-82.37645917 31.76132967)
3    POINT (-82.19073533 31.66518743)
4    POINT (-82.38065559 31.69426079)
Name: Geolocation, dtype: object
0    POINT (-84.78243186 33.98144876)
1    POINT (-83.72419409 32.92759581)
2    POINT (

In [32]:

import geopandas as gpd
from shapely.wkt import loads

# Function to handle tuple-like "(lat, lon)" format
def split_tuple_geolocation(df):
    df[['Latitude', 'Longitude']] = df['Geolocation'].str.extract(r'\(([-.\d]+),\s*([-.\d]+)\)').astype(float)
    return df

# Function to handle WKT "POINT (lon lat)" format
def split_wkt_geolocation(df):
    df[['Longitude', 'Latitude']] = df['Geolocation'].apply(lambda x: loads(x) if isinstance(x, str) and x.startswith("POINT") else None)\
                                                   .apply(lambda p: (p.x, p.y) if p else (None, None))\
                                                   .apply(pd.Series)
    return df

# Process each year's DataFrame
year_dfs = {year: globals()[f'places_data_{year}'] for year in range(2017, 2025)}

for year, df in year_dfs.items():
    print(f"Processing {year} - Sample Data:")
    print(df['Geolocation'].head())  # Debugging step

    if df['Geolocation'].str.startswith("POINT").any():
        modified_df = split_wkt_geolocation(df)
    else:
        modified_df = split_tuple_geolocation(df)
    
    modified_df.drop(columns=['Geolocation'], inplace=True)  # Drop original column
    globals()[f'modified_places_data_{year}'] = modified_df  # Save to new DataFrame
    # Save to CSV
    modified_df.to_csv(f'modified_places_data_{year}.csv', index=False)



Processing 2017 - Sample Data:
0    (31.57179200820, -84.0916661878)
1    (31.57904097040, -84.1296291540)
2    (31.61484136300, -84.1973866421)
3    (31.59865853510, -84.2282377893)
4    (31.59499555440, -84.2063318482)
Name: Geolocation, dtype: object
Processing 2018 - Sample Data:
0     POINT (-84.302143031 33.7468473577)
1    POINT (-82.0487386714 33.3827681525)
2    POINT (-83.7042441207 32.8084122826)
3    POINT (-84.3731348097 33.8902362582)
4    POINT (-84.3708874971 33.7321403119)
Name: Geolocation, dtype: object
Processing 2019 - Sample Data:
0    (31.57179200820, -84.0916661878)
1    (31.57904097040, -84.1296291540)
2    (31.61484136300, -84.1973866421)
3    (31.59865853510, -84.2282377893)
4    (31.59499555440, -84.2063318482)
Name: Geolocation, dtype: object
Processing 2020 - Sample Data:
0    POINT (-82.27028582 31.87801856)
1    POINT (-82.31341795 31.81026967)
2    POINT (-82.37645917 31.76132967)
3    POINT (-82.19073533 31.66518743)
4    POINT (-82.38065559 31.6942607

### Merge data with census tract data

In [2]:
# read the modified dataframes
modified_places_data_2017 = pd.read_csv('modified_places_data_2017.csv')
modified_places_data_2018 = pd.read_csv('modified_places_data_2018.csv')
modified_places_data_2019 = pd.read_csv('modified_places_data_2019.csv')
modified_places_data_2020 = pd.read_csv('modified_places_data_2020.csv')
modified_places_data_2021 = pd.read_csv('modified_places_data_2021.csv')
modified_places_data_2022 = pd.read_csv('modified_places_data_2022.csv')
modified_places_data_2023 = pd.read_csv('modified_places_data_2023.csv')
modified_places_data_2024 = pd.read_csv('modified_places_data_2024.csv')


In [4]:
# read the shp file for 500 cities
tracts_500c_gdf = gpd.read_file(r"C:\Users\kaavy\OneDrive\Documents\Sci4GA Internship\500Cities_Tracts_11082016\500Cities_Tracts_Clip.shp")

tracts_500c_gdf.head()

AttributeError: 'NoneType' object has no attribute 'copy'

Unnamed: 0,place2010,tract2010,ST,PlaceName,plctract10,PlcTrPop10,geometry
0,107000,1073000100,1,Birmingham,0107000-01073000100,3042,"POLYGON ((-9653001.056 3974630.899, -9652982.0..."
1,107000,1073000300,1,Birmingham,0107000-01073000300,2735,"POLYGON ((-9657462.853 3968794.996, -9657470.2..."
2,107000,1073000400,1,Birmingham,0107000-01073000400,3338,"POLYGON ((-9657275.613 3972217.16, -9657275.39..."
3,107000,1073000500,1,Birmingham,0107000-01073000500,2864,"POLYGON ((-9660666.406 3968887.964, -9660631.7..."
4,107000,1073000700,1,Birmingham,0107000-01073000700,2577,"POLYGON ((-9661391.097 3968591.429, -9661398.2..."


In [None]:

# Function to merge data with geometries
def merge_with_geometries_500c(df, tracts_gdf):
    # the common identifier is plctract10 for the shp file and Place_TractID for the csv file
    merged_gdf = tracts_500c_gdf.merge(df, left_on='plctract10', right_on='Place_TractID')
    return merged_gdf

# Merge each year's DataFrame with the geometries for 2017, 2018, and 2019
for year in range(2017, 2020):
    modified_df = globals()[f'modified_places_data_{year}']
    merged_gdf = merge_with_geometries_500c(modified_df, tracts_500c_gdf)
    globals()[f'merged_places_data_{year}'] = merged_gdf  # Save to new GeoDataFrame

   

merged_places_data_2017.head()




AttributeError: 'NoneType' object has no attribute 'copy'

Unnamed: 0,place2010,tract2010,ST,PlaceName_x,plctract10,PlcTrPop10,geometry,StateAbbr,PlaceName_y,PlaceFIPS,...,PHLTH_CrudePrev,PHLTH_Crude95CI,SLEEP_CrudePrev,SLEEP_Crude95CI,STROKE_CrudePrev,STROKE_Crude95CI,TEETHLOST_CrudePrev,TEETHLOST_Crude95CI,Latitude,Longitude
0,1301052,13095000100,13,Albany,1301052-13095000100,5751,"POLYGON ((-9359015.091 3707504.811, -9359015.6...",GA,Albany,1301052,...,16.9,"(15.4, 18.5)",50.4,"(48.3, 52.0)",4.7,"( 4.2, 5.1)",31.9,"(24.9, 39.1)",31.571792,-84.091666
1,1301052,13095000200,13,Albany,1301052-13095000200,3123,"POLYGON ((-9363269.834 3708279.79, -9363265.04...",GA,Albany,1301052,...,20.4,"(18.3, 22.7)",52.5,"(50.6, 54.1)",6.7,"( 5.9, 7.6)",36.8,"(26.7, 46.6)",31.579041,-84.129629
2,1301052,13095000400,13,Albany,1301052-13095000400,6276,"POLYGON ((-9370538.444 3713896.903, -9370538.5...",GA,Albany,1301052,...,13.7,"(12.4, 14.9)",42.1,"(40.4, 43.6)",3.6,"( 3.2, 4.0)",18.3,"(12.3, 25.7)",31.614841,-84.197387
3,1301052,13095000501,13,Albany,1301052-13095000501,4798,"POLYGON ((-9375654.911 3712685.613, -9375654.0...",GA,Albany,1301052,...,11.7,"(10.9, 12.5)",41.3,"(40.1, 42.5)",3.1,"( 2.9, 3.3)",15.3,"(11.5, 19.8)",31.598659,-84.228238
4,1301052,13095000502,13,Albany,1301052-13095000502,4151,"POLYGON ((-9374868.216 3712680.253, -9374841.9...",GA,Albany,1301052,...,12.1,"(10.5, 13.9)",34.8,"(33.2, 36.5)",3.7,"( 3.1, 4.3)",13.4,"( 8.2, 20.9)",31.594996,-84.206332


In [9]:
# save merged places data 2017 to desktop as a GeoJSON file
merged_places_data_2017.to_file(r"C:\Users\kaavy\OneDrive\Documents\Sci4GA Internship\merged_places_data_2017.geojson", driver='GeoJSON')

# save the 2018 and 2019 data as GeoJSON files
merged_places_data_2018.to_file(r"C:\Users\kaavy\OneDrive\Documents\Sci4GA Internship\merged_places_data_2018.geojson", driver='GeoJSON')
merged_places_data_2019.to_file(r"C:\Users\kaavy\OneDrive\Documents\Sci4GA Internship\merged_places_data_2019.geojson", driver='GeoJSON')


In [None]:
# read the shp file for 2015 census tracts
ga_tracts_2015_gdf = gpd.read_file(r"C:\Users\kaavy\OneDrive\Documents\Sci4GA Internship\Georgia Census Tract 2015\cb_2015_13_tract_500k.shp")

In [11]:
ga_tracts_2015_gdf.head()

AttributeError: 'NoneType' object has no attribute 'copy'

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry
0,13,13,180203,1400000US13013180203,13013180203,1802.03,CT,5653200,15023,"POLYGON ((-83.71626 33.98884, -83.70924 33.992..."
1,13,13,180205,1400000US13013180205,13013180205,1802.05,CT,10897091,129881,"POLYGON ((-83.77742 33.98744, -83.77467 33.991..."
2,13,21,10500,1400000US13021010500,13021010500,105.0,CT,1352913,0,"POLYGON ((-83.65698 32.82572, -83.65634 32.827..."
3,13,21,13407,1400000US13021013407,13021013407,134.07,CT,7870306,17650,"POLYGON ((-83.72017 32.91738, -83.71864 32.920..."
4,13,25,960300,1400000US13025960300,13025960300,9603.0,CT,353842838,3942388,"POLYGON ((-82.28456 31.22445, -82.28356 31.226..."


In [12]:
modified_places_data_2020.head()


Unnamed: 0,StateAbbr,StateDesc,CountyName,CountyFIPS,TractFIPS,TotalPopulation,ACCESS2_CrudePrev,ACCESS2_Crude95CI,ARTHRITIS_CrudePrev,ARTHRITIS_Crude95CI,...,PHLTH_CrudePrev,PHLTH_Crude95CI,SLEEP_CrudePrev,SLEEP_Crude95CI,STROKE_CrudePrev,STROKE_Crude95CI,TEETHLOST_CrudePrev,TEETHLOST_Crude95CI,Longitude,Latitude
0,GA,Georgia,Appling,13001,13001950100,3190,22.6,"(19.6, 25.9)",29.6,"(28.4, 30.9)",...,16.0,"(14.8, 17.6)",38.2,"(37.0, 39.1)",4.0,"( 3.6, 4.4)",20.6,"(14.7, 26.9)",-82.270286,31.878019
1,GA,Georgia,Appling,13001,13001950200,4530,27.5,"(25.3, 29.8)",28.8,"(28.1, 29.6)",...,17.4,"(16.5, 18.6)",44.1,"(43.2, 45.2)",4.9,"( 4.6, 5.2)",25.5,"(21.0, 30.4)",-82.313418,31.81027
2,GA,Georgia,Appling,13001,13001950300,5176,30.8,"(28.4, 33.3)",32.8,"(31.8, 33.6)",...,20.4,"(19.3, 21.4)",41.3,"(40.3, 42.0)",5.4,"( 5.0, 5.8)",28.9,"(22.9, 34.6)",-82.376459,31.76133
3,GA,Georgia,Appling,13001,13001950400,1476,22.1,"(20.6, 24.0)",32.1,"(31.5, 32.8)",...,16.5,"(15.7, 17.2)",39.2,"(38.5, 40.0)",4.8,"( 4.6, 5.0)",20.5,"(17.6, 23.5)",-82.190735,31.665187
4,GA,Georgia,Appling,13001,13001950500,3864,24.4,"(21.5, 27.5)",30.0,"(28.8, 31.3)",...,17.3,"(15.7, 19.1)",39.6,"(38.2, 41.2)",4.2,"( 3.8, 4.7)",21.9,"(16.1, 27.9)",-82.380656,31.694261


In [16]:
print(ga_tracts_2015_gdf['GEOID'].dtype)
print(modified_places_data_2023['TractFIPS'].dtype)

object
int64


In [19]:
modified_places_data_2020['TractFIPS'] = modified_places_data_2020['TractFIPS'].astype(str)
modified_places_data_2021['TractFIPS'] = modified_places_data_2021['TractFIPS'].astype(str)
modified_places_data_2022['TractFIPS'] = modified_places_data_2022['TractFIPS'].astype(str)
modified_places_data_2023['TractFIPS'] = modified_places_data_2023['TractFIPS'].astype(str)
modified_places_data_2024['TractFIPS'] = modified_places_data_2024['TractFIPS'].astype(str)
print(modified_places_data_2020.dtypes)
print(modified_places_data_2021.dtypes)
print(modified_places_data_2022.dtypes)
print(modified_places_data_2023.dtypes)
print(modified_places_data_2024.dtypes)

StateAbbr               object
StateDesc               object
CountyName              object
CountyFIPS               int64
TractFIPS               object
                        ...   
STROKE_Crude95CI        object
TEETHLOST_CrudePrev    float64
TEETHLOST_Crude95CI     object
Longitude              float64
Latitude               float64
Length: 64, dtype: object
StateAbbr               object
StateDesc               object
CountyName              object
CountyFIPS               int64
TractFIPS               object
                        ...   
STROKE_Crude95CI        object
TEETHLOST_CrudePrev    float64
TEETHLOST_Crude95CI     object
Longitude              float64
Latitude               float64
Length: 68, dtype: object
StateAbbr               object
StateDesc               object
CountyName              object
CountyFIPS               int64
TractFIPS               object
                        ...   
STROKE_Crude95CI        object
TEETHLOST_CrudePrev    float64
TEETHLOST_Crude95C

In [None]:
# Function to merge data with geometries
def merge_with_geometries_2015(df, tracts_gdf):
    # the common identifier is GEOID for the shp file and TRACTFIPS for the csv file 
    merged_gdf = ga_tracts_2015_gdf.merge(df, left_on='GEOID', right_on='TractFIPS')
    return merged_gdf

# Merge each year's DataFrame with the geometries for 2020, 2021, 2022, 2023

for year in range(2020, 2024):
    modified_df = globals()[f'modified_places_data_{year}']
    merged_gdf = merge_with_geometries_2015(modified_df, ga_tracts_2015_gdf)
    globals()[f'merged_places_data_{year}'] = merged_gdf  # Save to new GeoDataFrame

   

merged_places_data_2020.head()

AttributeError: 'NoneType' object has no attribute 'copy'

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry,...,PHLTH_CrudePrev,PHLTH_Crude95CI,SLEEP_CrudePrev,SLEEP_Crude95CI,STROKE_CrudePrev,STROKE_Crude95CI,TEETHLOST_CrudePrev,TEETHLOST_Crude95CI,Longitude,Latitude
0,13,13,180203,1400000US13013180203,13013180203,1802.03,CT,5653200,15023,"POLYGON ((-83.71626 33.98884, -83.70924 33.992...",...,17.7,"(16.9, 18.6)",44.3,"(43.2, 45.5)",4.5,"( 4.3, 4.8)",27.5,"(23.1, 32.4)",-83.697769,33.984533
1,13,13,180205,1400000US13013180205,13013180205,1802.05,CT,10897091,129881,"POLYGON ((-83.77742 33.98744, -83.77467 33.991...",...,15.1,"(14.3, 16.2)",40.1,"(38.8, 40.9)",3.9,"( 3.6, 4.2)",19.7,"(14.8, 25.3)",-83.743552,33.991497
2,13,21,10500,1400000US13021010500,13021010500,105.0,CT,1352913,0,"POLYGON ((-83.65698 32.82572, -83.65634 32.827...",...,14.9,"(14.0, 16.3)",46.1,"(44.8, 48.1)",2.9,"( 2.7, 3.1)",38.7,"(32.4, 46.1)",-83.650602,32.82597
3,13,21,13407,1400000US13021013407,13021013407,134.07,CT,7870306,17650,"POLYGON ((-83.72017 32.91738, -83.71864 32.920...",...,12.6,"(11.4, 14.4)",38.9,"(37.5, 40.4)",3.7,"( 3.2, 4.1)",14.4,"( 9.0, 21.1)",-83.70522,32.90222
4,13,25,960300,1400000US13025960300,13025960300,9603.0,CT,353842838,3942388,"POLYGON ((-82.28456 31.22445, -82.28356 31.226...",...,17.1,"(15.8, 18.3)",37.9,"(36.4, 39.4)",4.3,"( 4.0, 4.6)",22.2,"(18.0, 26.9)",-82.135647,31.150398


In [22]:
# save the 2020, 2021, 2022, 2023 data as GeoJSON files
merged_places_data_2020.to_file(r"C:\Users\kaavy\OneDrive\Documents\Sci4GA Internship\merged_places_data_2020.geojson", driver='GeoJSON')
merged_places_data_2021.to_file(r"C:\Users\kaavy\OneDrive\Documents\Sci4GA Internship\merged_places_data_2021.geojson", driver='GeoJSON')
merged_places_data_2022.to_file(r"C:\Users\kaavy\OneDrive\Documents\Sci4GA Internship\merged_places_data_2022.geojson", driver='GeoJSON')
merged_places_data_2023.to_file(r"C:\Users\kaavy\OneDrive\Documents\Sci4GA Internship\merged_places_data_2023.geojson", driver='GeoJSON')

In [24]:
ga_tracts_2022_gdf = gpd.read_file(r"C:\Users\kaavy\OneDrive\Documents\Sci4GA Internship\Georgia Census Tract 2022\cb_2022_13_tract_500k.shp")
ga_tracts_2022_gdf.head()

AttributeError: 'NoneType' object has no attribute 'copy'

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,AFFGEOID,GEOID,NAME,NAMELSAD,STUSPS,NAMELSADCO,STATE_NAME,LSAD,ALAND,AWATER,geometry
0,13,101,880100,1400000US13101880100,13101880100,8801.0,Census Tract 8801,GA,Echols County,Georgia,CT,873494768,809409,"POLYGON ((-83.05618 30.62117, -83.05466 30.621..."
1,13,185,11100,1400000US13185011100,13185011100,111.0,Census Tract 111,GA,Lowndes County,Georgia,CT,1957360,0,"POLYGON ((-83.30259 30.85971, -83.29251 30.859..."
2,13,277,960700,1400000US13277960700,13277960700,9607.0,Census Tract 9607,GA,Tift County,Georgia,CT,24158997,407744,"POLYGON ((-83.5864 31.47687, -83.5848 31.47964..."
3,13,95,1500,1400000US13095001500,13095001500,15.0,Census Tract 15,GA,Dougherty County,Georgia,CT,1944375,0,"POLYGON ((-84.17615 31.56811, -84.1677 31.5681..."
4,13,95,10402,1400000US13095010402,13095010402,104.02,Census Tract 104.02,GA,Dougherty County,Georgia,CT,24626133,214138,"POLYGON ((-84.3132 31.54351, -84.31072 31.5467..."


In [None]:
# Function to merge data with geometries
def merge_with_geometries_2022(df, tracts_gdf):
    # the common identifier is GEOID for the shp file and TRACTFIPS for the csv file 
    merged_gdf = ga_tracts_2022_gdf.merge(df, left_on='GEOID', right_on='TractFIPS')
    return merged_gdf



for year in range(2024,2025):
    modified_df = globals()[f'modified_places_data_{year}']
    merged_gdf = merge_with_geometries_2022(modified_df, ga_tracts_2022_gdf)
    globals()[f'merged_places_data_{year}'] = merged_gdf  # Save to new GeoDataFrame

   

merged_places_data_2024.head()

AttributeError: 'NoneType' object has no attribute 'copy'

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,AFFGEOID,GEOID,NAME,NAMELSAD,STUSPS,NAMELSADCO,STATE_NAME,...,HOUSINSECU_CrudePrev,HOUSINSECU_Crude95CI,SHUTUTILITY_CrudePrev,SHUTUTILITY_Crude95CI,LACKTRPT_CrudePrev,LACKTRPT_Crude95CI,EMOTIONSPT_CrudePrev,EMOTIONSPT_Crude95CI,Longitude,Latitude
0,13,101,880100,1400000US13101880100,13101880100,8801.0,Census Tract 8801,GA,Echols County,Georgia,...,16.3,"(14.4, 18.3)",10.5,"( 9.2, 11.9)",11.9,"(10.8, 13.1)",30.3,"(26.5, 34.4)",-82.839194,30.7089
1,13,185,11100,1400000US13185011100,13185011100,111.0,Census Tract 111,GA,Lowndes County,Georgia,...,14.7,"(13.1, 16.4)",9.1,"( 8.0, 10.2)",13.6,"(12.4, 14.9)",30.3,"(26.9, 33.8)",-83.290168,30.852926
2,13,277,960700,1400000US13277960700,13277960700,9607.0,Census Tract 9607,GA,Tift County,Georgia,...,20.8,"(18.6, 23.1)",13.5,"(12.0, 15.1)",13.1,"(11.9, 14.3)",32.7,"(28.7, 36.8)",-83.550648,31.451474
3,13,95,1500,1400000US13095001500,13095001500,15.0,Census Tract 15,GA,Dougherty County,Georgia,...,33.9,"(31.0, 36.9)",24.6,"(22.2, 27.2)",23.7,"(21.9, 25.6)",41.5,"(37.1, 45.7)",-84.167095,31.566201
4,13,95,10402,1400000US13095010402,13095010402,104.02,Census Tract 104.02,GA,Dougherty County,Georgia,...,28.5,"(26.0, 31.1)",20.6,"(18.4, 22.7)",19.1,"(17.5, 20.7)",36.8,"(32.8, 40.9)",-84.263322,31.562636


In [26]:
merged_places_data_2024.to_file(r"C:\Users\kaavy\OneDrive\Documents\Sci4GA Internship\merged_places_data_2024.geojson", driver='GeoJSON')

# Some minor analysis

In [1]:
import pandas as pd
import geopandas as gpd

gdf = gpd.read_file(r"C:\Users\kaavy\OneDrive\Documents\Sci4GA Internship\merged_places_data_2024.geojson")

In [None]:
# Ensure the columns are numeric
gdf['CASTHMA_CrudePrev'] = pd.to_numeric(gdf['CASTHMA_CrudePrev'], errors='coerce')
gdf['TotalPopulation'] = pd.to_numeric(gdf['TotalPopulation'], errors='coerce')

# Create the new column
gdf['AsthmaRate'] = gdf['CASTHMA_CrudePrev'] / gdf['TotalPopulation']

# Sort the dataframe by the new column in descending order and get the top 5 rows
top_5_asthma_rate = gdf.nlargest(6, 'AsthmaRate') # 6 because the top 1 Fulton tract is an outlier

# Print the COUNTYFP and GEOID for the top 5 largest values of the new column
top_5_asthma_rate[['NAMELSADCO', 'COUNTYFP', 'GEOID', 'AsthmaRate', 'TotalPopulation']]

Unnamed: 0,NAMELSADCO,COUNTYFP,GEOID,AsthmaRate,TotalPopulation
214,Fulton County,121,13121003700,0.062827,191
751,Chattahoochee County,53,13053020206,0.033617,235
545,Bibb County,21,13021011500,0.015149,1010
1423,Greene County,133,13133950400,0.013444,900
743,Fulton County,121,13121006602,0.013429,1050
39,Fulton County,121,13121006400,0.01286,902


In [8]:
top_5_asthma_rate[['NAMELSADCO', 'COUNTYFP', 'GEOID', 'AsthmaRate', 'TotalPopulation', 'CASTHMA_CrudePrev']]

Unnamed: 0,NAMELSADCO,COUNTYFP,GEOID,AsthmaRate,TotalPopulation,CASTHMA_CrudePrev
214,Fulton County,121,13121003700,0.062827,191,12.0
751,Chattahoochee County,53,13053020206,0.033617,235,7.9
545,Bibb County,21,13021011500,0.015149,1010,15.3
1423,Greene County,133,13133950400,0.013444,900,12.1
743,Fulton County,121,13121006602,0.013429,1050,14.1
39,Fulton County,121,13121006400,0.01286,902,11.6


In [10]:
# Ensure the columns are numeric
gdf['CHD_CrudePrev'] = pd.to_numeric(gdf['CHD_CrudePrev'], errors='coerce')
gdf['TotalPopulation'] = pd.to_numeric(gdf['TotalPopulation'], errors='coerce')

# Create the new column
gdf['CHDRate'] = gdf['CHD_CrudePrev'] / gdf['TotalPopulation']

# Sort the dataframe by the new column in descending order and get the top 5 rows
top_5_chd_rate = gdf.nlargest(6, 'CHDRate')

In [11]:
top_5_chd_rate[['NAMELSADCO', 'COUNTYFP', 'GEOID', 'CHDRate', 'TotalPopulation', 'CHD_CrudePrev']]

Unnamed: 0,NAMELSADCO,COUNTYFP,GEOID,CHDRate,TotalPopulation,CHD_CrudePrev
214,Fulton County,121,13121003700,0.08534,191,16.3
545,Bibb County,21,13021011500,0.013168,1010,13.3
1423,Greene County,133,13133950400,0.012111,900,10.9
1065,Baker County,7,13007960200,0.012021,940,11.3
2692,Hart County,147,13147960102,0.011888,715,8.5
1683,McIntosh County,191,13191110102,0.011185,903,10.1


In [12]:
# Ensure the columns are numeric
gdf['COPD_CrudePrev'] = pd.to_numeric(gdf['COPD_CrudePrev'], errors='coerce')
gdf['TotalPopulation'] = pd.to_numeric(gdf['TotalPopulation'], errors='coerce')

# Create the new column
gdf['COPDRate'] = gdf['COPD_CrudePrev'] / gdf['TotalPopulation']

# Sort the dataframe by the new column in descending order and get the top 5 rows
top_5_copd_rate = gdf.nlargest(6, 'COPDRate')

In [13]:
top_5_copd_rate[['NAMELSADCO', 'COUNTYFP', 'GEOID', 'CHDRate', 'TotalPopulation', 'CHD_CrudePrev']]

Unnamed: 0,NAMELSADCO,COUNTYFP,GEOID,CHDRate,TotalPopulation,CHD_CrudePrev
214,Fulton County,121,13121003700,0.08534,191,16.3
545,Bibb County,21,13021011500,0.013168,1010,13.3
1423,Greene County,133,13133950400,0.012111,900,10.9
1065,Baker County,7,13007960200,0.012021,940,11.3
692,Richmond County,245,13245000300,0.00905,1105,10.0
743,Fulton County,121,13121006602,0.008,1050,8.4
