# Population by Race

This notebook takes census-tract population counts by race and ethnicity from the 2023 5-Year American Community Survey (ACS), matches each 2020 census tract to its 2023 council district, and writes the combined table to a CSV.

<b> NOTE: YOU NEED TO FIX THE 'OTHER RACE' COUNTS </b>


In [3]:
## import libraries
import pandas as pd
import geopandas as gpd

In [4]:
## display settings: let pandas print up to 500 rows before truncating a frame
pd.options.display.max_rows = 500

In [12]:
## load the tract-level population and broad race/ethnicity counts
## from the 'cleaned_filtered' sheet of the demographics workbook;
## 'ct' (census tract id) is read as a string rather than a number
broad_categories = pd.read_excel(
    io = '../input/demographics/ethnicity/demographics.xlsx',
    sheet_name = 'cleaned_filtered',
    dtype = {'ct':'str'},
)

In [24]:
## preview the first five rows of the race/ethnicity table
broad_categories.head(n = 5)

Unnamed: 0,ct,pop,pop_u18,hisp,white,black,asian,other_race,multiracial
0,36005000100,3538,0,866,958,1545,79,27,63
1,36005000200,5177,882,3198,77,1517,311,33,41
2,36005000400,6481,1446,3483,333,2481,19,80,85
3,36005001600,6011,1266,3299,104,2087,282,157,82
4,36005001901,2401,556,1085,355,875,12,36,38


In [13]:
## load the crosswalk that matches 2020 census tracts (ct) with 2023 council districts (cd);
## both id columns are read as strings
crosswalk = pd.read_csv(
    filepath_or_buffer = '../input/crosswalks/ct20-to-cd23-crosswalk.csv',
    dtype = {'ct':'str', 'cd':'str'},
)

In [14]:
## load the previous demographics file, which carries the geometry for each census tract
## NOTE(review): this frame is not referenced in the cells visible here -- confirm it is still needed
old_demographics_file = gpd.read_file(
    filename = '../input/GIS/old_gis/demographics.json'
)

In [17]:
## load the previous centroid file, which carries point geometry within each census tract
## NOTE(review): this frame is not referenced in the cells visible here -- confirm it is still needed
old_centroid_file = gpd.read_file(
    filename = '../input/GIS/old_gis/demographics-centroids.json'
)

In [20]:
## attach the council district (cd) to every tract in the race table
## left join on 'ct': every row of broad_categories is kept, and a tract with
## no match in the crosswalk gets a missing cd
## NOTE(review): if the crosswalk lists a tract more than once, the join repeats
## that tract's counts -- confirm the crosswalk has one row per ct
tracts_w_districts = pd.merge(
    left = broad_categories,
    right = crosswalk[['ct','cd']],
    how = 'left',
    on = 'ct',
)

In [22]:
## take a peek at the first five merged rows
tracts_w_districts.head(n = 5)

Unnamed: 0,ct,pop,pop_u18,hisp,white,black,asian,other_race,multiracial,cd
0,36005000100,3538,0,866,958,1545,79,27,63,22
1,36005000200,5177,882,3198,77,1517,311,33,41,18
2,36005000400,6481,1446,3483,333,2481,19,80,85,18
3,36005001600,6011,1266,3299,104,2087,282,157,82,18
4,36005001901,2401,556,1085,355,875,12,36,38,8


In [23]:
## write the tract-level table (race counts plus council district) to a csv
## index = False: after the merge the row index is only a 0..n-1 counter, and
## writing it would add a nameless first column that comes back as
## 'Unnamed: 0' when the file is read again
tracts_w_districts.to_csv('../output/demographics/race_pop_23.csv', index = False)