## Race / ethnicity for California's 50th Congressional District. 

##### This notebook does the following:
- Joins the race file with census tract labels
- Creates the predominant race variable
- Creates an index score based on race
- Merges the race dataset with geo info
- Creates files:
  - race_ethn_w_predom_export.geojson
  - race_ethn_w_predom_export.csv

Source: Table B03002, U.S. Census Bureau, 2013-2017 American Community Survey 5-Year Estimates

##### Source: Table B03002, U.S. Census Bureau, 2013-2017 American Community Survey 5-Year Estimates

#### Configuration And Needed Libraries

In [79]:
import pandas as pd
import numpy as np
import urllib.request, json 
import requests
from ast import literal_eval
import json # for working with JSON data
from shapely.geometry import Point

# Notebook display configuration.
# NOTE(review): newer IPython releases expect `from IPython.display import display, HTML`;
# confirm this import path still works in the target environment.
from IPython.core.display import display, HTML
# Inject CSS so the notebook container uses 80% of the browser width.
display(HTML("<style>.container { width:80% !important; }</style>"))
# NOTE(review): no plotting call is visible in this notebook; confirm this magic is still needed.
%matplotlib notebook
# Show up to 500 columns and allow 1000 characters per line when printing dataframes.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
import geojson # ditto for GeoJSON data - an extension of JSON with support for geographic data
import geopandas as gpd


In [80]:
# Read the tract-level race/ethnicity counts and percentages into a dataframe.
# The path is a raw string: it contains backslash sequences (\M, \D, \R) that are
# not valid string escapes, which Python only tolerates with a warning when the
# r-prefix is missing. The resulting path text is unchanged.
data2 = pd.read_csv(r'G:\My Drive\Data\Race Eth ACS 2017\Data_Exports\Race_Est_pct_poc.csv', delimiter=',')
data2.head(2)

Unnamed: 0.1,Unnamed: 0,GEOID,Total,White,Latino,POC,Black,Asian,Other_agg,White_e_pct,Latino_e_pct,Black_e_pct,Asian_e_pct,Other_agg_e_pct,POC_e_pct
0,0,6065043216,6359,3356,1792,3003,417,600,194,52.775594,28.180532,6.557635,9.435446,3.050794,47.224406
1,1,6065043218,5415,3561,1497,1854,74,211,72,65.761773,27.645429,1.366574,3.896584,1.32964,34.238227


In [81]:
# Prefix the population-count columns with "Estimate" so the bare race names
# no longer refer to counts.
count_column_labels = {
    'White': 'Estimate White',
    'Black': 'Estimate Black',
    'Asian': 'Estimate Asian',
    'Latino': 'Estimate Latino',
    'Other_agg': 'Estimate Other Race',
    'POC': 'Estimate POC',
}
data2.rename(columns=count_column_labels, inplace=True)
data2.head(2)

Unnamed: 0.1,Unnamed: 0,GEOID,Total,Estimate White,Estimate Latino,Estimate POC,Estimate Black,Estimate Asian,Estimate Other Race,White_e_pct,Latino_e_pct,Black_e_pct,Asian_e_pct,Other_agg_e_pct,POC_e_pct
0,0,6065043216,6359,3356,1792,3003,417,600,194,52.775594,28.180532,6.557635,9.435446,3.050794,47.224406
1,1,6065043218,5415,3561,1497,1854,74,211,72,65.761773,27.645429,1.366574,3.896584,1.32964,34.238227


In [82]:
# rename columns
data2.rename(columns={ 'Not_Latino_e_pct':'Not Latino', 
                        'White_e_pct':'White', 
                        'Black_e_pct':'Black',
                        'Asian_e_pct':'Asian', 
                        'Latino_e_pct':'Latino',
                        'Other_agg_e_pct':'Other_Race',
                        'POC_e_pct':'POC'}, inplace=True)
data2.head(2)

Unnamed: 0.1,Unnamed: 0,GEOID,Total,Estimate White,Estimate Latino,Estimate POC,Estimate Black,Estimate Asian,Estimate Other Race,White,Latino,Black,Asian,Other_Race,POC
0,0,6065043216,6359,3356,1792,3003,417,600,194,52.775594,28.180532,6.557635,9.435446,3.050794,47.224406
1,1,6065043218,5415,3561,1497,1854,74,211,72,65.761773,27.645429,1.366574,3.896584,1.32964,34.238227


In [83]:
# drop columns
data2 = data2.drop(['Unnamed: 0'], axis = 1) #
data2.head(2)

Unnamed: 0,GEOID,Total,Estimate White,Estimate Latino,Estimate POC,Estimate Black,Estimate Asian,Estimate Other Race,White,Latino,Black,Asian,Other_Race,POC
0,6065043216,6359,3356,1792,3003,417,600,194,52.775594,28.180532,6.557635,9.435446,3.050794,47.224406
1,6065043218,5415,3561,1497,1854,74,211,72,65.761773,27.645429,1.366574,3.896584,1.32964,34.238227


#### Connecting Census Tract Labels and County Info with Race data

In [84]:
# Read the GEOID lookup table that supplies the tract and county labels.
# Raw string: the Windows path holds backslash sequences (\M, \D, \R, \l) that
# are not valid string escapes; the r-prefix keeps the same path text without
# relying on Python tolerating them.
lu = pd.read_csv(r'G:\My Drive\Data\Race Eth ACS 2017\Data_Exports\lookup_ct_county_name.csv', delimiter=',')

# Attach the "Census Tract" and "County" labels to the race data by GEOID.
# Inner join: only GEOIDs present in both tables are kept.
data = pd.merge(data2, lu, on='GEOID', how='inner')
data.head(2)

Unnamed: 0,GEOID,Total,Estimate White,Estimate Latino,Estimate POC,Estimate Black,Estimate Asian,Estimate Other Race,White,Latino,Black,Asian,Other_Race,POC,Census Tract,County
0,6065043216,6359,3356,1792,3003,417,600,194,52.775594,28.180532,6.557635,9.435446,3.050794,47.224406,CT 432.16,Riverside
1,6065043218,5415,3561,1497,1854,74,211,72,65.761773,27.645429,1.366574,3.896584,1.32964,34.238227,CT 432.18,Riverside


#### Creating Predominance Variables

In [85]:
# Percentage columns that compete for "predominant race" (a list of column names).
race = ['Latino', 'White', 'Black', 'Asian', 'Other_Race']

# Work on just those columns: per tract, idxmax gives the name of the column
# holding the largest share and max gives that share itself. The share is a
# percentage because the selected columns hold percentages.
race_shares = data[race]
data['predominant_race'] = race_shares.idxmax(axis=1)
data['predominant_race_pct'] = race_shares.max(axis=1)
data.head(3)

Unnamed: 0,GEOID,Total,Estimate White,Estimate Latino,Estimate POC,Estimate Black,Estimate Asian,Estimate Other Race,White,Latino,Black,Asian,Other_Race,POC,Census Tract,County,predominant_race,predominant_race_pct
0,6065043216,6359,3356,1792,3003,417,600,194,52.775594,28.180532,6.557635,9.435446,3.050794,47.224406,CT 432.16,Riverside,White,52.775594
1,6065043218,5415,3561,1497,1854,74,211,72,65.761773,27.645429,1.366574,3.896584,1.32964,34.238227,CT 432.18,Riverside,White,65.761773
2,6065043220,4480,2144,1741,2336,164,312,119,47.857143,38.861607,3.660714,6.964286,2.65625,52.142857,CT 432.20,Riverside,White,47.857143


#### Creating one row that represents the racial demographics of the district

In [86]:
# Collapse every tract into one district-wide row of population counts.
# All rows receive the same constant key, so a single groupby yields one row of
# totals. The 'District_50' column stays on `data` afterwards.
data['District_50'] = "Demographics"

# Count columns to total, in the order they should appear in the result.
# (The previous unused `cols` variable, which listed only some of these, is removed.)
district_count_cols = ['Total', 'Estimate Latino', 'Estimate Asian', 'Estimate Black',
                       'Estimate Other Race', 'Estimate White', 'Estimate POC']
agg = data.groupby('District_50', as_index=False).agg({col: 'sum' for col in district_count_cols})
agg

Unnamed: 0,District_50,Total,Estimate Latino,Estimate Asian,Estimate Black,Estimate Other Race,Estimate White,Estimate POC
0,Demographics,942523,289087,56273,30270,42581,524312,418211


In [87]:
# Summary statistics for every numeric column. The tract-level means of the
# White and POC percentage columns (about 56.76 and 43.24 in the output below)
# are what the scoring step's cut-offs of 57 and 43 correspond to when rounded.
data.describe()

Unnamed: 0,GEOID,Total,Estimate White,Estimate Latino,Estimate POC,Estimate Black,Estimate Asian,Estimate Other Race,White,Latino,Black,Asian,Other_Race,POC,predominant_race_pct
count,159.0,159.0,159.0,159.0,159.0,159.0,159.0,159.0,159.0,159.0,159.0,159.0,159.0,159.0,159.0
mean,6072116000.0,5927.81761,3297.559748,1818.157233,2630.257862,190.377358,353.918239,267.805031,56.755931,30.452098,2.765333,5.482783,4.543854,43.244069,63.238001
std,2534646.0,3590.341156,2334.19276,1374.374525,1846.945029,357.773224,398.69053,224.670292,18.250575,18.086098,2.566517,4.552507,3.13287,18.250575,11.948275
min,6065043000.0,1600.0,178.0,144.0,293.0,0.0,0.0,0.0,3.94766,4.698206,0.0,0.0,0.0,12.312428,40.028694
25%,6073016000.0,4008.0,2005.5,881.0,1449.0,40.0,118.5,117.0,48.17806,18.049056,0.948893,2.136139,2.490268,29.198376,52.536053
50%,6073019000.0,5599.0,2751.0,1510.0,2253.0,106.0,211.0,217.0,59.885649,26.331092,1.925457,4.00319,3.82606,40.114351,63.083512
75%,6073020000.0,7143.5,4344.5,2246.0,3580.5,236.5,389.0,356.0,70.801624,37.785553,3.73484,7.778319,5.644824,51.82194,71.680293
max,6073021000.0,40402.0,24195.0,9747.0,16207.0,4106.0,1813.0,1400.0,87.687572,93.501885,12.225381,21.472849,19.297583,96.05234,93.501885


#### Creating Scoring for the index

In [88]:
# Create both index-flag columns with a placeholder value of 0; the following
# cell assigns their real values.
for flag_column in ('whites_below_average', 'poc_abv_average'):
    data[flag_column] = 0

In [89]:
# Cut-offs for the two index flags, in percentage points.
# NOTE(review): these match the tract-level means shown by data.describe()
# (White ~56.76, POC ~43.24) rounded to whole numbers, so tracts lying between
# the exact mean and the rounded value are classified by the rounded value.
# Confirm the rounding is intended.
WHITE_PCT_CUTOFF = 57
POC_PCT_CUTOFF = 43

# 1 when the tract's White share is under the cut-off, else 0.
data['whites_below_average'] = np.where(data['White'] < WHITE_PCT_CUTOFF, 1, 0)
# 1 when the tract's POC share is over the cut-off, else 0.
data['poc_abv_average'] = np.where(data['POC'] > POC_PCT_CUTOFF, 1, 0)
data.tail(10)

Unnamed: 0,GEOID,Total,Estimate White,Estimate Latino,Estimate POC,Estimate Black,Estimate Asian,Estimate Other Race,White,Latino,Black,Asian,Other_Race,POC,Census Tract,County,predominant_race,predominant_race_pct,District_50,whites_below_average,poc_abv_average
149,6073020904,1909,1508,281,401,3,19,98,78.994238,14.719749,0.15715,0.995285,5.133578,21.005762,CT 209.04,San Diego,White,78.994238,Demographics,0,0
150,6073021000,2693,1987,420,706,169,0,117,73.783884,15.59599,6.275529,0.0,4.344597,26.216116,CT 210,San Diego,White,73.783884,Demographics,0,0
151,6073021100,7434,3791,2878,3643,167,100,498,50.995426,38.714017,2.246435,1.345171,6.698951,49.004574,CT 211,San Diego,White,50.995426,Demographics,1,1
152,6073021202,3570,2235,964,1335,72,9,290,62.605042,27.002801,2.016807,0.252101,8.123249,37.394958,CT 212.02,San Diego,White,62.605042,Demographics,0,0
153,6073021204,5660,4903,457,757,47,182,71,86.625442,8.074205,0.830389,3.215548,1.254417,13.374558,CT 212.04,San Diego,White,86.625442,Demographics,0,0
154,6073021205,7073,5484,1169,1589,125,98,197,77.534285,16.52764,1.767284,1.385551,2.78524,22.465715,CT 212.05,San Diego,White,77.534285,Demographics,0,0
155,6073021206,3065,2536,144,529,0,28,357,82.74062,4.698206,0.0,0.91354,11.647635,17.25938,CT 212.06,San Diego,White,82.74062,Demographics,0,0
156,6073021302,7738,3359,3089,4379,946,137,207,43.40915,39.919876,12.225381,1.770483,2.67511,56.59085,CT 213.02,San Diego,White,43.40915,Demographics,1,1
157,6073021303,9497,4360,2893,5137,215,1567,462,45.909234,30.462251,2.263873,16.499947,4.864694,54.090766,CT 213.03,San Diego,White,45.909234,Demographics,1,1
158,6073021304,2508,1781,470,727,42,86,129,71.012759,18.740032,1.674641,3.429027,5.143541,28.987241,CT 213.04,San Diego,White,71.012759,Demographics,0,0


In [90]:
# Index score per tract: the sum of the two 0/1 flags, so it ranges from 0 to 2.
data['white_poc'] = data['whites_below_average'].add(data['poc_abv_average'])
data

Unnamed: 0,GEOID,Total,Estimate White,Estimate Latino,Estimate POC,Estimate Black,Estimate Asian,Estimate Other Race,White,Latino,Black,Asian,Other_Race,POC,Census Tract,County,predominant_race,predominant_race_pct,District_50,whites_below_average,poc_abv_average,white_poc
0,6065043216,6359,3356,1792,3003,417,600,194,52.775594,28.180532,6.557635,9.435446,3.050794,47.224406,CT 432.16,Riverside,White,52.775594,Demographics,1,1,2
1,6065043218,5415,3561,1497,1854,74,211,72,65.761773,27.645429,1.366574,3.896584,1.329640,34.238227,CT 432.18,Riverside,White,65.761773,Demographics,0,0,0
2,6065043220,4480,2144,1741,2336,164,312,119,47.857143,38.861607,3.660714,6.964286,2.656250,52.142857,CT 432.20,Riverside,White,47.857143,Demographics,1,1,2
3,6065043222,4570,2963,882,1607,124,377,224,64.835886,19.299781,2.713348,8.249453,4.901532,35.164114,CT 432.22,Riverside,White,64.835886,Demographics,0,0,0
4,6065043246,5706,3592,1197,2114,172,432,313,62.951279,20.977918,3.014371,7.570978,5.485454,37.048721,CT 432.46,Riverside,White,62.951279,Demographics,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154,6073021205,7073,5484,1169,1589,125,98,197,77.534285,16.527640,1.767284,1.385551,2.785240,22.465715,CT 212.05,San Diego,White,77.534285,Demographics,0,0,0
155,6073021206,3065,2536,144,529,0,28,357,82.740620,4.698206,0.000000,0.913540,11.647635,17.259380,CT 212.06,San Diego,White,82.740620,Demographics,0,0,0
156,6073021302,7738,3359,3089,4379,946,137,207,43.409150,39.919876,12.225381,1.770483,2.675110,56.590850,CT 213.02,San Diego,White,43.409150,Demographics,1,1,2
157,6073021303,9497,4360,2893,5137,215,1567,462,45.909234,30.462251,2.263873,16.499947,4.864694,54.090766,CT 213.03,San Diego,White,45.909234,Demographics,1,1,2


In [91]:
# Write the scored tract table to CSV without the row index.
csv_export_path = r'G:\My Drive\Data\Race Eth ACS 2017\Geo_Files\race_ethn_w_predom_export.csv'
data.to_csv(csv_export_path, index=False)

#### Reading in geometry and merging Race data with geo data

In [92]:
# Load the GeoJSON file that carries a geometry for each GEOID.
geometry_path = r'G:\My Drive\Data\Race Eth ACS 2017\Geo_Files\CA_congress.geojson'
gmt = gpd.read_file(geometry_path)
gmt.head(2)


Unnamed: 0,GEOID,geometry
0,6065051200,"MULTIPOLYGON (((-117.14876 33.47290, -117.1493..."
1,6065049600,"POLYGON ((-117.16311 33.52238, -117.16311 33.5..."


### Step 2.
a.) Joining tabular data onto geospatial data (geospatial frame on the left) = GeoDataFrame

b.) Joining geospatial data onto tabular data (tabular frame on the left) = pandas DataFrame

In [93]:
# Attach the race attributes to the tract geometries by GEOID. The geospatial
# frame is on the left of the merge; rows are kept only where the GEOID exists
# in both frames (inner join, which is merge's default).
gdf = gmt.merge(data, on='GEOID', how='inner')
gdf.head(2)

Unnamed: 0,GEOID,geometry,Total,Estimate White,Estimate Latino,Estimate POC,Estimate Black,Estimate Asian,Estimate Other Race,White,Latino,Black,Asian,Other_Race,POC,Census Tract,County,predominant_race,predominant_race_pct,District_50,whites_below_average,poc_abv_average,white_poc
0,6065051200,"MULTIPOLYGON (((-117.14876 33.47290, -117.1493...",4002,1749,1752,2253,49,332,120,43.703148,43.778111,1.224388,8.295852,2.998501,56.296852,CT 512,Riverside,Latino,43.778111,Demographics,1,1,2
1,6065049600,"POLYGON ((-117.16311 33.52238, -117.16311 33.5...",8407,4511,2260,3896,885,388,363,53.657666,26.88236,10.526942,4.615202,4.31783,46.342334,CT 496,Riverside,White,53.657666,Demographics,1,1,2


In [94]:
# Check the type of the merge result: with `gmt` on the left of the merge the
# result is reported as a GeoDataFrame (see the output below).
type(gdf)

geopandas.geodataframe.GeoDataFrame

In [95]:
# Write the merged geometries and race attributes out as a GeoJSON file.
geojson_export_path = r'G:\My Drive\Data\Race Eth ACS 2017\Geo_Files\race_ethn_w_predom_export.geojson'
gdf.to_file(geojson_export_path, driver='GeoJSON')

In [96]:
# Same GEOID join, but with the tabular dataframe on the left.
# NOTE(review): with a plain pandas DataFrame on the left, the result is expected
# to be a pandas DataFrame that merely contains a geometry column, not a
# GeoDataFrame - confirm with type(gdf).
# This assignment also rebinds `gdf`, replacing the frame built by the earlier merge.
gdf = data.merge(gmt, on='GEOID')
gdf.head(2)

Unnamed: 0,GEOID,Total,Estimate White,Estimate Latino,Estimate POC,Estimate Black,Estimate Asian,Estimate Other Race,White,Latino,Black,Asian,Other_Race,POC,Census Tract,County,predominant_race,predominant_race_pct,District_50,whites_below_average,poc_abv_average,white_poc,geometry
0,6065043216,6359,3356,1792,3003,417,600,194,52.775594,28.180532,6.557635,9.435446,3.050794,47.224406,CT 432.16,Riverside,White,52.775594,Demographics,1,1,2,"POLYGON ((-117.13023 33.51936, -117.13021 33.5..."
1,6065043218,5415,3561,1497,1854,74,211,72,65.761773,27.645429,1.366574,3.896584,1.32964,34.238227,CT 432.18,Riverside,White,65.761773,Demographics,0,0,0,"MULTIPOLYGON (((-117.12179 33.51653, -117.1219..."
