In [1]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
from matplotlib import pyplot as plt
from shapely.geometry import Point, Polygon, LineString, MultiPolygon
from shapely.ops import unary_union
import numpy as np
import requests
import json
import pickle
%matplotlib inline 

In [2]:
from demography import *

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
pd.options.display.max_columns = 200

# Census API
Resources: 
- Census API page https://www.census.gov/data/developers/data-sets.html 
- Census API availvale datasets https://api.census.gov/data.html 


The code in the next section provides easy-to-use code snipts for accessing a pre-defined list variables based on topics. At this time race and age related variables are avaialble. Note that these variables are being loaded from the variables.py script and are imported in the import line. 

In [5]:
#Variables for base url
year='2020' 
data='acs'
data_name='acs5/profile'
# this comes from the list of variables 
columns = DEMOGRAPH
state='36'
county='005,047,061,081,085'

In [6]:
#first set the base url for acs19
acs_url = f'http://api.census.gov/data/{year}/{data}/{data_name}'
#now set the the data url
data_url = f'{acs_url}?get={columns}&for=tract:*&in=state:{state}&in=county:{county}'
#data_url

In [7]:
#retrieve the data
response=requests.get(data_url)
print(response.text)

[["DP05_0001E","DP05_0018E","DP03_0062E","DP05_0037PE","DP02_0068PE","DP03_0009PE","DP03_0021PE","DP04_0058PE","state","county","tract"],
["3453","36.1","41563","29.6","22.1","6.9","62.9","81.0","36","047","009202"],
["2293","31.6","51346","7.5","22.4","5.5","52.3","58.2","36","047","009401"],
["2746","33.9","58750","17.2","26.3","1.9","42.8","66.3","36","047","009402"],
["5858","38.0","62614","36.2","27.4","6.7","54.1","70.5","36","047","009600"],
["6021","33.6","56229","15.3","8.4","4.7","53.2","69.2","36","047","009800"],
["5978","35.7","65240","26.4","14.8","5.9","60.8","69.9","36","047","010000"],
["3944","32.6","96200","51.0","40.1","4.2","58.7","47.2","36","047","010100"],
["4844","33.6","59375","12.4","20.5","4.9","54.5","65.8","36","047","010200"],
["2152","37.4","47454","10.6","13.3","8.2","45.4","54.3","36","047","010401"],
["2256","36.8","32072","6.4","16.4","3.8","53.3","73.4","36","047","010402"],
["2315","38.9","50238","5.1","6.9","2.7","53.4","63.4","36","047","010601"]

In [8]:
#make dataframe:

#1. turn response into json
popdata=response.json()
popdata 

# transforminto a DF

df=pd.DataFrame(popdata[1:], columns=popdata[0])

df

Unnamed: 0,DP05_0001E,DP05_0018E,DP03_0062E,DP05_0037PE,DP02_0068PE,DP03_0009PE,DP03_0021PE,DP04_0058PE,state,county,tract
0,3453,36.1,41563,29.6,22.1,6.9,62.9,81.0,36,047,009202
1,2293,31.6,51346,7.5,22.4,5.5,52.3,58.2,36,047,009401
2,2746,33.9,58750,17.2,26.3,1.9,42.8,66.3,36,047,009402
3,5858,38.0,62614,36.2,27.4,6.7,54.1,70.5,36,047,009600
4,6021,33.6,56229,15.3,8.4,4.7,53.2,69.2,36,047,009800
...,...,...,...,...,...,...,...,...,...,...,...
2322,4809,41.0,90625,85.4,28.2,4.4,31.4,11.0,36,085,013204
2323,1067,38.6,11343,18.7,4.3,8.7,46.4,87.1,36,085,013301
2324,3320,30.1,68487,37.8,23.6,4.4,30.8,27.8,36,085,013302
2325,4127,37.5,86875,73.0,31.1,5.4,26.5,10.2,36,085,013400


In [9]:
df = df.rename(columns = DEMOGRAPH_GROUPS)
df

Unnamed: 0,Pop,Median_Age,Median_Income,Race_White,BachelorHigher,Unemployment,Mode_Public,NoVehicle,state,county,tract
0,3453,36.1,41563,29.6,22.1,6.9,62.9,81.0,36,047,009202
1,2293,31.6,51346,7.5,22.4,5.5,52.3,58.2,36,047,009401
2,2746,33.9,58750,17.2,26.3,1.9,42.8,66.3,36,047,009402
3,5858,38.0,62614,36.2,27.4,6.7,54.1,70.5,36,047,009600
4,6021,33.6,56229,15.3,8.4,4.7,53.2,69.2,36,047,009800
...,...,...,...,...,...,...,...,...,...,...,...
2322,4809,41.0,90625,85.4,28.2,4.4,31.4,11.0,36,085,013204
2323,1067,38.6,11343,18.7,4.3,8.7,46.4,87.1,36,085,013301
2324,3320,30.1,68487,37.8,23.6,4.4,30.8,27.8,36,085,013302
2325,4127,37.5,86875,73.0,31.1,5.4,26.5,10.2,36,085,013400


In [10]:
df.dtypes

Pop               object
Median_Age        object
Median_Income     object
Race_White        object
BachelorHigher    object
Unemployment      object
Mode_Public       object
NoVehicle         object
state             object
county            object
tract             object
dtype: object

In [11]:
df['GEOID'] = df.state+df.county+df.tract
df = df.drop(columns={'state','county','tract'})
df.head()

Unnamed: 0,Pop,Median_Age,Median_Income,Race_White,BachelorHigher,Unemployment,Mode_Public,NoVehicle,GEOID
0,3453,36.1,41563,29.6,22.1,6.9,62.9,81.0,36047009202
1,2293,31.6,51346,7.5,22.4,5.5,52.3,58.2,36047009401
2,2746,33.9,58750,17.2,26.3,1.9,42.8,66.3,36047009402
3,5858,38.0,62614,36.2,27.4,6.7,54.1,70.5,36047009600
4,6021,33.6,56229,15.3,8.4,4.7,53.2,69.2,36047009800


In [12]:
df = df.apply(pd.to_numeric, errors='coerce').dropna()
df.describe()

Unnamed: 0,Pop,Median_Age,Median_Income,Race_White,BachelorHigher,Unemployment,Mode_Public,NoVehicle,GEOID
count,2327.0,2327.0,2327.0,2327.0,2327.0,2327.0,2327.0,2327.0,2327.0
mean,3601.010743,-26357220.0,-34594770.0,-24638260.0,-24638270.0,-26643740.0,-26930190.0,-28076160.0,36055040000.0
std,2025.087211,129938700.0,148064100.0,125798600.0,125798600.0,130613700.0,131284700.0,133928700.0,25976240.0
min,0.0,-666666700.0,-666666700.0,-666666700.0,-666666700.0,-666666700.0,-666666700.0,-666666700.0,36005000000.0
25%,2192.5,33.0,46421.0,12.8,19.7,3.2,37.85,24.0,36047030000.0
50%,3363.0,37.1,67122.0,34.5,31.2,5.5,52.2,48.9,36047150000.0
75%,4683.5,41.7,89778.0,65.9,47.2,8.5,63.2,68.3,36081040000.0
max,16600.0,85.1,250001.0,100.0,100.0,100.0,100.0,100.0,36085990000.0


# Census Tract

In [13]:
ct2020 = gpd.read_file('./Data/GeoBase/nyct2020_22c/nyct2020.shp')
ct2020.head()

Unnamed: 0,CTLabel,BoroCode,BoroName,CT2020,BoroCT2020,CDEligibil,NTAName,NTA2020,CDTA2020,CDTANAME,GEOID,Shape_Leng,Shape_Area,geometry
0,1.0,1,Manhattan,100,1000100,,The Battery-Governors Island-Ellis Island-Libe...,MN0191,MN01,MN01 Financial District-Tribeca (CD 1 Equivalent),36061000100,11023.048501,1844421.0,"MULTIPOLYGON (((972081.788 190733.467, 972184...."
1,2.01,1,Manhattan,201,1000201,,Chinatown-Two Bridges,MN0301,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),36061000201,4754.495244,972312.2,"POLYGON ((988548.218 197770.375, 987978.808 19..."
2,6.0,1,Manhattan,600,1000600,,Chinatown-Two Bridges,MN0301,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),36061000600,6976.286456,2582705.0,"POLYGON ((986961.185 199553.643, 987206.139 19..."
3,14.01,1,Manhattan,1401,1001401,,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),36061001401,5075.332,1006117.0,"POLYGON ((987475.016 200297.218, 987705.443 20..."
4,14.02,1,Manhattan,1402,1001402,,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),36061001402,4459.156019,1226206.0,"POLYGON ((988387.669 201258.312, 988621.002 20..."


In [14]:
ct2020['GEOID'] = pd.to_numeric(ct2020['GEOID'])

In [15]:
Social_Economic = df.merge(ct2020[['GEOID','NTA2020']], on='GEOID')
Social_Economic = gpd.GeoDataFrame(Social_Economic, geometry=Social_Economic['geometry'],crs=2263)
Social_Economic

Unnamed: 0,Pop,Median_Age,Median_Income,Race_White,BachelorHigher,Unemployment,Mode_Public,NoVehicle,GEOID,geometry
0,3453,36.1,41563,29.6,22.1,6.9,62.9,81.0,36047009202,"POLYGON ((985286.415 174204.098, 985125.723 17..."
1,2293,31.6,51346,7.5,22.4,5.5,52.3,58.2,36047009401,"POLYGON ((984190.553 174077.201, 984028.580 17..."
2,2746,33.9,58750,17.2,26.3,1.9,42.8,66.3,36047009402,"POLYGON ((984802.200 173592.177, 984640.393 17..."
3,5858,38.0,62614,36.2,27.4,6.7,54.1,70.5,36047009600,"POLYGON ((983580.277 174561.258, 983417.994 17..."
4,6021,33.6,56229,15.3,8.4,4.7,53.2,69.2,36047009800,"POLYGON ((982932.782 173745.218, 982770.984 17..."
...,...,...,...,...,...,...,...,...,...,...
2320,4809,41.0,90625,85.4,28.2,4.4,31.4,11.0,36085013204,"POLYGON ((950892.000 143465.696, 950745.856 14..."
2321,1067,38.6,11343,18.7,4.3,8.7,46.4,87.1,36085013301,"POLYGON ((951720.949 170488.407, 951551.353 17..."
2322,3320,30.1,68487,37.8,23.6,4.4,30.8,27.8,36085013302,"POLYGON ((950562.207 173010.123, 950565.970 17..."
2323,4127,37.5,86875,73.0,31.1,5.4,26.5,10.2,36085013400,"POLYGON ((953756.092 147131.846, 953603.218 14..."


In [16]:
Social_Economic = Social_Economic.loc[Social_Economic['Median_Income']>0]
len(Social_Economic)

2206

In [17]:
ct2020 = gpd.read_file('./Data/GeoBase/nyct2020_22c/nyct2020.shp')
ct2020.head()

Unnamed: 0,Pop,Median_Age,Median_Income,Race_White,BachelorHigher,Unemployment,Mode_Public,NoVehicle,GEOID,geometry
2048,64,34.7,153365,93.8,86.0,0.0,11.7,100.0,36061009400,"POLYGON ((991331.656 214911.595, 991207.437 21..."
1102,65,32.1,126250,66.2,66.7,0.0,18.5,53.1,36047005302,"POLYGON ((983965.961 185441.193, 983983.599 18..."
2106,102,43.2,81875,17.6,37.6,25.4,38.0,92.0,36061011300,"POLYGON ((988650.277 214286.402, 988517.806 21..."
2092,160,69.2,80951,86.3,88.5,0.0,8.2,92.4,36061010200,"POLYGON ((992216.539 216507.687, 992091.031 21..."
1654,169,35.9,132292,100.0,41.1,31.0,46.0,24.7,36081053902,"POLYGON ((1008358.183 199174.582, 1008808.585 ..."
...,...,...,...,...,...,...,...,...,...,...
2196,12448,39.4,154332,81.5,77.9,2.4,56.9,78.6,36061013800,"POLYGON ((998004.556 221839.304, 997877.341 22..."
1344,12515,33.4,60502,32.6,26.8,8.0,61.1,78.6,36061025300,"POLYGON ((1001537.767 246825.562, 1001640.906 ..."
1340,14089,35.8,43084,25.0,32.7,17.9,71.3,91.6,36061024500,"POLYGON ((1000900.245 244770.330, 1000854.156 ..."
1582,14220,33.6,58984,30.7,30.0,7.1,57.0,62.2,36081045500,"POLYGON ((1023362.762 207430.576, 1023206.883 ..."


In [68]:
# save file
Social_Economic.to_file("./Data/Social_Economic/Social_Economic.geojson", driver='GeoJSON')