<a href="https://colab.research.google.com/github/nirumano/NYC_GIS_DataAnalysis/blob/main/NYC_2022_GeopandaAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Importing Libraries**

In [52]:
import pandas as pd
import geopandas as gpd

from getpass import getpass
import plotly.express as px

import requests
import warnings

# Notebook-wide settings: show every column when a frame is displayed and
# silence all Python warnings for the rest of the session.
pd.options.display.max_columns = None
warnings.filterwarnings(action="ignore")

# Prompt for the Census API key without echoing it into the cell output.
census_api_key = getpass('Enter the api key: ')

Enter the api key: ··········


# **Census API Request Setup**

In [27]:
# Parameters of the ACS 5-year request. These names are reused by later cells,
# so change the analysis target here rather than further down.
censusCode = 'B19013_001E' # Median household income code
year = '2022'
state = '36' # New York State (state FIPS code)
county_code = '061' # New York County, i.e. Manhattan (county FIPS code)

# Request NAME plus the selected variable for every tract in the county.
# The variable must be interpolated as `censusCode` (the name defined above);
# the previous `census_code` spelling raised NameError on a fresh kernel.
census_url = (
    f'https://api.census.gov/data/{year}/acs/acs5'
    f'?get=NAME,{censusCode}&for=tract:*'
    f'&in=state:{state}&in=county:{county_code}&key={census_api_key}'
)

# Bound the wait and fail here on an HTTP error, instead of later at
# response.json() with a less helpful message.
response = requests.get(census_url, timeout=30)
response.raise_for_status()

In [28]:
# Read the TIGER/Line census-tract shapefile for the configured year and state
# straight from the Census Bureau download server into a GeoDataFrame.
tract_zip_url = f"https://www2.census.gov/geo/tiger/TIGER{year}/TRACT/tl_{year}_{state}_tract.zip"
df = gpd.read_file(tract_zip_url)
df.head()

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
0,36,7,12702,36007012702,127.02,Census Tract 127.02,G5020,S,65461841,222705,42.0350532,-75.9055509,"POLYGON ((-75.95917 42.00852, -75.95914 42.008..."
1,36,7,12800,36007012800,128.0,Census Tract 128,G5020,S,12342848,259435,42.1298743,-75.9096569,"POLYGON ((-75.94766 42.13727, -75.94453 42.137..."
2,36,7,12900,36007012900,129.0,Census Tract 129,G5020,S,14480163,63649,42.1522758,-75.9766029,"POLYGON ((-76.02229 42.15699, -76.01917 42.157..."
3,36,7,13000,36007013000,130.0,Census Tract 130,G5020,S,9934434,381729,42.1236499,-76.0002197,"POLYGON ((-76.02417 42.12590, -76.02409 42.125..."
4,36,7,13202,36007013202,132.02,Census Tract 132.02,G5020,S,2446208,3681,42.1238924,-76.0311921,"POLYGON ((-76.03996 42.13285, -76.03862 42.132..."


In [29]:
# (rows, columns) of the tract frame loaded for the whole state; it is not yet
# restricted to a single county at this point.
df.shape

(5411, 13)

In [30]:
# Decode the API payload and load it as-is. The first row of the resulting
# frame still holds the field names; it is promoted to a header later on.
census_rows = response.json()
censusDF = pd.DataFrame(census_rows)
censusDF.head()

Unnamed: 0,0,1,2,3,4
0,NAME,B19013_001E,state,county,tract
1,Census Tract 1; New York County; New York,-666666666,36,061,000100
2,Census Tract 2.01; New York County; New York,45582,36,061,000201
3,Census Tract 2.02; New York County; New York,34710,36,061,000202
4,Census Tract 5; New York County; New York,-666666666,36,061,000500


# **Organize Dataset: Column/Row cleaning & GEOID Creation**

In [31]:
# Promote the first row (the field names returned by the API) to column labels
# and remove it from the data.
#
# The RangeIndex guard makes this cell safe to re-run: a freshly built frame
# has default integer column labels, while an already-cleaned frame does not.
# Without the guard, a second execution would turn a real tract row into the
# header and silently drop it from the dataset.
if isinstance(censusDF.columns, pd.RangeIndex):
    censusDF.columns = censusDF.iloc[0]
    censusDF = censusDF.drop(censusDF.index[0])
censusDF.head()

Unnamed: 0,NAME,B19013_001E,state,county,tract
1,Census Tract 1; New York County; New York,-666666666,36,61,100
2,Census Tract 2.01; New York County; New York,45582,36,61,201
3,Census Tract 2.02; New York County; New York,34710,36,61,202
4,Census Tract 5; New York County; New York,-666666666,36,61,500
5,Census Tract 6; New York County; New York,25655,36,61,600


In [34]:
# Derive the tract key by concatenating the state, county and tract code
# strings, and convert the requested census variable from text to integers.
# GEOID is the column used later to join against the tract geometries.
censusDF = censusDF.assign(
    GEOID=lambda frame: frame['state'] + frame['county'] + frame['tract'],
    **{censusCode: lambda frame: frame[censusCode].astype(int)},
)
censusDF.head()

Unnamed: 0,NAME,B19013_001E,state,county,tract,GEOID
1,Census Tract 1; New York County; New York,-666666666,36,61,100,36061000100
2,Census Tract 2.01; New York County; New York,45582,36,61,201,36061000201
3,Census Tract 2.02; New York County; New York,34710,36,61,202,36061000202
4,Census Tract 5; New York County; New York,-666666666,36,61,500,36061000500
5,Census Tract 6; New York County; New York,25655,36,61,600,36061000600


# **Check for Outlier Data**

In [41]:
# Summary statistics rendered as fixed-point text so that very large magnitudes
# are shown in full rather than in scientific notation.
five_decimals = "{0:.5f}".format
censusDF[censusCode].describe().map(five_decimals)

count           310.00000
mean      -23544020.96774
std       123553829.57769
min      -666666666.00000
25%           57902.25000
50%          107666.50000
75%          159835.00000
max          250001.00000
Name: B19013_001E, dtype: object

In [42]:
# Keep only rows whose value is strictly positive; this removes the large
# negative placeholder values visible in the summary statistics.
has_positive_income = censusDF[censusCode].gt(0)
filterCensusDF = censusDF[has_positive_income]
filterCensusDF.head()

Unnamed: 0,NAME,B19013_001E,state,county,tract,GEOID
2,Census Tract 2.01; New York County; New York,45582,36,61,201,36061000201
3,Census Tract 2.02; New York County; New York,34710,36,61,202,36061000202
5,Census Tract 6; New York County; New York,25655,36,61,600,36061000600
6,Census Tract 7; New York County; New York,184730,36,61,700,36061000700
7,Census Tract 8; New York County; New York,28607,36,61,800,36061000800


In [53]:
# (rows, columns) left after dropping the non-positive values.
filterCensusDF.shape


(299, 6)

In [55]:
# Re-check the summary statistics of the census variable on the filtered rows.
filterCensusDF[censusCode].describe()

count       299.000000
mean     116009.451505
std       62480.743786
min       11406.000000
25%       63300.500000
50%      112969.000000
75%      160780.000000
max      250001.000000
Name: B19013_001E, dtype: float64

# **Inner-Join the Filtered Census Data with the Tract GeoDataFrame**

In [49]:
# Attach tract geometries to the filtered census rows. The inner join keeps
# only GEOIDs present in both frames; columns that exist on both sides get
# the default _x / _y suffixes.
dfMerged = filterCensusDF.merge(df, how='inner', on='GEOID')

# Wrap the merged table as a GeoDataFrame and key it by GEOID.
dfGEO = gpd.GeoDataFrame(dfMerged)
dfGEO = dfGEO.set_index('GEOID')
dfGEO.head()

Unnamed: 0_level_0,NAME_x,B19013_001E,state,county,tract,STATEFP,COUNTYFP,TRACTCE,NAME_y,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
36061000201,Census Tract 2.01; New York County; New York,45582,36,61,201,36,61,201,2.01,Census Tract 2.01,G5020,S,90233,75976,40.7098961,-73.9855833,"POLYGON ((-73.98717 40.71431, -73.98608 40.714..."
36061000202,Census Tract 2.02; New York County; New York,34710,36,61,202,36,61,202,2.02,Census Tract 2.02,G5020,S,310040,428737,40.7056637,-73.9814548,"POLYGON ((-73.98534 40.71446, -73.98471 40.714..."
36061000600,Census Tract 6; New York County; New York,25655,36,61,600,36,61,600,6.0,Census Tract 6,G5020,S,240406,176018,40.7102579,-73.9880558,"POLYGON ((-73.99256 40.71439, -73.99213 40.714..."
36061000700,Census Tract 7; New York County; New York,184730,36,61,700,36,61,700,7.0,Census Tract 7,G5020,S,253229,246239,40.6995508,-73.998769,"POLYGON ((-74.01251 40.70677, -74.01195 40.707..."
36061000800,Census Tract 8; New York County; New York,28607,36,61,800,36,61,800,8.0,Census Tract 8,G5020,S,220708,167906,40.7101181,-73.9932525,"POLYGON ((-73.99744 40.71407, -73.99648 40.714..."


# **Visualize Data**

In [50]:
# Restrict the map to the county selected in the request-setup cell. Reusing
# `county_code` (rather than repeating the literal '061') keeps this filter in
# step with the data that was actually requested.
df_geo_county = dfGEO.loc[dfGEO['county'] == county_code]

# Tract polygons are matched to rows through the GEOID index, and the fill
# colour encodes the requested census variable.
fig = px.choropleth_mapbox(
    df_geo_county,
    geojson=df_geo_county.geometry,
    locations=df_geo_county.index,
    color=censusCode,
    color_continuous_scale='Viridis',
    opacity=0.5,
    # Fixed map centre; presumably chosen to frame Manhattan. Adjust it
    # together with the title if a different county is configured.
    center={"lat": 40.7831, "lon": -73.9712},
    mapbox_style="open-street-map",
    zoom=10.0,
    # The year comes from the setup cell so the title cannot drift from the data.
    title=f'Manhattan, NYC: Median Household Income {year}',
    # Show a readable legend/hover label instead of the raw variable code.
    labels={censusCode: 'Median household income'},
)
fig.show()