# New York City Census Data Analysis

#### Stephanie Andrade

In [1]:
import json
import requests
import pandas as pd
import geopandas as gpd

In [2]:
# Requesting Census Data - Population Example
# Our data is specifc to NYC so we need to condense this list to only include NYC data.
r = requests.get('https://api.census.gov/data/2015/acs/acs5?get=NAME,B01001_001E,B01001H_001E&for=county:*&for=tract:*&in=state:36')
print(r.text)

[["NAME","B01001_001E","B01001H_001E","state","county"],
["Schoharie County, New York","31913","29868","36","095"],
["Fulton County, New York","54606","50960","36","035"],
["Rensselaer County, New York","159900","135285","36","083"],
["Franklin County, New York","51280","41999","36","033"],
["Queens County, New York","2301139","601381","36","081"],
["Washington County, New York","62700","58182","36","115"],
["New York County, New York","1629507","767701","36","061"],
["Cayuga County, New York","79173","71808","36","011"],
["Rockland County, New York","320688","204596","36","087"],
["Niagara County, New York","214150","185219","36","063"],
["Essex County, New York","38912","35886","36","031"],
["Nassau County, New York","1354612","851645","36","059"],
["Jefferson County, New York","118947","98419","36","045"],
["Suffolk County, New York","1501373","1045354","36","103"],
["Wyoming County, New York","41446","37232","36","121"],
["Oswego County, New York","121183","114552","36","075"],
["W

In [3]:
censusdata = r.json()

# Turn JSON into a pandas dataframe
df = pd.DataFrame(censusdata[1:], columns=censusdata[0])
df.head()

Unnamed: 0,NAME,B01001_001E,B01001H_001E,state,county
0,"Schoharie County, New York",31913,29868,36,95
1,"Fulton County, New York",54606,50960,36,35
2,"Rensselaer County, New York",159900,135285,36,83
3,"Franklin County, New York",51280,41999,36,33
4,"Queens County, New York",2301139,601381,36,81


In [4]:
# Renaming the column to be more specfic
df.rename(columns = {'B01001_001E':'Population','B01001H_001E':'NonHispanicWhite'}, inplace=True)
df

Unnamed: 0,NAME,Population,NonHispanicWhite,state,county
0,"Schoharie County, New York",31913,29868,36,095
1,"Fulton County, New York",54606,50960,36,035
2,"Rensselaer County, New York",159900,135285,36,083
3,"Franklin County, New York",51280,41999,36,033
4,"Queens County, New York",2301139,601381,36,081
...,...,...,...,...,...
57,"Cattaraugus County, New York",78962,71985,36,009
58,"Madison County, New York",72427,67735,36,053
59,"Schenectady County, New York",154796,116261,36,093
60,"Genesee County, New York",59458,54029,36,037


In [5]:
# The data is a string
type(r.text)

str

In [6]:
# Create a new column that will give us the perentage of nonhispanic white individuals in New York State.
df['prc_NonHispanicWhite'] = df.NonHispanicWhite.astype(float) / df.Population.astype(float) * 100 
df.head(20)

Unnamed: 0,NAME,Population,NonHispanicWhite,state,county,prc_NonHispanicWhite
0,"Schoharie County, New York",31913,29868,36,95,93.591953
1,"Fulton County, New York",54606,50960,36,35,93.323078
2,"Rensselaer County, New York",159900,135285,36,83,84.606004
3,"Franklin County, New York",51280,41999,36,33,81.901326
4,"Queens County, New York",2301139,601381,36,81,26.134058
5,"Washington County, New York",62700,58182,36,115,92.794258
6,"New York County, New York",1629507,767701,36,61,47.11247
7,"Cayuga County, New York",79173,71808,36,11,90.697586
8,"Rockland County, New York",320688,204596,36,87,63.799082
9,"Niagara County, New York",214150,185219,36,63,86.490311
