# Census API - New York

In [1]:
# import packages

from us import states
import pandas as pd
import requests

# import doc with API key

import config

#### Census API Link References

In [2]:
# "https://api.census.gov/data/2019/acs/acs5/profile?get=NAME,DP05_0001E&for=state:36&key={0}".format(config.MY_API_KEY)
# "https://api.census.gov/data/2019/acs/acs5/profile?get=group(DP05)&for=school%20district%20(unified):29850&in=state:36&key={0}".format(config.MY_API_KEY)


## Get Table Names from Census API

In [3]:
variableURL = "https://api.census.gov/data/2019/acs/acs5/profile/variables"
censusVarNames = requests.request("GET", variableURL)
print(censusVarNames.json()[5])

['DP05_0050PE', 'Percent!!RACE!!Total population!!One race!!Asian!!Vietnamese', 'ACS DEMOGRAPHIC AND HOUSING ESTIMATES']


In [4]:
censusDataProfile = pd.DataFrame(columns=censusVarNames.json()[0], data=censusVarNames.json()[4:])
censusDataProfile.head()

Unnamed: 0,name,label,concept
0,DP02_0126E,Estimate!!ANCESTRY!!Total population!!Czech,SELECTED SOCIAL CHARACTERISTICS IN THE UNITED ...
1,DP05_0050PE,Percent!!RACE!!Total population!!One race!!Asi...,ACS DEMOGRAPHIC AND HOUSING ESTIMATES
2,DP04_0047E,Estimate!!HOUSING TENURE!!Occupied housing uni...,SELECTED HOUSING CHARACTERISTICS
3,DP02PR_0083PE,Percent!!RESIDENCE 1 YEAR AGO!!Population 1 ye...,SELECTED SOCIAL CHARACTERISTICS IN PUERTO RICO
4,DP02PR_0106PE,Percent!!WORLD REGION OF BIRTH OF FOREIGN BORN...,SELECTED SOCIAL CHARACTERISTICS IN PUERTO RICO


## Import CSV

#### If a user is interested in looking at only select parts of the entire US Census set, save a separate text file with your list. 
#### You can pick and choose which school districts after compiling all the data, but it helps to filter out the data earlier on to minimize the size of your data frame

In [5]:
# This list is the top 20 school districts in Nassau County.

selectedNYCounties = pd.read_csv(r"Data\USCensusNYSchDist.txt",names=['School District Name'])
selectedNYCounties.head()

Unnamed: 0,School District Name
0,Jericho Union Free School District
1,Great Neck Public Schools
2,Syosset Central School District
3,Roslyn Union Free School District
4,Herricks Union Free School District


In [6]:
# Read Census data set which lists GEOIDs per each school district in state

URL = ("https://www2.census.gov/geo/docs/reference/codes/files/st36_ny_schdist.txt")
CountyGEOID = pd.read_csv(URL,names=['State', 'ID','GEOID','School District Name', 'Class'])
CountyGEOID.head()

Unnamed: 0,State,ID,GEOID,School District Name,Class
0,NY,36,1,Dolgeville Central School District,Unified
1,NY,36,2,Sauquoit Valley Central School District,Unified
2,NY,36,3,Edwards-Knox Central School District,Unified
3,NY,36,4,Rotterdam-Mohonasen Central School District,Unified
4,NY,36,5,Broadalbin-Perth Central School District,Unified


#### GEOIDs are going to be critical to using the Census API.

Create a new Dataframe, which combines all rows based on their "School District Name."
This will allow that first layer of filtering to happen -> you get the GEOIDs for all of the school districts you are interested in looking at.

Need to further investigate and understand Pandas concat, merge, join. Not sure why merge worked - so need to dig into that.


In [7]:
NYCountiesGEOID = NYCounties.merge(data, on=['School District Name'],how='inner')
NYCountiesGEOID

NameError: name 'NYCounties' is not defined

In [None]:
# Get all GEO IDs in a single list

GEOIDonly = joined['GEOID'].tolist()
separator = ', '
print(GEOIDonly)

In [None]:
# As a single string

GEOIDsinglestring = separator.join(map(str, GEOIDonly))
print(GEOIDsinglestring)

## Refactoring into Formulas

#### Breakdown into small discrete formulas

In [None]:
def jsontodf(response):
    return pd.DataFrame(response.json()[1:], columns=response.json()[0])

In [None]:
def dataProfSchDist(censusVariables,schoolDistricts):
    # first URL is for the detailed table. second URL is for the data profiles.
    # URL = "https://api.census.gov/data/2019/acs/acs5?get=NAME,{0}&for=school%20district%20(unified):{1}&in=state:36&key={2}".format(censusVariables,schoolDistricts,config.MY_API_KEY)

    URL = "https://api.census.gov/data/2019/acs/acs5/profile?get=NAME,{0}&for=school%20district%20(unified):{1}&in=state:36&key={2}".format(censusVariables,schoolDistricts,config.MY_API_KEY)
    return requests.request("GET", URL)

In [None]:
def deTabSchDist(censusVariables,schoolDistricts):
    # first URL is for the detailed table. second URL is for the data profiles.
    # URL = "https://api.census.gov/data/2019/acs/acs5?get=NAME,{0}&for=school%20district%20(unified):{1}&in=state:36&key={2}".format(censusVariables,schoolDistricts,config.MY_API_KEY)

    URL = "https://api.census.gov/data/2019/acs/acs5/?get=NAME,{0}&for=school%20district%20(unified):{1}&in=state:36&key={2}".format(censusVariables,schoolDistricts,config.MY_API_KEY)
    return requests.request("GET", URL)

#### Mega Formula

In [None]:
def dataProfile(cenVar, schDisID):
    x = dataProfSchDist(cenVar,schDisID)
    return jsontodf(x)

def detailedTable(cenVar, schDisID):
    x = deTabSchDist(cenVar,schDisID)
    return jsontodf(x)

#### Test

In [None]:
# Getting Census Data: Data Profiles

selectedVar = 'DP05_0002E,DP05_0003E'
GEOIDsinglestring

dataProfile(selectedVar,GEOIDsinglestring)

In [None]:
# Getting Census Data: Detailed Tables

selectedVar = 'B01001_002E,B01001_026E,B19013_001E'
GEOIDsinglestring

detailedTable(selectedVar,GEOIDsinglestring)