In [148]:
# import packages

from us import states
import pandas as pd
import requests

import config

In [149]:
# convert response to dataframe
# read about this later

def json_to_dataframe(response):
    return pd.DataFrame(response.json()[1:], columns=response.json()[0])

In [150]:
def schoolDistrictInput(variables,strings):
    URL = "https://api.census.gov/data/2019/acs/acs5?get=NAME,{0}&for=school%20district%20(unified):{1}&in=state:36&key={2}".format(variables,strings,config.MY_API_KEY)
    return requests.request("GET", URL)

In [151]:
# create data frame of responses
def dataFrame(responses):
    return json_to_dataframe(responses)

In [152]:
# read CSV with selected school districts - this is user defined!

NYCounties = pd.read_csv(r"Data\USCensusNYSchDist.txt",names=['School District Name'])

In [153]:
# read Census data set which lists GEOIDs per each school district in state

URL = ("https://www2.census.gov/geo/docs/reference/codes/files/st36_ny_schdist.txt")
data = pd.read_csv(URL,names=['State', 'ID','GEOID','School District Name', 'Class'])

In [154]:
# why did merge work instead of join?

joined = NYCounties.merge(data, on=['School District Name'],how='inner')
joined

Unnamed: 0,School District Name,State,ID,GEOID,Class
0,Jericho Union Free School District,NY,36,15810,Unified
1,Syosset Central School District,NY,36,28560,Unified
2,Roslyn Union Free School District,NY,36,25050,Unified
3,Herricks Union Free School District,NY,36,14280,Unified
4,Manhasset Union Free School District,NY,36,18270,Unified
5,East Williston Union Free School District,NY,36,10050,Unified
6,North Shore Central School District,NY,36,26370,Unified
7,Hewlett-Woodmere Union Free School District,NY,36,31710,Unified
8,Plainview-Old Bethpage Central School District,NY,36,23220,Unified
9,Bellmore-Merrick Central High School District,NY,36,19020,Secondary


In [155]:
# get all GEO IDs

GEOIDonly = joined['GEOID'].tolist()
GEOIDonly

[15810,
 28560,
 25050,
 14280,
 18270,
 10050,
 26370,
 31710,
 23220,
 19020,
 11760,
 23580,
 4740,
 17910,
 17700,
 24780,
 18630,
 29850,
 17160]

In [156]:
# get all GEO IDs

GEOIDonly = joined['GEOID'].tolist()
separator = ', '
GEOIDonlystring = map(str, GEOIDonly)
x = separator.join(GEOIDonlystring)
print(x)

15810, 28560, 25050, 14280, 18270, 10050, 26370, 31710, 23220, 19020, 11760, 23580, 4740, 17910, 17700, 24780, 18630, 29850, 17160


In [157]:
# census variables

cenVar = 'B01001_001E'

In [158]:
testResponse3 = schoolDistrictInput(cenVar, x)
dataFrame(testResponse3)

Unnamed: 0,NAME,B01001_001E,state,school district (unified)
0,"Roslyn Union Free School District, New York",18598,36,25050
1,"East Williston Union Free School District, New...",8886,36,10050
2,"North Shore Central School District, New York",16648,36,26370
3,"Garden City Union Free School District, New York",22499,36,11760
4,"Wantagh Union Free School District, New York",16739,36,29850
5,"Manhasset Union Free School District, New York",16847,36,18270
6,"Port Washington Union Free School District, Ne...",31028,36,23580
7,"Rockville Centre Union Free School District, N...",22069,36,24780
8,"Hewlett-Woodmere Union Free School District, N...",20077,36,31710
9,"Levittown Union Free School District, New York",45666,36,17160


In [159]:
# Responses are usually a list of list in the form of a string
# Each sub list is a row of data, with the first being the header names.
# Each item is double quoted. We need to fix this when we move into pandas.

print(testResponse3.text)

[["NAME","B01001_001E","state","school district (unified)"],
["Roslyn Union Free School District, New York","18598","36","25050"],
["East Williston Union Free School District, New York","8886","36","10050"],
["North Shore Central School District, New York","16648","36","26370"],
["Garden City Union Free School District, New York","22499","36","11760"],
["Wantagh Union Free School District, New York","16739","36","29850"],
["Manhasset Union Free School District, New York","16847","36","18270"],
["Port Washington Union Free School District, New York","31028","36","23580"],
["Rockville Centre Union Free School District, New York","22069","36","24780"],
["Hewlett-Woodmere Union Free School District, New York","20077","36","31710"],
["Levittown Union Free School District, New York","45666","36","17160"],
["Locust Valley Central School District, New York","15845","36","17700"],
["Lynbrook Union Free School District, New York","17255","36","17910"],
["Plainview-Old Bethpage Central School Dis

In [160]:
# JSON method response is a list of lists

print(testResponse3.json())

[['NAME', 'B01001_001E', 'state', 'school district (unified)'], ['Roslyn Union Free School District, New York', '18598', '36', '25050'], ['East Williston Union Free School District, New York', '8886', '36', '10050'], ['North Shore Central School District, New York', '16648', '36', '26370'], ['Garden City Union Free School District, New York', '22499', '36', '11760'], ['Wantagh Union Free School District, New York', '16739', '36', '29850'], ['Manhasset Union Free School District, New York', '16847', '36', '18270'], ['Port Washington Union Free School District, New York', '31028', '36', '23580'], ['Rockville Centre Union Free School District, New York', '22069', '36', '24780'], ['Hewlett-Woodmere Union Free School District, New York', '20077', '36', '31710'], ['Levittown Union Free School District, New York', '45666', '36', '17160'], ['Locust Valley Central School District, New York', '15845', '36', '17700'], ['Lynbrook Union Free School District, New York', '17255', '36', '17910'], ['Pl

In [161]:
column_names = ['name', 'population', 'state', 'school GEOID']

In [162]:
# construct data frame
# new column names get passed, json list of lists get passed through data param
# use slicing to skip row 0, which is the header

df = pd.DataFrame(columns=column_names, data=testResponse3.json()[1:])

# changes data type to integer from string
df["population"] = df["population"].astype(int)

df

Unnamed: 0,name,population,state,school GEOID
0,"Roslyn Union Free School District, New York",18598,36,25050
1,"East Williston Union Free School District, New...",8886,36,10050
2,"North Shore Central School District, New York",16648,36,26370
3,"Garden City Union Free School District, New York",22499,36,11760
4,"Wantagh Union Free School District, New York",16739,36,29850
5,"Manhasset Union Free School District, New York",16847,36,18270
6,"Port Washington Union Free School District, Ne...",31028,36,23580
7,"Rockville Centre Union Free School District, N...",22069,36,24780
8,"Hewlett-Woodmere Union Free School District, N...",20077,36,31710
9,"Levittown Union Free School District, New York",45666,36,17160


In [176]:
# Census Table Reference https://data.census.gov/cedsci/table?q=population&tid=ACSDP5Y2019.DP05&hidePreview=true

testURL = "https://api.census.gov/data/2019/acs/acs5/profile?get=group(DP05)&for=school%20district%20(unified):29850&in=state:36&key={0}".format(config.MY_API_KEY)
test = requests.request("GET", testURL)
print(test.text)
y = dataFrame(test)


[["DP05_0001E","DP05_0001EA","DP05_0001M","DP05_0001MA","DP05_0001PE","DP05_0001PEA","DP05_0001PM","DP05_0001PMA","DP05_0002E","DP05_0002EA","DP05_0002M","DP05_0002MA","DP05_0002PE","DP05_0002PEA","DP05_0002PM","DP05_0002PMA","DP05_0003E","DP05_0003EA","DP05_0003M","DP05_0003MA","DP05_0003PE","DP05_0003PEA","DP05_0003PM","DP05_0003PMA","DP05_0004E","DP05_0004EA","DP05_0004M","DP05_0004MA","DP05_0004PE","DP05_0004PEA","DP05_0004PM","DP05_0004PMA","DP05_0005E","DP05_0005EA","DP05_0005M","DP05_0005MA","DP05_0005PE","DP05_0005PEA","DP05_0005PM","DP05_0005PMA","DP05_0006E","DP05_0006EA","DP05_0006M","DP05_0006MA","DP05_0006PE","DP05_0006PEA","DP05_0006PM","DP05_0006PMA","DP05_0007E","DP05_0007EA","DP05_0007M","DP05_0007MA","DP05_0007PE","DP05_0007PEA","DP05_0007PM","DP05_0007PMA","DP05_0008E","DP05_0008EA","DP05_0008M","DP05_0008MA","DP05_0008PE","DP05_0008PEA","DP05_0008PM","DP05_0008PMA","DP05_0009E","DP05_0009EA","DP05_0009M","DP05_0009MA","DP05_0009PE","DP05_0009PEA","DP05_0009PM","DP05

Unnamed: 0,DP05_0001E,DP05_0001EA,DP05_0001M,DP05_0001MA,DP05_0001PE,DP05_0001PEA,DP05_0001PM,DP05_0001PMA,DP05_0002E,DP05_0002EA,...,DP05_0089M,DP05_0089MA,DP05_0089PE,DP05_0089PEA,DP05_0089PM,DP05_0089PMA,GEO_ID,NAME,state,school district (unified)
0,16739,,629,,16739,,-888888888,(X),8279,,...,248,,51.0,,1.7,,9700000US3629850,"Wantagh Union Free School District, New York",36,29850


In [171]:
# Census Table Reference https://data.census.gov/cedsci/table?q=population&tid=ACSDP5Y2019.DP05&hidePreview=true

testURL2 = "https://api.census.gov/data/2019/acs/acs5/profile?get=NAME,DP05_0001E&for=state:36&key={0}".format(config.MY_API_KEY)
test2 = requests.request("GET", testURL2)
print(test2.text)


[["NAME","DP05_0001E","state"],
["New York","19572319","36"]]


In [172]:
testURL3 = "https://api.census.gov/data/2019/acs/acs5/profile/variables"
test3 = requests.request("GET", testURL3)
print(test3.text)

[["name","label","concept"],
["for","Census API FIPS 'for' clause","Census API Geography Specification"],
["in","Census API FIPS 'in' clause","Census API Geography Specification"],
["ucgid","Uniform Census Geography Identifier clause","Census API Geography Specification"],
["DP02_0126E","Estimate!!ANCESTRY!!Total population!!Czech","SELECTED SOCIAL CHARACTERISTICS IN THE UNITED STATES"],
["DP05_0050PE","Percent!!RACE!!Total population!!One race!!Asian!!Vietnamese","ACS DEMOGRAPHIC AND HOUSING ESTIMATES"],
["DP04_0047E","Estimate!!HOUSING TENURE!!Occupied housing units!!Renter-occupied","SELECTED HOUSING CHARACTERISTICS"],
["DP02PR_0083PE","Percent!!RESIDENCE 1 YEAR AGO!!Population 1 year and over!!Different house in Puerto Rico or the U.S.!!In Puerto Rico!!Same municipio","SELECTED SOCIAL CHARACTERISTICS IN PUERTO RICO"],
["DP02PR_0106PE","Percent!!WORLD REGION OF BIRTH OF FOREIGN BORN!!Foreign-born population, excluding population born at sea!!Asia","SELECTED SOCIAL CHARACTERISTICS IN

In [174]:
x = dataFrame(test3)
x

Unnamed: 0,name,label,concept
0,for,Census API FIPS 'for' clause,Census API Geography Specification
1,in,Census API FIPS 'in' clause,Census API Geography Specification
2,ucgid,Uniform Census Geography Identifier clause,Census API Geography Specification
3,DP02_0126E,Estimate!!ANCESTRY!!Total population!!Czech,SELECTED SOCIAL CHARACTERISTICS IN THE UNITED ...
4,DP05_0050PE,Percent!!RACE!!Total population!!One race!!Asi...,ACS DEMOGRAPHIC AND HOUSING ESTIMATES
...,...,...,...
1383,DP02PR_0001PE,Percent!!HOUSEHOLDS BY TYPE!!Total households,SELECTED SOCIAL CHARACTERISTICS IN PUERTO RICO
1384,DP03_0039PE,Percent!!INDUSTRY!!Civilian employed populatio...,SELECTED ECONOMIC CHARACTERISTICS
1385,DP02_0098E,Estimate!!YEAR OF ENTRY!!Population born outsi...,SELECTED SOCIAL CHARACTERISTICS IN THE UNITED ...
1386,DP04_0095PE,Percent!!SELECTED MONTHLY OWNER COSTS (SMOC)!!...,SELECTED HOUSING CHARACTERISTICS


In [170]:
joined2 = x.merge(y, on=['name'],how='inner')
joined

AttributeError: 'str' object has no attribute 'merge'