# Census- Employment Status Data Template

In [254]:
import pandas as pd
import requests

In [255]:
#Census ACS 1-year (2019) Subject Table API query: every variable in group S2301 (Employment Status)
#for each unified school district in state 06 (California)
url="https://api.census.gov/data/2019/acs/acs1/subject?get=group(S2301)&for=school%20district%20(unified)&in=state:06"

In [256]:
#Request the table from the Census API; a successful call displays as <Response [200]>.
#The timeout keeps the cell from hanging forever on a stalled connection, and
#raise_for_status() stops the notebook here on an HTTP error status instead of letting an
#error body flow into the parsing cells that follow.
response = requests.get(url, timeout=60)
response.raise_for_status()

In [257]:
#Decode the JSON body of the response into Python objects. The first element (index 0) is
#the row of column names: it is used as the header when the dataframe is built.
response_json = response.json()

In [258]:
#Build the dataframe with the first JSON row as the column names, then drop row 0 (that same
#header row, which would otherwise appear as data). The saved run has 120 school districts
#and 564 variable columns; the remaining rows keep their original index labels (1, 2, ...).
header_row = response_json[0]
unifiedschool_df = pd.DataFrame(response_json, columns=header_row).drop(index=0)

In [259]:
#Show the row for one school district so the raw column titles can be inspected
is_hesperia = unifiedschool_df["NAME"] == "Hesperia Unified School District, California"
unifiedschool_df.loc[is_hesperia]

Unnamed: 0,GEO_ID,NAME,S2301_C01_001E,S2301_C01_001EA,S2301_C01_001M,S2301_C01_001MA,S2301_C01_002E,S2301_C01_002EA,S2301_C01_002M,S2301_C01_002MA,...,S2301_C04_034E,S2301_C04_034EA,S2301_C04_034M,S2301_C04_034MA,S2301_C04_035E,S2301_C04_035EA,S2301_C04_035M,S2301_C04_035MA,state,school district (unified)
27,9700000US0600014,"Hesperia Unified School District, California",80008,,5263,,7595,,1782,,...,7.0,,5.5,,3.2,,2.9,,6,14


## _Format Data_

In [260]:
#Census variable dictionary for the 2019 ACS 1-year subject tables: maps variable codes
#(for example S2301_C01_001E) to metadata such as their descriptive label
variableurl = "https://api.census.gov/data/2019/acs/acs1/subject/variables.json"

In [261]:
#Download the variable dictionary and decode its JSON body.
#The timeout avoids an indefinite hang, and raise_for_status() fails fast on an HTTP error
#status rather than trying to parse an error body as the dictionary.
variables_response = requests.get(variableurl, timeout=60)
variables_response.raise_for_status()
variables_json = variables_response.json()

In [262]:
#Look up one variable code to see the metadata available for it; the 'label' entry is the
#text used later as the column title
variables_json["variables"]["S2301_C01_001E"]

{'label': 'Estimate!!Total!!Population 16 years and over',
 'concept': 'EMPLOYMENT STATUS',
 'predicateType': 'int',
 'group': 'S2301',
 'limit': 0,
 'attributes': 'S2301_C01_001EA,S2301_C01_001M,S2301_C01_001MA'}

In [263]:
#Find and replace all columns with variable titles
#
#Columns that are not listed in the variable dictionary themselves are derived from an
#estimate column whose code ends in 'E'. Each entry maps the derived suffix to:
#  (number of trailing characters to strip, text to add back to get the estimate code,
#   tag appended to the estimate's label)
DERIVED_SUFFIXES = {
    'EA': (1, '', "||Annotation"),                 # ...EA -> ...E
    'M': (1, 'E', "||MarginOfError"),              # ...M  -> ...E
    'MA': (2, 'E', "||MarginOfErrorAnnotation"),   # ...MA -> ...E
}


def census_label(col, variables):
    """Return the descriptive label for one column code, or None if it cannot be found.

    Parameters
    ----------
    col : str
        Column code from the API response (e.g. 'S2301_C01_001E' or 'S2301_C01_001MA').
    variables : dict
        The "variables" mapping of the variable dictionary: code -> metadata dict that
        contains a 'label' entry.

    Returns
    -------
    str or None
        The label of `col` when it is listed directly; otherwise the label of its base
        estimate column plus a "||..." tag for EA / M / MA columns; None when neither
        the column nor its base estimate is listed.
    """
    entry = variables.get(col)
    if entry is not None:
        return entry['label']
    for suffix, (strip_count, restore, tag) in DERIVED_SUFFIXES.items():
        if col.endswith(suffix):
            base_entry = variables.get(col[:-strip_count] + restore)
            # A missing base estimate used to crash with "TypeError: 'NoneType' object is
            # not subscriptable"; returning None instead lets the None audit in the
            # following cells report the column.
            if base_entry is None:
                return None
            return base_entry['label'] + tag
    # Not a variable code at all (e.g. NAME or the geography columns)
    return None


new_labels = [census_label(col, variables_json["variables"]) for col in unifiedschool_df.columns]

In [264]:
#Display the generated labels to spot any columns that got no title (they show as None)
new_labels  

['Geography',
 None,
 'Estimate!!Total!!Population 16 years and over',
 'Estimate!!Total!!Population 16 years and over||Annotation',
 'Estimate!!Total!!Population 16 years and over||MarginOfError',
 'Estimate!!Total!!Population 16 years and over||MarginOfErrorAnnotation',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years||Annotation',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfError',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfErrorAnnotation',
 'Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years',
 'Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years||Annotation',
 'Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years||MarginOfError',
 'Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years||MarginOfErrorAnnotation',
 'Estimate!!Total!!Population 16 years and over!!AGE!!2

In [265]:
#Sanity check: there must be exactly one label per dataframe column before the labels
#are applied as column titles
assert len(new_labels) == len(unifiedschool_df.columns)

In [266]:
#Count how many columns still have no title (each None contributes 1 to the total)
sum(label is None for label in new_labels)

3

In [267]:
#Fill in the three missing labels by position: index 1 is the NAME column and the last two
#are the 'state' and 'school district (unified)' columns, as seen in the raw column order.
#NOTE(review): the positions are hard-coded, so this assumes the API keeps returning the
#columns in that order -- re-check the None positions if the query changes.
new_labels[1] = 'NAME'
new_labels[-2] = 'STATE'
new_labels[-1] = 'SCHOOL DISTRICT (UNIFIED)'

In [268]:
#Replace the dataframe's column names with the descriptive labels. This overwrites every
#column name in place (not only the three that were filled in by hand).
unifiedschool_df.columns = new_labels

In [269]:
#List every column whose title does not contain "Annotation"
non_annotation_columns = []
for column_title in unifiedschool_df.columns:
    if "Annotation" in column_title:
        continue
    non_annotation_columns.append(column_title)
non_annotation_columns

['Geography',
 'NAME',
 'Estimate!!Total!!Population 16 years and over',
 'Estimate!!Total!!Population 16 years and over||MarginOfError',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfError',
 'Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years',
 'Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years||MarginOfError',
 'Estimate!!Total!!Population 16 years and over!!AGE!!25 to 29 years',
 'Estimate!!Total!!Population 16 years and over!!AGE!!25 to 29 years||MarginOfError',
 'Estimate!!Total!!Population 16 years and over!!AGE!!30 to 34 years',
 'Estimate!!Total!!Population 16 years and over!!AGE!!30 to 34 years||MarginOfError',
 'Estimate!!Total!!Population 16 years and over!!AGE!!35 to 44 years',
 'Estimate!!Total!!Population 16 years and over!!AGE!!35 to 44 years||MarginOfError',
 'Estimate!!Total!!Population 16 years and over!!AGE!!45 to 54 years',
 'Esti

In [270]:
#Copy the data into a new dataframe that leaves out every column with "Annotation" in its
#title; .copy() makes it independent of unifiedschool_df so columns can be added safely
annotation_free_titles = [title for title in unifiedschool_df.columns if not ("Annotation" in title)]
without_annotation_df = unifiedschool_df[annotation_free_titles].copy()

In [271]:
#Format to rename School District Name
def cleandistrict(NAME, state_suffix=", California"):
    """Return a school district name with the state suffix removed.

    Parameters
    ----------
    NAME : str
        District name as returned by the Census API, e.g.
        "Hesperia Unified School District, California".
    state_suffix : str, optional
        Text to remove from the name. Defaults to ", California", the state queried by
        this notebook; pass another value (e.g. ", Oregon") when the template is used
        for a different state.

    Returns
    -------
    str
        NAME with every occurrence of `state_suffix` removed; NAME unchanged when the
        suffix does not occur.
    """
    return NAME.replace(state_suffix, "")

In [272]:
#Preview the cleaned district names; the result is only displayed here, not stored
without_annotation_df['NAME'].apply(cleandistrict)

1               Redlands Unified School District
2      West Contra Costa Unified School District
3              Riverside Unified School District
4                Rowland Unified School District
5        Sacramento City Unified School District
                         ...                    
116               Novato Unified School District
117              Oakland Unified School District
118            Oceanside Unified School District
119          Twin Rivers Unified School District
120              Turlock Unified School District
Name: NAME, Length: 120, dtype: object

In [273]:
#Store the cleaned district names in a new column titled District
cleaned_district_names = without_annotation_df['NAME'].apply(cleandistrict)
without_annotation_df['District'] = cleaned_district_names

In [274]:
#Find the length of the Geography column contents, which is 16
#NOTE: this expression is not the last one in the cell, so the notebook does not display
#its result; only the final expression of the cell is shown
without_annotation_df['Geography'].apply(len).unique()

#Pull a sample geography code
geo = "9700000US0600158"

#Find the NCESDist ID: the part of the code that follows "US"
geo.split("US")[1]

'0600158'

In [275]:
#Format to separate the Census Geography code: keep the part after "US"
#(the state and district ID combined)
def splitGeo(geo):
    """Return the piece of a Census geography code that follows the first "US".

    For "9700000US0600158" this gives "0600158". A code without "US" raises IndexError.
    """
    segments = geo.split("US")
    return segments[1]

In [276]:
#Preview the extracted codes for every row; the result is only displayed here, not stored
without_annotation_df['Geography'].apply(splitGeo)

1      0632070
2      0632550
3      0633150
4      0633750
5      0633840
        ...   
116    0627720
117    0628050
118    0628250
119    0601332
120    0600158
Name: Geography, Length: 120, dtype: object

In [277]:
#Store the code extracted from Geography in a new column titled NCESDist
nces_district_codes = without_annotation_df['Geography'].apply(splitGeo)
without_annotation_df['NCESDist'] = nces_district_codes

In [279]:
#Add a year column holding the string "2019" on every row (the survey year in the API URL)
without_annotation_df['year']= "2019"

In [280]:
#Display the formatted dataframe, including the added District, NCESDist and year columns
without_annotation_df

Unnamed: 0,Geography,NAME,Estimate!!Total!!Population 16 years and over,Estimate!!Total!!Population 16 years and over||MarginOfError,Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years,Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfError,Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years,Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years||MarginOfError,Estimate!!Total!!Population 16 years and over!!AGE!!25 to 29 years,Estimate!!Total!!Population 16 years and over!!AGE!!25 to 29 years||MarginOfError,...,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency)||MarginOfError,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree||MarginOfError,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher||MarginOfError,STATE,SCHOOL DISTRICT (UNIFIED),District,NCESDist,year
1,9700000US0632070,"Redlands Unified School District, California",103753,5033,6839,1649,7518,1488,12138,2824,...,4.6,5.7,4.6,3.4,2.2,06,32070,Redlands Unified School District,0632070,2019
2,9700000US0632550,"West Contra Costa Unified School District, Cal...",205097,5334,11387,1736,15494,2184,16664,2555,...,1.3,1.8,1.0,3.2,1.6,06,32550,West Contra Costa Unified School District,0632550,2019
3,9700000US0633150,"Riverside Unified School District, California",215389,8763,21809,2711,25554,2985,25135,4099,...,1.3,3.5,2.3,1.4,1.0,06,33150,Riverside Unified School District,0633150,2019
4,9700000US0633750,"Rowland Unified School District, California",87017,8025,5516,1515,6086,1644,8259,1777,...,5.6,2.1,1.8,6.0,3.1,06,33750,Rowland Unified School District,0633750,2019
5,9700000US0633840,"Sacramento City Unified School District, Calif...",274468,9161,14299,2348,22712,2958,31915,3142,...,3.7,4.5,1.5,3.5,1.5,06,33840,Sacramento City Unified School District,0633840,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,9700000US0627720,"Novato Unified School District, California",53039,2000,3541,925,3833,1472,1416,777,...,3.5,6.0,4.9,1.3,1.5,06,27720,Novato Unified School District,0627720,2019
117,9700000US0628050,"Oakland Unified School District, California",353106,4191,15692,1922,23030,2400,39672,3625,...,1.9,4.5,1.7,2.9,0.9,06,28050,Oakland Unified School District,0628050,2019
118,9700000US0628250,"Oceanside Unified School District, California",107834,5924,4459,1438,13673,2639,11795,2006,...,4.8,5.0,2.7,5.1,4.0,06,28250,Oceanside Unified School District,0628250,2019
119,9700000US0601332,"Twin Rivers Unified School District, California",138033,8194,9426,2080,12839,2426,16082,2708,...,2.9,5.4,2.6,5.9,3.7,06,01332,Twin Rivers Unified School District,0601332,2019


In [237]:
#Rearrange columns in list
#['NCESDist','District'] + list(without_annotation_df.columns[2:])

In [227]:
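#Rearrange columns in dataframe
#(Disabled. As written, columns[2:] still contains 'District' and 'NCESDist', so they would
# appear twice -- the saved output below shows them again as District.1 and NCESDist.1.)
#final = without_annotation_df[['NCESDist','District','Geography','NAME'] + list(without_annotation_df.columns[2:])]
#final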

Unnamed: 0,NCESDist,District,Geography,NAME,Estimate!!Total!!Population 16 years and over,Estimate!!Total!!Population 16 years and over||MarginOfError,Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years,Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfError,Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years,Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years||MarginOfError,...,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency),Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency)||MarginOfError,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree||MarginOfError,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher||MarginOfError,STATE,SCHOOL DISTRICT (UNIFIED),District.1,NCESDist.1
1,0632070,Redlands Unified School District,9700000US0632070,"Redlands Unified School District, California",103753,5033,6839,1649,7518,1488,...,3.9,4.6,5.7,4.6,3.4,2.2,06,32070,Redlands Unified School District,0632070
2,0632550,West Contra Costa Unified School District,9700000US0632550,"West Contra Costa Unified School District, Cal...",205097,5334,11387,1736,15494,2184,...,2.0,1.3,1.8,1.0,3.2,1.6,06,32550,West Contra Costa Unified School District,0632550
3,0633150,Riverside Unified School District,9700000US0633150,"Riverside Unified School District, California",215389,8763,21809,2711,25554,2985,...,2.2,1.3,3.5,2.3,1.4,1.0,06,33150,Riverside Unified School District,0633150
4,0633750,Rowland Unified School District,9700000US0633750,"Rowland Unified School District, California",87017,8025,5516,1515,6086,1644,...,10.5,5.6,2.1,1.8,6.0,3.1,06,33750,Rowland Unified School District,0633750
5,0633840,Sacramento City Unified School District,9700000US0633840,"Sacramento City Unified School District, Calif...",274468,9161,14299,2348,22712,2958,...,9.2,3.7,4.5,1.5,3.5,1.5,06,33840,Sacramento City Unified School District,0633840
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,0627720,Novato Unified School District,9700000US0627720,"Novato Unified School District, California",53039,2000,3541,925,3833,1472,...,1.8,3.5,6.0,4.9,1.3,1.5,06,27720,Novato Unified School District,0627720
117,0628050,Oakland Unified School District,9700000US0628050,"Oakland Unified School District, California",353106,4191,15692,1922,23030,2400,...,3.6,1.9,4.5,1.7,2.9,0.9,06,28050,Oakland Unified School District,0628050
118,0628250,Oceanside Unified School District,9700000US0628250,"Oceanside Unified School District, California",107834,5924,4459,1438,13673,2639,...,6.6,4.8,5.0,2.7,5.1,4.0,06,28250,Oceanside Unified School District,0628250
119,0601332,Twin Rivers Unified School District,9700000US0601332,"Twin Rivers Unified School District, California",138033,8194,9426,2080,12839,2426,...,4.2,2.9,5.4,2.6,5.9,3.7,06,01332,Twin Rivers Unified School District,0601332


In [167]:
#Build the reordered dataframe in this cell. No active cell visible in this notebook assigns
#`final` (its only assignment is commented out), so the bare `final['year'] = ...` raised
#NameError on a fresh kernel. The ID columns go first and every other column follows exactly
#once, so 'District' and 'NCESDist' are not duplicated.
id_columns = ['NCESDist', 'District', 'Geography', 'NAME']
other_columns = [col for col in without_annotation_df.columns if col not in id_columns]
final = without_annotation_df[id_columns + other_columns].copy()
final['year']= "2019"

In [168]:
#Display without_annotation_df again (the cell above writes to `final`, not to this frame)
without_annotation_df

Unnamed: 0,Geography,NAME,Estimate!!Total!!Population 16 years and over,Estimate!!Total!!Population 16 years and over||MarginOfError,Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years,Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfError,Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years,Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years||MarginOfError,Estimate!!Total!!Population 16 years and over!!AGE!!25 to 29 years,Estimate!!Total!!Population 16 years and over!!AGE!!25 to 29 years||MarginOfError,...,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency)||MarginOfError,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree||MarginOfError,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher||MarginOfError,STATE,SCHOOL DISTRICT (UNIFIED),District,NCESDist,year
1,9700000US0632070,"Redlands Unified School District, California",103753,5033,6839,1649,7518,1488,12138,2824,...,4.6,5.7,4.6,3.4,2.2,06,32070,Redlands Unified School District,0632070,2019
2,9700000US0632550,"West Contra Costa Unified School District, Cal...",205097,5334,11387,1736,15494,2184,16664,2555,...,1.3,1.8,1.0,3.2,1.6,06,32550,West Contra Costa Unified School District,0632550,2019
3,9700000US0633150,"Riverside Unified School District, California",215389,8763,21809,2711,25554,2985,25135,4099,...,1.3,3.5,2.3,1.4,1.0,06,33150,Riverside Unified School District,0633150,2019
4,9700000US0633750,"Rowland Unified School District, California",87017,8025,5516,1515,6086,1644,8259,1777,...,5.6,2.1,1.8,6.0,3.1,06,33750,Rowland Unified School District,0633750,2019
5,9700000US0633840,"Sacramento City Unified School District, Calif...",274468,9161,14299,2348,22712,2958,31915,3142,...,3.7,4.5,1.5,3.5,1.5,06,33840,Sacramento City Unified School District,0633840,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,9700000US0627720,"Novato Unified School District, California",53039,2000,3541,925,3833,1472,1416,777,...,3.5,6.0,4.9,1.3,1.5,06,27720,Novato Unified School District,0627720,2019
117,9700000US0628050,"Oakland Unified School District, California",353106,4191,15692,1922,23030,2400,39672,3625,...,1.9,4.5,1.7,2.9,0.9,06,28050,Oakland Unified School District,0628050,2019
118,9700000US0628250,"Oceanside Unified School District, California",107834,5924,4459,1438,13673,2639,11795,2006,...,4.8,5.0,2.7,5.1,4.0,06,28250,Oceanside Unified School District,0628250,2019
119,9700000US0601332,"Twin Rivers Unified School District, California",138033,8194,9426,2080,12839,2426,16082,2708,...,2.9,5.4,2.6,5.9,3.7,06,01332,Twin Rivers Unified School District,0601332,2019


In [None]:
#Show only the columns whose title contains the labor-force-participation-by-educational-attainment text
participation_columns = [
    title
    for title in without_annotation_df.columns
    if 'Labor Force Participation Rate!!EDUCATIONAL ATTAINMENT' in title
]
without_annotation_df[participation_columns]