# Census- Employment Status Data

In [None]:
import pandas as pd
import requests

In [None]:
# Census ACS 1-year (2018) subject-table endpoint: table group S2301 (Employment Status)
# for every elementary school district within California (state code 06).
url = (
    "https://api.census.gov/data/2018/acs/acs1/subject?get=group(S2301)&for=school%20district%20(elementary)&in=state:06"
)

In [None]:
# Fetch the table from the Census API.
# The timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() stops the notebook right here on an HTTP error (4xx/5xx) instead
# of failing later with a confusing JSON/DataFrame error.
response = requests.get(url, timeout=60)
response.raise_for_status()

In [None]:
# Decode the JSON body of the response. The next cell uses element 0 of the decoded
# payload as the column names and then removes that row from the data.
response_json = response.json()

In [None]:
# Load the payload into a DataFrame. The first payload row supplies the column names;
# because that row is also loaded as data, it is dropped again (index label 0), so the
# remaining rows are indexed from 1. The original run had 55 schools and 564 columns.
header_row = response_json[0]
elementaryschool_df = pd.DataFrame(response_json, columns=header_row).drop(index=0)

In [None]:
elementaryschool_df

In [None]:
# Find Name column
# elementaryschool_df.to_csv("/Users/nataligracia/git/ca-school-enrollment-trend/Elementary2018test.csv")

In [None]:
# Look up a single district by its NAME value to inspect the raw column titles.
# NOTE(review): the request URL asks for elementary districts, while this name is a
# "Unified" district - this filter may return no rows; confirm the intended district.
elementaryschool_df.loc[elementaryschool_df["NAME"]=="Los Angeles Unified School District, California"]

## _Format Data_

In [None]:
# Variable dictionary for the 2018 ACS 1-year subject tables; used below to translate
# column codes such as S2301_C01_001E into readable labels.
variableurl = (
    "https://api.census.gov/data/2018/acs/acs1/subject/variables.json"
)

In [None]:
# Download and decode the variable dictionary.
# The timeout prevents an indefinite hang, and raise_for_status() surfaces an HTTP
# error immediately instead of letting .json() fail on (or silently parse) an error body.
variables_response = requests.get(variableurl, timeout=60)
variables_response.raise_for_status()
variables_json = variables_response.json()

In [10]:
# View variable for column title
variables_json["variables"]["S2301_C01_001E"]

{'label': 'Estimate!!Total!!Population 16 years and over',
 'concept': 'EMPLOYMENT STATUS',
 'predicateType': 'int',
 'group': 'S2301',
 'limit': 0,
 'attributes': 'S2301_C01_001EA,S2301_C01_001M,S2301_C01_001MA'}

In [11]:
# Translate every raw Census column code into a human-readable label.
#  * Codes listed in variables.json use their 'label' directly.
#  * Companion columns (suffix EA, M or MA) are looked up through the matching estimate
#    variable (same code ending in "E") and get a "||<kind>" tag appended.
#  * Anything that cannot be resolved keeps the label None; later cells count and
#    rename those columns.
variable_defs = variables_json["variables"]

# (column suffix, tag appended to the estimate's label). The estimate code is the
# column code with the suffix removed and "E" appended.
derived_suffixes = (
    ("EA", "||Annotation"),
    ("MA", "||MarginOfErrorAnnotation"),
    ("M", "||MarginOfError"),
)

new_labels = []
for col in elementaryschool_df.columns:
    definition = variable_defs.get(col)
    label = definition["label"] if definition is not None else None
    if label is None:
        for suffix, tag in derived_suffixes:
            if not col.endswith(suffix):
                continue
            estimate = variable_defs.get(col[: -len(suffix)] + "E")
            # Previously a missing estimate variable raised
            # "TypeError: 'NoneType' object is not subscriptable" here; now the label
            # simply stays None so the column shows up in the unlabeled count.
            if estimate is not None:
                label = estimate["label"] + tag
            break
    new_labels.append(label)

In [None]:
# Find any columns without titles
# new_labels  

In [13]:
# Sanity check: exactly one label was produced for every DataFrame column
# (this cell only verifies the count; it does not rename anything).
assert len(new_labels) == len(elementaryschool_df.columns)

In [None]:
# Count how many columns still have no readable label (None)
sum(label is None for label in new_labels)

In [15]:
# Give every still-unlabeled column (label is None) a readable title.
# The previous version patched hard-coded positions (-283, -2, -1), which silently
# mislabels columns if the API response ever changes its column count or order.
# Each missing label now falls back to the upper-cased raw column code instead, so
# "NAME" stays 'NAME'.
# NOTE(review): the other two raw codes are presumably "state" and
# "school district (elementary)", giving 'STATE' and 'SCHOOL DISTRICT (ELEMENTARY)'
# exactly as before - confirm against the API response.
for position, raw_code in enumerate(elementaryschool_df.columns):
    if new_labels[position] is None:
        new_labels[position] = raw_code.upper()

In [16]:
# Apply the readable labels as the DataFrame's column names (replaces the raw codes in place)
elementaryschool_df.columns = new_labels

In [17]:
#Find all columns without "Annotation" in column title
[col for col in elementaryschool_df.columns if "Annotation" not in col]

['Geography',
 'Estimate!!Total!!Population 16 years and over',
 'Estimate!!Total!!Population 16 years and over||MarginOfError',
 'Estimate!!Labor Force Participation Rate!!Population 16 years and over',
 'Estimate!!Labor Force Participation Rate!!Population 16 years and over||MarginOfError',
 'Estimate!!Employment/Population Ratio!!Population 16 years and over',
 'Estimate!!Employment/Population Ratio!!Population 16 years and over||MarginOfError',
 'Estimate!!Unemployment rate!!Population 16 years and over',
 'Estimate!!Unemployment rate!!Population 16 years and over||MarginOfError',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfError',
 'Estimate!!Labor Force Participation Rate!!Population 16 years and over!!AGE!!16 to 19 years',
 'Estimate!!Labor Force Participation Rate!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfError',
 'Estimate!!Employment/Population Ratio!!Pop

In [18]:
# Keep only the columns whose title does not contain "Annotation"
# (284 columns in the original run). .copy() makes the result an independent frame.
non_annotation_columns = [title for title in elementaryschool_df.columns if "Annotation" not in title]
without_annotation = elementaryschool_df.loc[:, non_annotation_columns].copy()

In [19]:
#Find all columns without "MarginOfError" in column title
#[col for col in without_annotation_df.columns if "MarginOfError" not in col]

In [20]:
# Keep only the columns whose title does not contain "MarginOfError"
# (144 columns in the original run). .copy() makes the result an independent frame.
non_margin_columns = [title for title in without_annotation.columns if 'MarginOfError' not in title]
withoutmarginerror = without_annotation.loc[:, non_margin_columns].copy()

In [21]:
#Find all columns without "Labor Force Participation Rate" in column title
#[col for col in withoutmarginerror.columns if "Labor Force Participation Rate" not in col]

In [22]:
# Keep only the columns whose title does not contain "Labor Force Participation Rate"
# (109 columns in the original run). .copy() makes the result an independent frame.
non_labor_force_columns = [
    title for title in withoutmarginerror.columns
    if 'Labor Force Participation Rate' not in title
]
withoutlaborforce = withoutmarginerror.loc[:, non_labor_force_columns].copy()

In [23]:
#Find all columns without "Sex" in column title
#[col for col in withoutlaborforce.columns if "SEX" not in col]

In [24]:
# Keep only the columns whose title does not contain the upper-case section name "SEX"
# (91 columns in the original run). .copy() makes the result an independent frame.
non_sex_columns = [title for title in withoutlaborforce.columns if 'SEX' not in title]
withoutsex = withoutlaborforce.loc[:, non_sex_columns].copy()

In [25]:
#Find all columns without "Poverty Status" in column title
#[col for col in withoutsex.columns if "POVERTY STATUS" not in col]

In [26]:
# Keep only the columns whose title does not contain "POVERTY STATUS"
# (85 columns in the original run). .copy() makes the result an independent frame.
non_poverty_columns = [title for title in withoutsex.columns if 'POVERTY STATUS' not in title]
withoutps = withoutsex.loc[:, non_poverty_columns].copy()

In [27]:
#Find all columns without "Disability Status" in column title
#[col for col in withoutps.columns if "DISABILITY STATUS" not in col]

In [28]:
# Keep only the columns whose title does not contain "DISABILITY STATUS"
# (82 columns in the original run). .copy() makes the result an independent frame.
non_disability_columns = [title for title in withoutps.columns if 'DISABILITY STATUS' not in title]
withoutds = withoutps.loc[:, non_disability_columns].copy()

In [29]:
#Find all columns without "Educational Attainment" in column title
#[col for col in withoutds.columns if "EDUCATIONAL ATTAINMENT" not in col]

In [30]:
# Keep only the columns whose title does not contain "EDUCATIONAL ATTAINMENT"
# (67 columns in the original run). .copy() makes the result an independent frame.
non_education_columns = [title for title in withoutds.columns if 'EDUCATIONAL ATTAINMENT' not in title]
withoutea = withoutds.loc[:, non_education_columns].copy()

In [31]:
#Find all columns without "Age" in column title
#[col for col in withoutea.columns if "AGE" not in col]

In [32]:
# Keep only the columns whose title does not contain the upper-case section name "AGE"
# (37 columns in the original run). .copy() makes the result an independent frame.
non_age_columns = [title for title in withoutea.columns if 'AGE' not in title]
withoutage = withoutea.loc[:, non_age_columns].copy()

In [33]:
withoutage

Unnamed: 0,Geography,Estimate!!Total!!Population 16 years and over,Estimate!!Employment/Population Ratio!!Population 16 years and over,Estimate!!Unemployment rate!!Population 16 years and over,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone,Estimate!!Employment/Population Ratio!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Black or African American alone,Estimate!!Employment/Population Ratio!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Black or African American alone,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Black or African American alone,...,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Hispanic or Latino origin (of any race),"Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone, not Hispanic or Latino","Estimate!!Employment/Population Ratio!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone, not Hispanic or Latino","Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone, not Hispanic or Latino",Estimate!!Total!!Population 20 to 64 years,Estimate!!Employment/Population Ratio!!Population 20 to 64 years,Estimate!!Unemployment rate!!Population 20 to 64 years,NAME,STATE,SCHOOL DISTRICT (ELEMENTARY)
1,9500000US0620880,81690,48.4,9.2,46907,51.2,6.5,17546,36.2,20.1,...,6.9,25659,54.2,7.6,62238,58.9,8.7,"Lancaster Elementary School District, California",6,20880
2,9500000US0627180,59108,60.9,5.0,43101,60.4,5.5,-999999999,-999999999.0,-999999999.0,...,-999999999.0,32299,57.5,7.5,44752,73.3,5.0,"Newhall Elementary School District, California",6,27180
3,9500000US0642150,69721,56.5,5.7,27117,55.6,5.8,-999999999,-999999999.0,-999999999.0,...,9.4,17763,56.2,4.9,51164,72.7,5.4,"Westminster School District, California",6,42150
4,9500000US0628470,143249,63.3,6.1,65946,63.6,3.9,-999999999,-999999999.0,-999999999.0,...,5.9,19017,56.3,4.1,113697,74.4,5.5,"Ontario-Montclair School District, California",6,28470
5,9500000US0639870,44492,61.1,6.2,35877,62.1,4.6,-999999999,-999999999.0,-999999999.0,...,7.3,12566,61.7,2.5,35152,69.8,6.6,"Tulare City Elementary School District, Califo...",6,39870
6,9500000US0642450,51933,62.1,3.6,24790,61.5,4.7,-999999999,-999999999.0,-999999999.0,...,4.0,8502,59.5,1.5,40434,75.7,3.3,"Whittier City Elementary School District, Cali...",6,42450
7,9500000US0613500,54425,38.5,8.5,45342,39.7,7.9,-999999999,-999999999.0,-999999999.0,...,8.4,30653,32.5,9.4,38799,48.1,8.1,"Fallbrook Union Elementary School District, Ca...",6,13500
8,9500000US0620250,113134,61.5,4.0,83615,61.0,3.1,-999999999,-999999999.0,-999999999.0,...,2.7,58232,57.8,3.3,86313,74.9,3.8,"La Mesa-Spring Valley School District, California",6,20250
9,9500000US0614370,70871,63.3,5.4,21874,61.5,4.1,-999999999,-999999999.0,-999999999.0,...,4.7,-999999999,-999999999.0,-999999999.0,54795,75.2,4.4,"Franklin-McKinley Elementary School District, ...",6,14370
10,9500000US0618870,81646,67.5,4.5,17826,59.9,7.1,-999999999,-999999999.0,-999999999.0,...,-999999999.0,10523,53.7,10.3,62647,81.7,4.4,Jefferson Elementary School District (San Mate...,6,18870


In [34]:
#Format to rename School District Name
def cleandistrict(NAME):
    """Return the district name with every ", California" occurrence removed.

    NAME is a Census NAME string such as
    "Newhall Elementary School District, California"; a string that does not
    contain ", California" is returned unchanged.
    """
    state_suffix = ", California"
    # Splitting on the suffix and re-joining the pieces removes each occurrence.
    return "".join(NAME.split(state_suffix))

In [35]:
#Apply formatting condition to School District Name
withoutage['NAME'].apply(cleandistrict)

1                  Lancaster Elementary School District
2                    Newhall Elementary School District
3                           Westminster School District
4                     Ontario-Montclair School District
5                Tulare City Elementary School District
6              Whittier City Elementary School District
7            Fallbrook Union Elementary School District
8                 La Mesa-Spring Valley School District
9          Franklin-McKinley Elementary School District
10    Jefferson Elementary School District (San Mate...
11           Alum Rock Union Elementary School District
12           Brentwood Union Elementary School District
13        Cajon Valley Union Elementary School District
14                    Victor Elementary School District
15     Huntington Beach City Elementary School District
16                        Menifee Union School District
17              Salinas City Elementary School District
18              Goleta Union Elementary School D

In [36]:
# Store the cleaned district name (NAME passed through cleandistrict) in a new
# "District" column
withoutage['District'] = withoutage['NAME'].map(cleandistrict)

In [37]:
# Check the length of the Geography column values (16 in the original run).
# Note: only the last expression of a cell is displayed, so this result is not shown.
withoutage['Geography'].apply(len).unique()

# Example Geography value to experiment with
geo = "9700000US0622710"

# The NCES district ID (state and district ID combined) is the text after "US"
geo.split("US")[1]

'0622710'

In [38]:
#Format to separate the Census Geography code, keeping the combined state and district ID
def splitGeo(geo):
    """Return the piece of a Census Geography code that follows its first "US".

    For "9700000US0622710" the result is "0622710". If "US" occurs more than
    once, only the text between the first and second occurrence is returned.
    Raises IndexError when geo contains no "US".
    """
    pieces = geo.split("US")
    return pieces[1]

In [39]:
#Apply formatting condition to Geography
withoutage['Geography'].apply(splitGeo)

1     0620880
2     0627180
3     0642150
4     0628470
5     0639870
6     0642450
7     0613500
8     0620250
9     0614370
10    0618870
11    0602310
12    0605910
13    0606810
14    0641040
15    0618030
16    0624540
17    0633930
18    0615510
19    0626280
20    0611850
21    0605580
22    0617850
23    0635590
24    0623430
25    0635970
26    0629580
27    0638220
28    0614730
29    0604800
30    0607200
31    0634920
32    0602610
33    0624600
34    0632130
35    0625130
36    0638670
37    0642120
38    0606390
39    0629220
40    0633600
41    0610290
42    0637380
43    0612750
44    0612880
45    0628140
46    0608610
47    0630990
48    0612090
49    0638460
50    0616680
51    0612960
52    0613140
53    0635810
54    0603630
55    0627810
Name: Geography, dtype: object

In [40]:
# Store the NCES district ID (Geography passed through splitGeo) in a new
# "NCESDist" column
withoutage['NCESDist'] = withoutage['Geography'].map(splitGeo)

In [41]:
# Add a constant survey-year column (stored as the string "2018", not an integer)
withoutage['year']= "2018"

In [42]:
withoutage.head()

Unnamed: 0,Geography,Estimate!!Total!!Population 16 years and over,Estimate!!Employment/Population Ratio!!Population 16 years and over,Estimate!!Unemployment rate!!Population 16 years and over,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone,Estimate!!Employment/Population Ratio!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Black or African American alone,Estimate!!Employment/Population Ratio!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Black or African American alone,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Black or African American alone,...,"Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone, not Hispanic or Latino",Estimate!!Total!!Population 20 to 64 years,Estimate!!Employment/Population Ratio!!Population 20 to 64 years,Estimate!!Unemployment rate!!Population 20 to 64 years,NAME,STATE,SCHOOL DISTRICT (ELEMENTARY),District,NCESDist,year
1,9500000US0620880,81690,48.4,9.2,46907,51.2,6.5,17546,36.2,20.1,...,7.6,62238,58.9,8.7,"Lancaster Elementary School District, California",6,20880,Lancaster Elementary School District,620880,2018
2,9500000US0627180,59108,60.9,5.0,43101,60.4,5.5,-999999999,-999999999.0,-999999999.0,...,7.5,44752,73.3,5.0,"Newhall Elementary School District, California",6,27180,Newhall Elementary School District,627180,2018
3,9500000US0642150,69721,56.5,5.7,27117,55.6,5.8,-999999999,-999999999.0,-999999999.0,...,4.9,51164,72.7,5.4,"Westminster School District, California",6,42150,Westminster School District,642150,2018
4,9500000US0628470,143249,63.3,6.1,65946,63.6,3.9,-999999999,-999999999.0,-999999999.0,...,4.1,113697,74.4,5.5,"Ontario-Montclair School District, California",6,28470,Ontario-Montclair School District,628470,2018
5,9500000US0639870,44492,61.1,6.2,35877,62.1,4.6,-999999999,-999999999.0,-999999999.0,...,2.5,35152,69.8,6.6,"Tulare City Elementary School District, Califo...",6,39870,Tulare City Elementary School District,639870,2018


In [43]:
# Export the cleaned table to CSV.
# NOTE(review): this is an absolute, user-specific path, so the cell will likely fail
# on any other machine; consider a path relative to a configurable data directory.
# The DataFrame index is written as well (to_csv default), producing an unnamed first column.
withoutage.to_csv("/Users/nataligracia/git/ca-school-enrollment-trend/Elementary2018.csv")

In [44]:
#Rearrange columns in list
#['NCESDist','District'] + list(without_annotation_df.columns[2:])

In [45]:
#Rearrange columns in dataframe
#final = without_annotation_df[['NCESDist','District','Geography','NAME'] + list(without_annotation_df.columns[2:])]
#final