# Census- Employment Status Data Template

In [1]:
import pandas as pd
import requests

In [2]:
#Census Subject Table API request for 2019: every variable in group S2301 (Employment Status)
#for each elementary school district in California (state:06)
url = (
    "https://api.census.gov/data/2019/acs/acs1/subject"
    "?get=group(S2301)"
    "&for=school%20district%20(elementary)"
    "&in=state:06"
)

In [3]:
#Request the table from the Census API. The timeout keeps a stalled connection from
#hanging the kernel, and raise_for_status() stops the notebook here on any 4xx/5xx reply
#instead of letting a failed request surface later as a confusing JSON/DataFrame error.
response = requests.get(url, timeout=60)
response.raise_for_status()

In [4]:
#Decode the JSON body of the response; the dataframe cell below treats it as a list of rows whose first entry (index 0) holds the column names
response_json = response.json()

In [5]:
#Load the decoded rows into a dataframe. Row 0 holds the column names, so it supplies the
#header and is then dropped from the data (the remaining index therefore starts at 1).
#The original run reported 52 districts and 564 columns.
elementaryschool_df = pd.DataFrame(data=response_json, columns=response_json[0]).drop(index=0)

In [6]:
elementaryschool_df

Unnamed: 0,GEO_ID,NAME,S2301_C01_001E,S2301_C01_001EA,S2301_C01_001M,S2301_C01_001MA,S2301_C01_002E,S2301_C01_002EA,S2301_C01_002M,S2301_C01_002MA,...,S2301_C04_034E,S2301_C04_034EA,S2301_C04_034M,S2301_C04_034MA,S2301_C04_035E,S2301_C04_035EA,S2301_C04_035M,S2301_C04_035MA,state,school district (elementary)
1,9500000US0633930,"Salinas City Elementary School District, Calif...",56443,,4487,,4282,,1090,,...,3.3,,3.2,,2.1,,3.5,,6,33930
2,9500000US0634920,San Mateo-Foster City Elementary School Distri...,114601,,2583,,6295,,1449,,...,2.2,,1.7,,3.2,,1.1,,6,34920
3,9500000US0628140,Ocean View Elementary School District (Orange ...,86182,,5231,,5622,,1451,,...,5.0,,3.1,,5.3,,3.2,,6,28140
4,9500000US0626280,Mountain View Whisman Elementary School Distri...,-999999999,N,-999999999,N,-999999999,N,-999999999,N,...,10.7,,8.4,,1.7,,1.9,,6,26280
5,9500000US0627180,"Newhall Elementary School District, California",57044,,5078,,4601,,1282,,...,1.6,,1.5,,3.7,,2.7,,6,27180
6,9500000US0627810,"Oak Grove Elementary School District, California",95966,,6403,,4631,,1082,,...,3.1,,2.0,,1.9,,1.3,,6,27810
7,9500000US0642150,"Westminster School District, California",69118,,4649,,3888,,1048,,...,7.2,,4.3,,0.6,,1.0,,6,42150
8,9500000US0603630,"Bakersfield City School District, California",138160,,10047,,9580,,2007,,...,5.8,,3.2,,0.0,,1.8,,6,3630
9,9500000US0606390,"Panama-Buena Vista School District, California",99085,,8220,,7821,,2040,,...,2.4,,1.8,,0.9,,1.1,,6,6390
10,9500000US0612090,"El Monte City School District, California",72475,,5094,,5293,,1063,,...,4.2,,2.7,,1.1,,1.4,,6,12090


In [7]:
#Look up Los Angeles Unified by its exact NAME; the result below has headers only because
#that district is not among the rows returned by this elementary-district query
elementaryschool_df[elementaryschool_df["NAME"].eq("Los Angeles Unified School District, California")]

Unnamed: 0,GEO_ID,NAME,S2301_C01_001E,S2301_C01_001EA,S2301_C01_001M,S2301_C01_001MA,S2301_C01_002E,S2301_C01_002EA,S2301_C01_002M,S2301_C01_002MA,...,S2301_C04_034E,S2301_C04_034EA,S2301_C04_034M,S2301_C04_034MA,S2301_C04_035E,S2301_C04_035EA,S2301_C04_035M,S2301_C04_035MA,state,school district (elementary)


## _Format Data_

In [8]:
#Variable metadata (label, concept, type, ...) for the same 2019 subject dataset; used below to translate column codes such as S2301_C01_001E into readable titles
variableurl = "https://api.census.gov/data/2019/acs/acs1/subject/variables.json"

In [9]:
#Download the variable metadata. Fail fast on a stalled connection (timeout) or an HTTP
#error (raise_for_status) rather than trying to decode an error page as JSON.
variables_response = requests.get(variableurl, timeout=60)
variables_response.raise_for_status()
variables_json = variables_response.json()

In [10]:
#View variable for column title
variables_json["variables"]["S2301_C01_001E"]

{'label': 'Estimate!!Total!!Population 16 years and over',
 'concept': 'EMPLOYMENT STATUS',
 'predicateType': 'int',
 'group': 'S2301',
 'limit': 0,
 'attributes': 'S2301_C01_001EA,S2301_C01_001M,S2301_C01_001MA'}

In [11]:
#Translate every column code into a readable title using the variable metadata.
#Codes found in the metadata (e.g. S2301_C01_001E) use their own label. Companion columns
#are resolved through the estimate they belong to, with a marker appended:
#  ...EA -> "||Annotation", ...M -> "||MarginOfError", ...MA -> "||MarginOfErrorAnnotation"
#Anything that cannot be resolved is left as None so the cells below can count those
#columns and title them by hand.
COMPANION_SUFFIXES = (
    ("EA", "||Annotation"),
    ("MA", "||MarginOfErrorAnnotation"),
    ("M", "||MarginOfError"),
)

def label_for_column(col, variables):
    """Return the readable title for one column code, or None if it cannot be resolved.

    col: column code as returned by the API, e.g. "S2301_C01_001E" or "S2301_C01_001MA".
    variables: the "variables" mapping from variables.json (code -> metadata dict).
    """
    entry = variables.get(col)
    if entry is not None:
        return entry['label']
    for suffix, marker in COMPANION_SUFFIXES:
        if col.endswith(suffix):
            #Swap the companion suffix for 'E' to find the estimate this column belongs to
            estimate = variables.get(col[:-len(suffix)] + 'E')
            #A missing estimate previously crashed with "'NoneType' object is not
            #subscriptable"; report the column as unlabelled instead
            return estimate['label'] + marker if estimate is not None else None
    return None

new_labels = [label_for_column(col, variables_json["variables"]) for col in elementaryschool_df.columns]

In [12]:
#Find any columns without titles
new_labels  

['Geography',
 None,
 'Estimate!!Total!!Population 16 years and over',
 'Estimate!!Total!!Population 16 years and over||Annotation',
 'Estimate!!Total!!Population 16 years and over||MarginOfError',
 'Estimate!!Total!!Population 16 years and over||MarginOfErrorAnnotation',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years||Annotation',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfError',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfErrorAnnotation',
 'Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years',
 'Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years||Annotation',
 'Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years||MarginOfError',
 'Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years||MarginOfErrorAnnotation',
 'Estimate!!Total!!Population 16 years and over!!AGE!!2

In [13]:
#Sanity check: exactly one label was produced for every column of the dataframe
assert len(elementaryschool_df.columns) == len(new_labels)

In [14]:
#Count the columns that still have no title (label is None)
sum(label is None for label in new_labels)

3

In [15]:
#Give the three untitled columns a title by position: the second column and the last two
new_labels[1], new_labels[-2], new_labels[-1] = 'NAME', 'STATE', 'SCHOOL DISTRICT (ELEMENTARY)'

In [16]:
#Apply the readable titles by overwriting the dataframe's column names in place.
#NOTE(review): after this cell the columns are no longer API codes, so re-running the
#label-building cell above without first rebuilding the dataframe will not find them in the metadata.
elementaryschool_df.columns = new_labels

In [17]:
#Find all columns without "Annotation" in column title
#[col for col in elementaryschool_df.columns if "Annotation" not in col]

In [18]:
#Copy of the data keeping only columns whose title does not contain "Annotation"
#(this also drops the "MarginOfErrorAnnotation" columns); 284 columns in the original run
without_annotation = elementaryschool_df.loc[
    :, [title for title in elementaryschool_df.columns if "Annotation" not in title]
].copy()

In [19]:
without_annotation.head()

Unnamed: 0,Geography,NAME,Estimate!!Total!!Population 16 years and over,Estimate!!Total!!Population 16 years and over||MarginOfError,Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years,Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfError,Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years,Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years||MarginOfError,Estimate!!Total!!Population 16 years and over!!AGE!!25 to 29 years,Estimate!!Total!!Population 16 years and over!!AGE!!25 to 29 years||MarginOfError,...,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Less than high school graduate,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Less than high school graduate||MarginOfError,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency),Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency)||MarginOfError,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree||MarginOfError,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher||MarginOfError,STATE,SCHOOL DISTRICT (ELEMENTARY)
1,9500000US0633930,"Salinas City Elementary School District, Calif...",56443,4487,4282,1090,5057,1519,6403,1811,...,7.4,5.2,8.4,7.0,3.3,3.2,2.1,3.5,6,33930
2,9500000US0634920,San Mateo-Foster City Elementary School Distri...,114601,2583,6295,1449,6313,1490,11046,1562,...,9.7,9.7,1.2,2.1,2.2,1.7,3.2,1.1,6,34920
3,9500000US0628140,Ocean View Elementary School District (Orange ...,86182,5231,5622,1451,6712,1437,9076,2402,...,6.3,8.3,3.4,3.6,5.0,3.1,5.3,3.2,6,28140
4,9500000US0626280,Mountain View Whisman Elementary School Distri...,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,...,0.0,6.7,0.0,8.9,10.7,8.4,1.7,1.9,6,26280
5,9500000US0627180,"Newhall Elementary School District, California",57044,5078,4601,1282,4639,1835,4124,1231,...,5.4,7.4,4.4,7.2,1.6,1.5,3.7,2.7,6,27180


In [20]:
#Find all columns without "MarginOfError" in column title
#[col for col in without_annotation.columns if "MarginOfError" not in col]

In [21]:
#Copy of the data keeping only columns whose title does not contain "MarginOfError";
#144 columns in the original run
withoutmarginerror = without_annotation.loc[
    :, [title for title in without_annotation.columns if 'MarginOfError' not in title]
].copy()

In [22]:
withoutmarginerror.head()

Unnamed: 0,Geography,NAME,Estimate!!Total!!Population 16 years and over,Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years,Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years,Estimate!!Total!!Population 16 years and over!!AGE!!25 to 29 years,Estimate!!Total!!Population 16 years and over!!AGE!!30 to 34 years,Estimate!!Total!!Population 16 years and over!!AGE!!35 to 44 years,Estimate!!Total!!Population 16 years and over!!AGE!!45 to 54 years,Estimate!!Total!!Population 16 years and over!!AGE!!55 to 59 years,...,Estimate!!Unemployment rate!!Population 20 to 64 years!!POVERTY STATUS IN THE PAST 12 MONTHS!!Below poverty level,Estimate!!Unemployment rate!!Population 20 to 64 years!!POVERTY STATUS IN THE PAST 12 MONTHS!!At or above the poverty level,Estimate!!Unemployment rate!!Population 20 to 64 years!!DISABILITY STATUS!!With any disability,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Less than high school graduate,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency),Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher,STATE,SCHOOL DISTRICT (ELEMENTARY)
1,9500000US0633930,"Salinas City Elementary School District, Calif...",56443,4282,5057,6403,6849,11192,9023,2987,...,14.4,4.7,16.6,5.5,7.4,8.4,3.3,2.1,6,33930
2,9500000US0634920,San Mateo-Foster City Elementary School Distri...,114601,6295,6313,11046,11901,20316,18948,9290,...,18.7,2.6,9.0,3.2,9.7,1.2,2.2,3.2,6,34920
3,9500000US0628140,Ocean View Elementary School District (Orange ...,86182,5622,6712,9076,8451,11158,13537,6286,...,38.9,3.1,15.9,4.9,6.3,3.4,5.0,5.3,6,28140
4,9500000US0626280,Mountain View Whisman Elementary School Distri...,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,...,21.2,1.8,0.7,2.4,0.0,0.0,10.7,1.7,6,26280
5,9500000US0627180,"Newhall Elementary School District, California",57044,4601,4639,4124,5017,7396,11510,4920,...,-999999999.0,-999999999.0,-999999999.0,3.3,5.4,4.4,1.6,3.7,6,27180


In [23]:
#Find all columns without "Labor Force Participation Rate" in column title
#[col for col in withoutmarginerror.columns if "Labor Force Participation Rate" not in col]

In [24]:
#Copy of the data keeping only columns whose title does not contain
#"Labor Force Participation Rate"; 109 columns in the original run
withoutlaborforce = withoutmarginerror.loc[
    :, [title for title in withoutmarginerror.columns if 'Labor Force Participation Rate' not in title]
].copy()

In [25]:
withoutlaborforce.head()

Unnamed: 0,Geography,NAME,Estimate!!Total!!Population 16 years and over,Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years,Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years,Estimate!!Total!!Population 16 years and over!!AGE!!25 to 29 years,Estimate!!Total!!Population 16 years and over!!AGE!!30 to 34 years,Estimate!!Total!!Population 16 years and over!!AGE!!35 to 44 years,Estimate!!Total!!Population 16 years and over!!AGE!!45 to 54 years,Estimate!!Total!!Population 16 years and over!!AGE!!55 to 59 years,...,Estimate!!Unemployment rate!!Population 20 to 64 years!!POVERTY STATUS IN THE PAST 12 MONTHS!!Below poverty level,Estimate!!Unemployment rate!!Population 20 to 64 years!!POVERTY STATUS IN THE PAST 12 MONTHS!!At or above the poverty level,Estimate!!Unemployment rate!!Population 20 to 64 years!!DISABILITY STATUS!!With any disability,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Less than high school graduate,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency),Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher,STATE,SCHOOL DISTRICT (ELEMENTARY)
1,9500000US0633930,"Salinas City Elementary School District, Calif...",56443,4282,5057,6403,6849,11192,9023,2987,...,14.4,4.7,16.6,5.5,7.4,8.4,3.3,2.1,6,33930
2,9500000US0634920,San Mateo-Foster City Elementary School Distri...,114601,6295,6313,11046,11901,20316,18948,9290,...,18.7,2.6,9.0,3.2,9.7,1.2,2.2,3.2,6,34920
3,9500000US0628140,Ocean View Elementary School District (Orange ...,86182,5622,6712,9076,8451,11158,13537,6286,...,38.9,3.1,15.9,4.9,6.3,3.4,5.0,5.3,6,28140
4,9500000US0626280,Mountain View Whisman Elementary School Distri...,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,...,21.2,1.8,0.7,2.4,0.0,0.0,10.7,1.7,6,26280
5,9500000US0627180,"Newhall Elementary School District, California",57044,4601,4639,4124,5017,7396,11510,4920,...,-999999999.0,-999999999.0,-999999999.0,3.3,5.4,4.4,1.6,3.7,6,27180


In [26]:
#Find all columns without "Sex" in column title
#[col for col in withoutlaborforce.columns if "SEX" not in col]

In [27]:
#Copy of the data keeping only columns whose title does not contain "SEX"
#(case-sensitive match); 91 columns in the original run
withoutsex = withoutlaborforce.loc[
    :, [title for title in withoutlaborforce.columns if 'SEX' not in title]
].copy()

In [28]:
withoutsex.head()

Unnamed: 0,Geography,NAME,Estimate!!Total!!Population 16 years and over,Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years,Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years,Estimate!!Total!!Population 16 years and over!!AGE!!25 to 29 years,Estimate!!Total!!Population 16 years and over!!AGE!!30 to 34 years,Estimate!!Total!!Population 16 years and over!!AGE!!35 to 44 years,Estimate!!Total!!Population 16 years and over!!AGE!!45 to 54 years,Estimate!!Total!!Population 16 years and over!!AGE!!55 to 59 years,...,Estimate!!Unemployment rate!!Population 20 to 64 years!!POVERTY STATUS IN THE PAST 12 MONTHS!!Below poverty level,Estimate!!Unemployment rate!!Population 20 to 64 years!!POVERTY STATUS IN THE PAST 12 MONTHS!!At or above the poverty level,Estimate!!Unemployment rate!!Population 20 to 64 years!!DISABILITY STATUS!!With any disability,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Less than high school graduate,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency),Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher,STATE,SCHOOL DISTRICT (ELEMENTARY)
1,9500000US0633930,"Salinas City Elementary School District, Calif...",56443,4282,5057,6403,6849,11192,9023,2987,...,14.4,4.7,16.6,5.5,7.4,8.4,3.3,2.1,6,33930
2,9500000US0634920,San Mateo-Foster City Elementary School Distri...,114601,6295,6313,11046,11901,20316,18948,9290,...,18.7,2.6,9.0,3.2,9.7,1.2,2.2,3.2,6,34920
3,9500000US0628140,Ocean View Elementary School District (Orange ...,86182,5622,6712,9076,8451,11158,13537,6286,...,38.9,3.1,15.9,4.9,6.3,3.4,5.0,5.3,6,28140
4,9500000US0626280,Mountain View Whisman Elementary School Distri...,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,...,21.2,1.8,0.7,2.4,0.0,0.0,10.7,1.7,6,26280
5,9500000US0627180,"Newhall Elementary School District, California",57044,4601,4639,4124,5017,7396,11510,4920,...,-999999999.0,-999999999.0,-999999999.0,3.3,5.4,4.4,1.6,3.7,6,27180


In [29]:
#Find all columns without "Poverty Status" in column title
#[col for col in withoutsex.columns if "POVERTY STATUS" not in col]

In [30]:
#Copy of the data keeping only columns whose title does not contain "POVERTY STATUS";
#85 columns in the original run
withoutps = withoutsex.loc[
    :, [title for title in withoutsex.columns if 'POVERTY STATUS' not in title]
].copy()

In [31]:
withoutps.head()

Unnamed: 0,Geography,NAME,Estimate!!Total!!Population 16 years and over,Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years,Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years,Estimate!!Total!!Population 16 years and over!!AGE!!25 to 29 years,Estimate!!Total!!Population 16 years and over!!AGE!!30 to 34 years,Estimate!!Total!!Population 16 years and over!!AGE!!35 to 44 years,Estimate!!Total!!Population 16 years and over!!AGE!!45 to 54 years,Estimate!!Total!!Population 16 years and over!!AGE!!55 to 59 years,...,"Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone, not Hispanic or Latino",Estimate!!Unemployment rate!!Population 20 to 64 years,Estimate!!Unemployment rate!!Population 20 to 64 years!!DISABILITY STATUS!!With any disability,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Less than high school graduate,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency),Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher,STATE,SCHOOL DISTRICT (ELEMENTARY)
1,9500000US0633930,"Salinas City Elementary School District, Calif...",56443,4282,5057,6403,6849,11192,9023,2987,...,6.3,5.2,16.6,5.5,7.4,8.4,3.3,2.1,6,33930
2,9500000US0634920,San Mateo-Foster City Elementary School Distri...,114601,6295,6313,11046,11901,20316,18948,9290,...,3.2,3.1,9.0,3.2,9.7,1.2,2.2,3.2,6,34920
3,9500000US0628140,Ocean View Elementary School District (Orange ...,86182,5622,6712,9076,8451,11158,13537,6286,...,5.4,5.2,15.9,4.9,6.3,3.4,5.0,5.3,6,28140
4,9500000US0626280,Mountain View Whisman Elementary School Distri...,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,...,2.3,-999999999.0,0.7,2.4,0.0,0.0,10.7,1.7,6,26280
5,9500000US0627180,"Newhall Elementary School District, California",57044,4601,4639,4124,5017,7396,11510,4920,...,3.9,3.4,-999999999.0,3.3,5.4,4.4,1.6,3.7,6,27180


In [32]:
#Find all columns without "Disability Status" in column title
#[col for col in withoutps.columns if "DISABILITY STATUS" not in col]

In [33]:
#Copy of the data keeping only columns whose title does not contain "DISABILITY STATUS";
#82 columns in the original run
withoutds = withoutps.loc[
    :, [title for title in withoutps.columns if 'DISABILITY STATUS' not in title]
].copy()

In [34]:
withoutds.head()

Unnamed: 0,Geography,NAME,Estimate!!Total!!Population 16 years and over,Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years,Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years,Estimate!!Total!!Population 16 years and over!!AGE!!25 to 29 years,Estimate!!Total!!Population 16 years and over!!AGE!!30 to 34 years,Estimate!!Total!!Population 16 years and over!!AGE!!35 to 44 years,Estimate!!Total!!Population 16 years and over!!AGE!!45 to 54 years,Estimate!!Total!!Population 16 years and over!!AGE!!55 to 59 years,...,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Hispanic or Latino origin (of any race),"Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone, not Hispanic or Latino",Estimate!!Unemployment rate!!Population 20 to 64 years,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Less than high school graduate,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency),Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree,Estimate!!Unemployment rate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher,STATE,SCHOOL DISTRICT (ELEMENTARY)
1,9500000US0633930,"Salinas City Elementary School District, Calif...",56443,4282,5057,6403,6849,11192,9023,2987,...,5.2,6.3,5.2,5.5,7.4,8.4,3.3,2.1,6,33930
2,9500000US0634920,San Mateo-Foster City Elementary School Distri...,114601,6295,6313,11046,11901,20316,18948,9290,...,-999999999.0,3.2,3.1,3.2,9.7,1.2,2.2,3.2,6,34920
3,9500000US0628140,Ocean View Elementary School District (Orange ...,86182,5622,6712,9076,8451,11158,13537,6286,...,5.0,5.4,5.2,4.9,6.3,3.4,5.0,5.3,6,28140
4,9500000US0626280,Mountain View Whisman Elementary School Distri...,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,...,-999999999.0,2.3,-999999999.0,2.4,0.0,0.0,10.7,1.7,6,26280
5,9500000US0627180,"Newhall Elementary School District, California",57044,4601,4639,4124,5017,7396,11510,4920,...,-999999999.0,3.9,3.4,3.3,5.4,4.4,1.6,3.7,6,27180


In [35]:
#Find all columns without "Educational Attainment" in column title
#[col for col in withoutds.columns if "EDUCATIONAL ATTAINMENT" not in col]

In [36]:
#Copy of the data keeping only columns whose title does not contain "EDUCATIONAL ATTAINMENT";
#67 columns in the original run
withoutea = withoutds.loc[
    :, [title for title in withoutds.columns if 'EDUCATIONAL ATTAINMENT' not in title]
].copy()

In [37]:
withoutea.head()

Unnamed: 0,Geography,NAME,Estimate!!Total!!Population 16 years and over,Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years,Estimate!!Total!!Population 16 years and over!!AGE!!20 to 24 years,Estimate!!Total!!Population 16 years and over!!AGE!!25 to 29 years,Estimate!!Total!!Population 16 years and over!!AGE!!30 to 34 years,Estimate!!Total!!Population 16 years and over!!AGE!!35 to 44 years,Estimate!!Total!!Population 16 years and over!!AGE!!45 to 54 years,Estimate!!Total!!Population 16 years and over!!AGE!!55 to 59 years,...,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!American Indian and Alaska Native alone,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Asian alone,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Native Hawaiian and Other Pacific Islander alone,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Some other race alone,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Two or more races,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Hispanic or Latino origin (of any race),"Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone, not Hispanic or Latino",Estimate!!Unemployment rate!!Population 20 to 64 years,STATE,SCHOOL DISTRICT (ELEMENTARY)
1,9500000US0633930,"Salinas City Elementary School District, Calif...",56443,4282,5057,6403,6849,11192,9023,2987,...,-999999999.0,-999999999.0,-999999999.0,4.4,-999999999.0,5.2,6.3,5.2,6,33930
2,9500000US0634920,San Mateo-Foster City Elementary School Distri...,114601,6295,6313,11046,11901,20316,18948,9290,...,-999999999.0,2.5,-999999999.0,-999999999.0,-999999999.0,-999999999.0,3.2,3.1,6,34920
3,9500000US0628140,Ocean View Elementary School District (Orange ...,86182,5622,6712,9076,8451,11158,13537,6286,...,-999999999.0,4.7,-999999999.0,-999999999.0,-999999999.0,5.0,5.4,5.2,6,28140
4,9500000US0626280,Mountain View Whisman Elementary School Distri...,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,...,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,2.3,-999999999.0,6,26280
5,9500000US0627180,"Newhall Elementary School District, California",57044,4601,4639,4124,5017,7396,11510,4920,...,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,3.9,3.4,6,27180


In [38]:
#Find all columns without "Age" in column title
#[col for col in withoutea.columns if "AGE" not in col]

In [39]:
#Copy of the data keeping only columns whose title does not contain "AGE". The match is a
#case-sensitive substring test, so it targets the upper-case AGE heading in the titles;
#37 columns in the original run
withoutage = withoutea.loc[
    :, [title for title in withoutea.columns if 'AGE' not in title]
].copy()

In [40]:
withoutage.head()

Unnamed: 0,Geography,NAME,Estimate!!Total!!Population 16 years and over,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Black or African American alone,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!American Indian and Alaska Native alone,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Asian alone,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Native Hawaiian and Other Pacific Islander alone,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Some other race alone,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Two or more races,...,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!American Indian and Alaska Native alone,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Asian alone,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Native Hawaiian and Other Pacific Islander alone,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Some other race alone,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Two or more races,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Hispanic or Latino origin (of any race),"Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone, not Hispanic or Latino",Estimate!!Unemployment rate!!Population 20 to 64 years,STATE,SCHOOL DISTRICT (ELEMENTARY)
1,9500000US0633930,"Salinas City Elementary School District, Calif...",56443,21306,-999999999,-999999999,-999999999,-999999999,29512,-999999999,...,-999999999.0,-999999999.0,-999999999.0,4.4,-999999999.0,5.2,6.3,5.2,6,33930
2,9500000US0634920,San Mateo-Foster City Elementary School Distri...,114601,57723,-999999999,-999999999,37442,-999999999,-999999999,-999999999,...,-999999999.0,2.5,-999999999.0,-999999999.0,-999999999.0,-999999999.0,3.2,3.1,6,34920
3,9500000US0628140,Ocean View Elementary School District (Orange ...,86182,53903,-999999999,-999999999,13974,-999999999,-999999999,-999999999,...,-999999999.0,4.7,-999999999.0,-999999999.0,-999999999.0,5.0,5.4,5.2,6,28140
4,9500000US0626280,Mountain View Whisman Elementary School Distri...,-999999999,33047,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,...,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,2.3,-999999999.0,6,26280
5,9500000US0627180,"Newhall Elementary School District, California",57044,39422,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,...,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,3.9,3.4,6,27180


In [41]:
#Format to rename School District Name
def cleandistrict(NAME, state_suffix=", California"):
    """Return the district name without its trailing state suffix.

    NAME: full Census district name, e.g. "Newhall Elementary School District, California".
    state_suffix: text removed from the END of the name when present; defaults to the
        ", California" ending used by this notebook's query.

    Only a trailing suffix is removed. The earlier str.replace() version deleted every
    occurrence, so a name containing the suffix text mid-string would have been altered.
    Names that do not end with the suffix are returned unchanged.
    """
    if state_suffix and NAME.endswith(state_suffix):
        return NAME[:-len(state_suffix)]
    return NAME

In [42]:
#Apply formatting condition to School District Name
withoutage['NAME'].apply(cleandistrict)

1               Salinas City Elementary School District
2      San Mateo-Foster City Elementary School District
3     Ocean View Elementary School District (Orange ...
4      Mountain View Whisman Elementary School District
5                    Newhall Elementary School District
6                  Oak Grove Elementary School District
7                           Westminster School District
8                      Bakersfield City School District
9                    Panama-Buena Vista School District
10                        El Monte City School District
11                      South Bay Union School District
12           Pleasant Valley Elementary School District
13                    Ontario-Montclair School District
14                 Lancaster Elementary School District
15               Tulare City Elementary School District
16                 Sunnyvale Elementary School District
17           Alum Rock Union Elementary School District
18                   Anaheim Elementary School D

In [43]:
#Store the cleaned school district names in a new column titled District
withoutage['District'] = withoutage['NAME'].map(cleandistrict)

In [44]:
#Check the length of the Geography column contents (16 per the original run); only the
#last expression of a cell is displayed, so this result does not appear in the output
withoutage['Geography'].map(len).unique()

#Sample geography code to experiment with
geo = "9700000US0600158"

#The NCESDist ID is the part that follows "US"
geo.split("US")[1]

'0600158'

In [45]:
#Format to separate the Census Geography code: keep the state and district ID (combined) that follows "US"
def splitGeo(geo):
    """Return the piece of the geography code that sits after the first "US" marker.

    For codes with a single "US", such as "9500000US0633930", this is everything
    after it ("0633930"). Raises IndexError when the code contains no "US".
    """
    pieces = geo.split("US")
    return pieces[1]

In [46]:
#Apply formatting condition to Geography
withoutage['Geography'].apply(splitGeo)

1     0633930
2     0634920
3     0628140
4     0626280
5     0627180
6     0627810
7     0642150
8     0603630
9     0606390
10    0612090
11    0637380
12    0630990
13    0628470
14    0620880
15    0639870
16    0638460
17    0602310
18    0602610
19    0608610
20    0641040
21    0642120
22    0618030
23    0618870
24    0635970
25    0638670
26    0642450
27    0616680
28    0624600
29    0632130
30    0625130
31    0604800
32    0605580
33    0611850
34    0620250
35    0635590
36    0635810
37    0612880
38    0612750
39    0612960
40    0613140
41    0623430
42    0614370
43    0614730
44    0615510
45    0624540
46    0629220
47    0629580
48    0606810
49    0607200
50    0610290
51    0605910
52    0633600
Name: Geography, dtype: object

In [47]:
#Store the ID extracted from each Geography code in a new column titled NCESDist
withoutage['NCESDist'] = withoutage['Geography'].map(splitGeo)

In [48]:
#Add Year column; the value is the string "2019" (not a number), the same year that appears in the request URL
withoutage['year']= "2019"

In [49]:
withoutage.head()

Unnamed: 0,Geography,NAME,Estimate!!Total!!Population 16 years and over,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Black or African American alone,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!American Indian and Alaska Native alone,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Asian alone,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Native Hawaiian and Other Pacific Islander alone,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Some other race alone,Estimate!!Total!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Two or more races,...,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Some other race alone,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Two or more races,Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!Hispanic or Latino origin (of any race),"Estimate!!Unemployment rate!!Population 16 years and over!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone, not Hispanic or Latino",Estimate!!Unemployment rate!!Population 20 to 64 years,STATE,SCHOOL DISTRICT (ELEMENTARY),District,NCESDist,year
1,9500000US0633930,"Salinas City Elementary School District, Calif...",56443,21306,-999999999,-999999999,-999999999,-999999999,29512,-999999999,...,4.4,-999999999.0,5.2,6.3,5.2,6,33930,Salinas City Elementary School District,633930,2019
2,9500000US0634920,San Mateo-Foster City Elementary School Distri...,114601,57723,-999999999,-999999999,37442,-999999999,-999999999,-999999999,...,-999999999.0,-999999999.0,-999999999.0,3.2,3.1,6,34920,San Mateo-Foster City Elementary School District,634920,2019
3,9500000US0628140,Ocean View Elementary School District (Orange ...,86182,53903,-999999999,-999999999,13974,-999999999,-999999999,-999999999,...,-999999999.0,-999999999.0,5.0,5.4,5.2,6,28140,Ocean View Elementary School District (Orange ...,628140,2019
4,9500000US0626280,Mountain View Whisman Elementary School Distri...,-999999999,33047,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,...,-999999999.0,-999999999.0,-999999999.0,2.3,-999999999.0,6,26280,Mountain View Whisman Elementary School District,626280,2019
5,9500000US0627180,"Newhall Elementary School District, California",57044,39422,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,...,-999999999.0,-999999999.0,-999999999.0,3.9,3.4,6,27180,Newhall Elementary School District,627180,2019


In [50]:
#Export the formatted table to CSV.
#NOTE(review): the destination is an absolute path on the original author's machine, so this
#cell fails elsewhere until the path is changed. No index argument is passed, so the dataframe
#index is presumably written as the first, unnamed column (pandas default) - confirm that is wanted.
withoutage.to_csv("/Users/nataligracia/git/ca-school-enrollment-trend/Elementary2019.csv")

In [51]:
#Rearrange columns in list
#['NCESDist','District'] + list(without_annotation_df.columns[2:])

In [52]:
#Rearrange columns in dataframe
#final = without_annotation_df[['NCESDist','District','Geography','NAME'] + list(without_annotation_df.columns[2:])]
#final