# Census - Employment Status Data

In [1]:
import pandas as pd
import requests

In [2]:
#Census ACS 1-year Subject Table API request for 2018: every variable in group S2301 (Employment Status)
#for each unified school district in California (state code 06)
url="https://api.census.gov/data/2018/acs/acs1/subject?get=group(S2301)&for=school%20district%20(unified)&in=state:06"

In [3]:
#Request the data from the Census API.
#The timeout keeps the cell from hanging forever on a stalled connection, and
#raise_for_status() stops right here on anything other than a successful response
#instead of failing later with a confusing JSON parsing error.
response = requests.get(url, timeout=60)
response.raise_for_status()

In [4]:
#Parse the JSON body of the API response for future formatting;
#the result is a list of rows whose first row holds the column headers
response_json = response.json()

In [5]:
#Load every row into a dataframe, using the first row as the column names, then drop
#that header row (index 0) from the data, which leaves 120 schools and 564 columns of variables
unifiedschool_df = (
    pd.DataFrame(response_json, columns=response_json[0])
    .drop(index=0)
)

In [6]:
#Find Name column
#unifiedschool_df.to_csv("/Users/nataligracia/git/ca-school-enrollment-trend/Unified2018test.csv")

In [7]:
#Show the row of a single School District to inspect the column titles
unifiedschool_df[unifiedschool_df["NAME"].eq("Los Angeles Unified School District, California")]

Unnamed: 0,GEO_ID,S2301_C01_001E,S2301_C01_001M,S2301_C02_001E,S2301_C02_001M,S2301_C03_001E,S2301_C03_001M,S2301_C04_001E,S2301_C04_001M,S2301_C01_002E,...,S2301_C01_035EA,S2301_C01_035MA,S2301_C02_035EA,S2301_C02_035MA,S2301_C03_035EA,S2301_C03_035MA,S2301_C04_035EA,S2301_C04_035MA,state,school district (unified)
79,9700000US0622710,3877095,15846,66.7,0.4,62.6,0.4,6.0,0.2,233195,...,,,,,,,,,6,22710


## _Format Data_

In [8]:
#Census variable metadata (variables.json) for the 2018 ACS 1-year Subject Tables,
#used to translate column codes such as S2301_C01_001E into descriptive titles
variableurl = "https://api.census.gov/data/2018/acs/acs1/subject/variables.json"

In [9]:
#Request the variable metadata from the Census API and parse the JSON body.
#The timeout prevents an indefinite hang, and raise_for_status() stops on an
#HTTP error before the body is parsed.
variables_response = requests.get(variableurl, timeout=60)
variables_response.raise_for_status()
variables_json = variables_response.json()

In [10]:
#Look up the metadata of one variable; its 'label' entry is the text used for the column title
variables_json["variables"]["S2301_C01_001E"]

{'label': 'Estimate!!Total!!Population 16 years and over',
 'concept': 'EMPLOYMENT STATUS',
 'predicateType': 'int',
 'group': 'S2301',
 'limit': 0,
 'attributes': 'S2301_C01_001EA,S2301_C01_001M,S2301_C01_001MA'}

In [11]:
#Find a descriptive title for every column from the Census variable metadata
def _census_label(col, variables):
    """Return the descriptive title for the column code `col`, or None if there is none.

    `variables` is the "variables" mapping of variables.json. A code that is
    listed there uses its own label. A code ending in EA, M or MA is looked up
    through the matching estimate code (the suffix replaced by E) and the label
    gets ||Annotation, ||MarginOfError or ||MarginOfErrorAnnotation appended.
    Any code that cannot be resolved returns None so it can be titled by hand.
    """
    entry = variables.get(col)
    if entry is not None:
        return entry['label']
    suffix_tags = (
        ('EA', "||Annotation"),
        ('M', "||MarginOfError"),
        ('MA', "||MarginOfErrorAnnotation"),
    )
    for suffix, tag in suffix_tags:
        if col.endswith(suffix):
            estimate = variables.get(col[:-len(suffix)] + 'E')
            #A missing estimate variable gives None instead of raising a TypeError
            return None if estimate is None else estimate['label'] + tag
    return None

new_labels = [_census_label(col, variables_json["variables"]) for col in unifiedschool_df.columns]

In [12]:
#Display the generated titles to spot any columns left without one (they show up as None)
new_labels  

['Geography',
 'Estimate!!Total!!Population 16 years and over',
 'Estimate!!Total!!Population 16 years and over||MarginOfError',
 'Estimate!!Labor Force Participation Rate!!Population 16 years and over',
 'Estimate!!Labor Force Participation Rate!!Population 16 years and over||MarginOfError',
 'Estimate!!Employment/Population Ratio!!Population 16 years and over',
 'Estimate!!Employment/Population Ratio!!Population 16 years and over||MarginOfError',
 'Estimate!!Unemployment rate!!Population 16 years and over',
 'Estimate!!Unemployment rate!!Population 16 years and over||MarginOfError',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfError',
 'Estimate!!Labor Force Participation Rate!!Population 16 years and over!!AGE!!16 to 19 years',
 'Estimate!!Labor Force Participation Rate!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfError',
 'Estimate!!Employment/Population Ratio!!Pop

In [13]:
#Check that exactly one title was generated for every column of the dataframe
assert len(new_labels) == len(unifiedschool_df.columns)

In [14]:
#Count how many columns are still without a title
new_labels.count(None)

3

In [15]:
#Set up titles for the columns that have no Census variable label ("None").
#They are matched by their original API column name instead of by list position,
#so the right column is renamed even if the API returns the columns in another order.
fallback_labels = {
    'NAME': 'NAME',
    'state': 'STATE',
    'school district (unified)': 'SCHOOL DISTRICT',
}
for position, (api_name, label) in enumerate(zip(unifiedschool_df.columns, new_labels)):
    #Only fill in titles that are still missing; existing titles are never overwritten
    if label is None and api_name in fallback_labels:
        new_labels[position] = fallback_labels[api_name]

In [16]:
#Replace the column codes of the dataframe with the new titles.
#NOTE: this renames the columns of unifiedschool_df in place, so the cells above that
#look columns up by their original code (e.g. "NAME" lookups still work, "S2301_..." do not)
#need the dataframe to be rebuilt before they are run again.
unifiedschool_df.columns = new_labels

In [17]:
#List every column title that does not contain "Annotation"
unifiedschool_df.columns[~unifiedschool_df.columns.str.contains("Annotation", regex=False)].tolist()

['Geography',
 'Estimate!!Total!!Population 16 years and over',
 'Estimate!!Total!!Population 16 years and over||MarginOfError',
 'Estimate!!Labor Force Participation Rate!!Population 16 years and over',
 'Estimate!!Labor Force Participation Rate!!Population 16 years and over||MarginOfError',
 'Estimate!!Employment/Population Ratio!!Population 16 years and over',
 'Estimate!!Employment/Population Ratio!!Population 16 years and over||MarginOfError',
 'Estimate!!Unemployment rate!!Population 16 years and over',
 'Estimate!!Unemployment rate!!Population 16 years and over||MarginOfError',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years',
 'Estimate!!Total!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfError',
 'Estimate!!Labor Force Participation Rate!!Population 16 years and over!!AGE!!16 to 19 years',
 'Estimate!!Labor Force Participation Rate!!Population 16 years and over!!AGE!!16 to 19 years||MarginOfError',
 'Estimate!!Employment/Population Ratio!!Pop

In [18]:
#New dataframe that leaves out every column with "Annotation" in the title, which is 284 columns
without_annotation = unifiedschool_df.loc[
    :, ~unifiedschool_df.columns.str.contains("Annotation", regex=False)
].copy()

In [19]:
#Find all columns without "MarginOfError" in column title
#[col for col in without_annotation_df.columns if "MarginOfError" not in col]

In [20]:
#New dataframe that leaves out every column with "MarginOfError" in the title, which is 144 columns
withoutmarginerror = without_annotation.loc[
    :, ~without_annotation.columns.str.contains('MarginOfError', regex=False)
].copy()

In [21]:
#Find all columns without "Labor Force Participation Rate" in column title
#[col for col in withoutmarginerror.columns if "Labor Force Participation Rate" not in col]

In [22]:
#New dataframe that leaves out every column with "Labor Force Participation Rate" in the title, which is 109 columns
withoutlaborforce = withoutmarginerror.loc[
    :, ~withoutmarginerror.columns.str.contains('Labor Force Participation Rate', regex=False)
].copy()

In [23]:
#Find all columns without "Sex" in column title
#[col for col in withoutlaborforce.columns if "SEX" not in col]

In [24]:
#New dataframe that leaves out every column with "SEX" in the title, which is 91 columns
withoutsex = withoutlaborforce.loc[
    :, ~withoutlaborforce.columns.str.contains('SEX', regex=False)
].copy()

In [25]:
#Find all columns without "Poverty Status" in column title
#[col for col in withoutsex.columns if "POVERTY STATUS" not in col]

In [26]:
#New dataframe that leaves out every column with "POVERTY STATUS" in the title, which is 85 columns
withoutps = withoutsex.loc[
    :, ~withoutsex.columns.str.contains('POVERTY STATUS', regex=False)
].copy()

In [27]:
#Find all columns without "Disability Status" in column title
#[col for col in withoutps.columns if "DISABILITY STATUS" not in col]

In [28]:
#New dataframe that leaves out every column with "DISABILITY STATUS" in the title, which is 82 columns
withoutds = withoutps.loc[
    :, ~withoutps.columns.str.contains('DISABILITY STATUS', regex=False)
].copy()

In [29]:
#Find all columns without "Educational Attainment" in column title
#[col for col in withoutds.columns if "EDUCATIONAL ATTAINMENT" not in col]

In [30]:
#New dataframe that leaves out every column with "EDUCATIONAL ATTAINMENT" in the title, which is 67 columns
withoutea = withoutds.loc[
    :, ~withoutds.columns.str.contains('EDUCATIONAL ATTAINMENT', regex=False)
].copy()

In [31]:
#Find all columns without "Age" in column title
#[col for col in withoutea.columns if "AGE" not in col]

In [32]:
#New dataframe that leaves out every column with "AGE" (upper case) in the title, which is 37 columns
withoutage = withoutea.loc[
    :, ~withoutea.columns.str.contains('AGE', regex=False)
].copy()

In [33]:
#Find all columns without "Race" in column title
#[col for col in withoutage.columns if "RACE" not in col]

In [34]:
#New dataframe that leaves out every column with "RACE" in the title, which is 10 columns
withoutrace = withoutage.loc[
    :, ~withoutage.columns.str.contains('RACE', regex=False)
].copy()

In [35]:
#Format to rename School District Name
#def cleandistrict(NAME):
    #return NAME.replace(", California","")

In [36]:
#Apply formatting condition to School District Name
#withoutrace['NAME'].apply(cleandistrict)

In [37]:
#Create new School District name column with formatting titled District
#withoutrace['District']= withoutrace['NAME'].apply(cleandistrict)

In [38]:
#Find the length of the Geography column contents, which is 16
withoutrace['Geography'].map(len).unique()

#Take one geography code as an example
geo = "9700000US0622710"

#The NCESDist ID is the part that follows "US"
geo.partition("US")[2]

'0622710'

In [39]:
#Format to separate the Census Geography code and keep the state and district ID combined
def splitGeo(geo):
    """Return the part of `geo` that follows its first "US" (up to the next "US", if any).

    Example: "9700000US0622710" gives "0622710".
    Raises IndexError when `geo` does not contain "US".
    """
    pieces = geo.split("US")
    return pieces[1]

In [40]:
#Preview the result of the formatting function on every Geography code
withoutrace['Geography'].map(splitGeo)

1      0604740
2      0634710
3      0604290
4      0610620
5      0611110
        ...   
116    0622230
117    0625830
118    0633150
119    0638010
120    0643080
Name: Geography, Length: 120, dtype: object

In [41]:
#Store the formatted Geography code in a new column titled NCESDist
withoutrace['NCESDist'] = withoutrace['Geography'].map(splitGeo)

In [42]:
#Add a year column; every row gets the text value "2018" (a string, not a number)
withoutrace['year']= "2018"

In [43]:
#Add a School District Type column; every row gets the value "Unified"
withoutrace['School District Type']= "Unified"

In [44]:
#Preview the first rows to check the remaining columns and the three new ones
withoutrace.head()

Unnamed: 0,Geography,Estimate!!Total!!Population 16 years and over,Estimate!!Employment/Population Ratio!!Population 16 years and over,Estimate!!Unemployment rate!!Population 16 years and over,Estimate!!Total!!Population 20 to 64 years,Estimate!!Employment/Population Ratio!!Population 20 to 64 years,Estimate!!Unemployment rate!!Population 20 to 64 years,NAME,STATE,SCHOOL DISTRICT,NCESDist,year,School District Type
1,9700000US0604740,109560,58.1,3.9,79537,69.6,3.6,"Berkeley Unified School District, California",6,4740,604740,2018,Unified
2,9700000US0634710,63248,62.3,3.8,49083,75.9,3.3,"San Lorenzo Unified School District, California",6,34710,634710,2018,Unified
3,9700000US0604290,50539,56.1,3.4,34450,74.2,3.6,"Beaumont Unified School District, California",6,4290,604290,2018,Unified
4,9700000US0610620,71904,50.2,5.8,52289,61.1,4.7,"Davis Joint Unified School District, California",6,10620,610620,2018,Unified
5,9700000US0611110,166990,46.8,10.7,104703,67.2,10.4,"Desert Sands Unified School District, California",6,11110,611110,2018,Unified


In [45]:
#Final table: leave out the identifier columns that are no longer needed
#(NCESDist stays as the district identifier)
unified2018 = withoutrace.drop(
    ['Geography', 'NAME', 'STATE', 'SCHOOL DISTRICT'],
    axis="columns",
)

In [46]:
#Save the final 2018 table for unified school districts as a CSV file.
#NOTE(review): the path is absolute and specific to one machine - adjust it before running elsewhere.
#NOTE(review): to_csv writes the dataframe index as an extra unnamed first column by default - confirm this is wanted.
unified2018.to_csv("/Users/nataligracia/git/ca-school-enrollment-trend/Unified2018.csv")

In [47]:
#Rearrange columns in list
#['NCESDist','District'] + list(without_annotation_df.columns[2:])

In [48]:
#Rearrange columns in dataframe
#final = without_annotation_df[['NCESDist','District','Geography','NAME'] + list(without_annotation_df.columns[2:])]
#final