# Census- Employment Status Data

In [1]:
from pathlib import Path

import pandas as pd
import requests

In [2]:
#Census Subject Table API request for Employment Status data (group S2301) within Secondary School Districts in California (state:06).
#The path requests the 2016 acs/acs1 dataset (not 2018).
url="https://api.census.gov/data/2016/acs/acs1/subject?get=group(S2301)&for=school%20district%20(secondary)&in=state:06"

In [3]:
#Request the data from the Census API.
#A timeout keeps the cell from hanging indefinitely, and raise_for_status() stops the
#notebook on a non-2xx answer instead of passing an error payload to the parsing cells.
response = requests.get(url, timeout=60)
response.raise_for_status()

In [4]:
#Decode the JSON body of the API response into Python objects.
#The first element (index 0) is used as the header row when the dataframe is built.
response_json = response.json()

In [5]:
#Load the response into a dataframe, naming the columns after the first list of the
#response, then remove row 0 because it only repeats those names
#(original note: 45 schools and 702 columns of variables)
header_row = response_json[0]
secondaryschool_df = pd.DataFrame(data=response_json, columns=header_row)
secondaryschool_df = secondaryschool_df.drop(index=0)

In [6]:
#Display the raw dataframe; the column names are still Census variable codes at this point
secondaryschool_df

Unnamed: 0,S2301_C01_001E,S2301_C01_001M,S2301_C01_002E,S2301_C01_002M,S2301_C01_003E,S2301_C01_003M,S2301_C01_004E,S2301_C01_004M,S2301_C01_005E,S2301_C01_005M,...,S2301_C04_033EA,S2301_C04_033MA,S2301_C04_034M,S2301_C04_034EA,S2301_C04_034MA,S2301_C04_035M,S2301_C04_035EA,S2301_C04_035MA,state,school district (secondary)
1,100100,4475,7308,1195,5693,1535,6369,1491,6206,1421,...,,,1.9,,,1.2,,,6,1650
2,319085,8784,24209,2535,32503,3239,34130,3548,28933,2897,...,,,1.6,,,1.1,,,6,2630
3,287277,7045,23883,2510,26985,2612,28820,3336,24469,2546,...,,,1.8,,,1.6,,,6,2820
4,87007,4165,15015,1563,18335,2288,5363,1202,5360,1229,...,,,4.8,,,2.9,,,6,6019
5,69912,4425,5313,1492,3859,1097,7947,1981,5920,1448,...,,,3.4,,,3.2,,,6,6034
6,180987,7729,8238,1437,10272,1842,16354,2256,19025,2510,...,,,1.3,,,1.0,,,6,7230
7,119432,5724,8049,1209,13021,2220,16273,1766,13419,1978,...,,,3.3,,,2.8,,,6,7920
8,325335,6461,23168,2322,32470,3784,34646,3228,31178,2485,...,,,1.4,,,0.8,,,6,8160
9,51366,3832,5063,1358,7216,1685,7481,1301,4604,1081,...,,,5.8,,,10.8,,,6,10860
10,440448,11175,27784,2251,40078,3258,41610,3817,42900,3048,...,,,1.6,,,0.8,,,6,11820


In [7]:
#secondaryschool_df.to_csv("/Users/nataligracia/git/ca-school-enrollment-trend/Secondary2016test.csv")

In [8]:
#View School District to view column titles, 2016 data does not include district name
#secondaryschool_df.loc[secondaryschool_df["NAME"]=="Los Angeles Unified School District, California"]

## _Format Data_

In [9]:
#URL of the variable definitions (variables.json) for the same 2016 acs/acs1 subject dataset;
#it is used to translate variable codes such as S2301_C01_001E into readable column titles
variableurl = "https://api.census.gov/data/2016/acs/acs1/subject/variables.json"

In [10]:
#Request the variable definitions from the Census API and decode the JSON body.
#raise_for_status() makes sure an error answer is never parsed as the variable list,
#and the timeout keeps the cell from hanging indefinitely.
variables_response = requests.get(variableurl, timeout=60)
variables_response.raise_for_status()
variables_json = variables_response.json()

In [11]:
#View one variable entry to see its structure; the 'label' field is what is used as the column title
variables_json["variables"]["S2301_C01_001E"]

{'label': 'Total!!Estimate!!Population 16 years and over',
 'predicateType': 'int',
 'group': 'S2301',
 'limit': 0,
 'attributes': 'S2301_C01_001M,S2301_C01_001EA'}

In [12]:
#Find and replace all columns with variable titles
def _label_for(col, variables):
    """Return the readable title for one column code, or None when there is none.

    col: a column name from the API response (for example 'S2301_C01_001E').
    variables: the "variables" mapping of variables.json, keyed by variable code.

    A code that is present in the mapping uses its own 'label'. Any other code is
    resolved through the estimate variable it belongs to, and a suffix is appended
    to say what kind of column it is:
        ...EA -> estimate label + "||Annotation"
        ...M  -> estimate label + "||MarginOfError"
        ...MA -> estimate label + "||MarginOfErrorAnnotation"
    Codes that match none of these rules (such as the geography columns) give None.
    """
    entry = variables.get(col)
    if entry is not None:
        return entry['label']

    #Work out which estimate variable the column belongs to (same order of checks as before)
    if col.endswith('EA'):
        estimate_code, suffix = col[:-1], "||Annotation"
    elif col.endswith('M'):
        estimate_code, suffix = col[:-1] + 'E', "||MarginOfError"
    elif col.endswith('MA'):
        estimate_code, suffix = col[:-2] + 'E', "||MarginOfErrorAnnotation"
    else:
        return None

    estimate = variables.get(estimate_code)
    #A suffix match without a matching estimate variable used to fail with
    #TypeError (None['label']); returning None leaves the column untitled so it is
    #picked up by the "columns without titles" checks that follow
    if estimate is None:
        return None
    return estimate['label'] + suffix


new_labels = [_label_for(col, variables_json["variables"]) for col in secondaryschool_df.columns]

In [13]:
#Inspect the generated titles; a column for which no title could be built appears as None
new_labels

['Total!!Estimate!!Population 16 years and over',
 'Total!!Estimate!!Population 16 years and over||MarginOfError',
 'Total!!Estimate!!AGE!!16 to 19 years',
 'Total!!Estimate!!AGE!!16 to 19 years||MarginOfError',
 'Total!!Estimate!!AGE!!20 to 24 years',
 'Total!!Estimate!!AGE!!20 to 24 years||MarginOfError',
 'Total!!Estimate!!AGE!!25 to 29 years',
 'Total!!Estimate!!AGE!!25 to 29 years||MarginOfError',
 'Total!!Estimate!!AGE!!30 to 34 years',
 'Total!!Estimate!!AGE!!30 to 34 years||MarginOfError',
 'Total!!Estimate!!AGE!!35 to 44 years',
 'Total!!Estimate!!AGE!!35 to 44 years||MarginOfError',
 'Total!!Estimate!!AGE!!45 to 54 years',
 'Total!!Estimate!!AGE!!45 to 54 years||MarginOfError',
 'Total!!Estimate!!AGE!!55 to 59 years',
 'Total!!Estimate!!AGE!!55 to 59 years||MarginOfError',
 'Total!!Estimate!!AGE!!60 to 64 years',
 'Total!!Estimate!!AGE!!60 to 64 years||MarginOfError',
 'Total!!Estimate!!AGE!!65 to 74 years',
 'Total!!Estimate!!AGE!!65 to 74 years||MarginOfError',
 'Total!!Est

In [14]:
#Sanity check before renaming: there must be exactly one generated title per dataframe
#column, otherwise assigning the list to the dataframe header would fail or misalign
assert len(new_labels) == len(secondaryschool_df.columns), (
    "expected one label per column, got {} labels for {} columns".format(
        len(new_labels), len(secondaryschool_df.columns)
    )
)

In [15]:
#Count how many columns still have no title (entries that are None)
sum(label is None for label in new_labels)

2

In [16]:
#Setup new Labels of columns labeled "None".
#The titles are chosen by the original column name rather than by list position
#(previously new_labels[-2] / new_labels[-1]), so a change in the order or number of
#columns returned by the API cannot put a title on the wrong column.
geography_labels = {
    'state': 'STATE',
    'school district (secondary)': 'SCHOOL DISTRICT (SECONDARY)',
}
new_labels = [
    geography_labels.get(col) if label is None else label
    for col, label in zip(secondaryschool_df.columns, new_labels)
]
#Every column must have a title now; an untitled column would otherwise only surface
#later as a TypeError in the substring filters
assert None not in new_labels, "some columns still have no title"

In [17]:
#Replace the Census variable codes in the dataframe header with the readable titles
#(this changes secondaryschool_df itself rather than creating a new dataframe)
secondaryschool_df.columns = new_labels

In [18]:
#List every column title that does not contain "Annotation"
list(filter(lambda title: "Annotation" not in title, secondaryschool_df.columns))

['Total!!Estimate!!Population 16 years and over',
 'Total!!Estimate!!Population 16 years and over||MarginOfError',
 'Total!!Estimate!!AGE!!16 to 19 years',
 'Total!!Estimate!!AGE!!16 to 19 years||MarginOfError',
 'Total!!Estimate!!AGE!!20 to 24 years',
 'Total!!Estimate!!AGE!!20 to 24 years||MarginOfError',
 'Total!!Estimate!!AGE!!25 to 29 years',
 'Total!!Estimate!!AGE!!25 to 29 years||MarginOfError',
 'Total!!Estimate!!AGE!!30 to 34 years',
 'Total!!Estimate!!AGE!!30 to 34 years||MarginOfError',
 'Total!!Estimate!!AGE!!35 to 44 years',
 'Total!!Estimate!!AGE!!35 to 44 years||MarginOfError',
 'Total!!Estimate!!AGE!!45 to 54 years',
 'Total!!Estimate!!AGE!!45 to 54 years||MarginOfError',
 'Total!!Estimate!!AGE!!55 to 59 years',
 'Total!!Estimate!!AGE!!55 to 59 years||MarginOfError',
 'Total!!Estimate!!AGE!!60 to 64 years',
 'Total!!Estimate!!AGE!!60 to 64 years||MarginOfError',
 'Total!!Estimate!!AGE!!65 to 74 years',
 'Total!!Estimate!!AGE!!65 to 74 years||MarginOfError',
 'Total!!Est

In [19]:
#Create a new dataframe for data without the columns that have "Annotation" in the title.
#The columns are chosen with a True/False mask (one entry per column) instead of a list
#of titles: the "||MarginOfError" titles occur more than once in secondaryschool_df, and
#selecting by a list that repeats a title returns every matching column once per
#repetition, which inflated the result with extra copies of those columns.
keep_column = ["Annotation" not in title for title in secondaryschool_df.columns]
without_annotation = secondaryschool_df.loc[:, keep_column].copy()

In [20]:
#Preview the first five rows after removing the annotation columns
without_annotation.head()

Unnamed: 0,Total!!Estimate!!Population 16 years and over,Total!!Estimate!!Population 16 years and over||MarginOfError,Total!!Estimate!!Population 16 years and over||MarginOfError.1,Total!!Estimate!!AGE!!16 to 19 years,Total!!Estimate!!AGE!!16 to 19 years||MarginOfError,Total!!Estimate!!AGE!!16 to 19 years||MarginOfError.1,Total!!Estimate!!AGE!!20 to 24 years,Total!!Estimate!!AGE!!20 to 24 years||MarginOfError,Total!!Estimate!!AGE!!20 to 24 years||MarginOfError.1,Total!!Estimate!!AGE!!25 to 29 years,...,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Less than high school graduate||MarginOfError,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Less than high school graduate||MarginOfError.1,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency)||MarginOfError,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency)||MarginOfError.1,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree||MarginOfError,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree||MarginOfError.1,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher||MarginOfError,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher||MarginOfError.1,STATE,SCHOOL DISTRICT (SECONDARY)
1,100100,4475,4475,7308,1195,1195,5693,1535,1535,6369,...,24.8,24.8,5.2,5.2,1.9,1.9,1.2,1.2,6,1650
2,319085,8784,8784,24209,2535,2535,32503,3239,3239,34130,...,1.8,1.8,1.8,1.8,1.6,1.6,1.1,1.1,6,2630
3,287277,7045,7045,23883,2510,2510,26985,2612,2612,28820,...,3.8,3.8,2.6,2.6,1.8,1.8,1.6,1.6,6,2820
4,87007,4165,4165,15015,1563,1563,18335,2288,2288,5363,...,2.8,2.8,6.3,6.3,4.8,4.8,2.9,2.9,6,6019
5,69912,4425,4425,5313,1492,1492,3859,1097,1097,7947,...,6.9,6.9,2.9,2.9,3.4,3.4,3.2,3.2,6,6034


In [21]:
#Find all columns without "MarginOfError" in column title
#[col for col in without_annotation.columns if "MarginOfError" not in col]

In [22]:
#Keep only the columns whose title does not contain "MarginOfError" and store them in a
#new, independent dataframe (142 columns according to the original note)
non_margin_titles = [title for title in without_annotation.columns if 'MarginOfError' not in title]
withoutmarginerror = without_annotation.loc[:, non_margin_titles].copy()

In [23]:
#Preview the first five rows after removing the margin-of-error columns
withoutmarginerror.head()

Unnamed: 0,Total!!Estimate!!Population 16 years and over,Total!!Estimate!!AGE!!16 to 19 years,Total!!Estimate!!AGE!!20 to 24 years,Total!!Estimate!!AGE!!25 to 29 years,Total!!Estimate!!AGE!!30 to 34 years,Total!!Estimate!!AGE!!35 to 44 years,Total!!Estimate!!AGE!!45 to 54 years,Total!!Estimate!!AGE!!55 to 59 years,Total!!Estimate!!AGE!!60 to 64 years,Total!!Estimate!!AGE!!65 to 74 years,...,Unemployment rate!!Estimate!!POVERTY STATUS IN THE PAST 12 MONTHS!!Below poverty level,Unemployment rate!!Estimate!!POVERTY STATUS IN THE PAST 12 MONTHS!!At or above the poverty level,Unemployment rate!!Estimate!!DISABILITY STATUS!!With any disability,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Less than high school graduate,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency),Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher,STATE,SCHOOL DISTRICT (SECONDARY)
1,100100,7308,5693,6369,6206,12654,18953,7336,8319,14188,...,-999999999.0,-999999999.0,-999999999.0,2.1,15.3,3.1,1.6,2.0,6,1650
2,319085,24209,32503,34130,28933,51004,56739,25601,19631,26668,...,15.1,4.1,8.8,4.7,6.1,5.1,5.0,2.8,6,2630
3,287277,23883,26985,28820,24469,44246,53125,25136,21468,23753,...,26.0,5.5,13.3,7.4,12.9,8.0,5.9,4.4,6,2820
4,87007,15015,18335,5363,5360,7573,9788,5966,4730,7531,...,16.3,4.0,14.2,4.6,1.7,5.6,5.6,4.3,6,6019
5,69912,5313,3859,7947,5920,9700,9543,5673,4882,8358,...,16.2,4.7,15.0,4.9,6.1,5.0,4.9,4.7,6,6034


In [24]:
#Find all columns without "Labor Force Participation Rate" in column title
#[col for col in withoutmarginerror.columns if "Labor Force Participation Rate" not in col]

In [25]:
#Keep only the columns whose title does not mention "Labor Force Participation Rate"
#(107 columns according to the original note); .copy() detaches the result from its source
titles_without_laborforce = list(
    filter(lambda title: 'Labor Force Participation Rate' not in title, withoutmarginerror.columns)
)
withoutlaborforce = withoutmarginerror[titles_without_laborforce].copy()

In [26]:
#Preview the first five rows after the "Labor Force Participation Rate" filter
withoutlaborforce.head()

Unnamed: 0,Total!!Estimate!!Population 16 years and over,Total!!Estimate!!AGE!!16 to 19 years,Total!!Estimate!!AGE!!20 to 24 years,Total!!Estimate!!AGE!!25 to 29 years,Total!!Estimate!!AGE!!30 to 34 years,Total!!Estimate!!AGE!!35 to 44 years,Total!!Estimate!!AGE!!45 to 54 years,Total!!Estimate!!AGE!!55 to 59 years,Total!!Estimate!!AGE!!60 to 64 years,Total!!Estimate!!AGE!!65 to 74 years,...,Unemployment rate!!Estimate!!POVERTY STATUS IN THE PAST 12 MONTHS!!Below poverty level,Unemployment rate!!Estimate!!POVERTY STATUS IN THE PAST 12 MONTHS!!At or above the poverty level,Unemployment rate!!Estimate!!DISABILITY STATUS!!With any disability,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Less than high school graduate,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency),Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher,STATE,SCHOOL DISTRICT (SECONDARY)
1,100100,7308,5693,6369,6206,12654,18953,7336,8319,14188,...,-999999999.0,-999999999.0,-999999999.0,2.1,15.3,3.1,1.6,2.0,6,1650
2,319085,24209,32503,34130,28933,51004,56739,25601,19631,26668,...,15.1,4.1,8.8,4.7,6.1,5.1,5.0,2.8,6,2630
3,287277,23883,26985,28820,24469,44246,53125,25136,21468,23753,...,26.0,5.5,13.3,7.4,12.9,8.0,5.9,4.4,6,2820
4,87007,15015,18335,5363,5360,7573,9788,5966,4730,7531,...,16.3,4.0,14.2,4.6,1.7,5.6,5.6,4.3,6,6019
5,69912,5313,3859,7947,5920,9700,9543,5673,4882,8358,...,16.2,4.7,15.0,4.9,6.1,5.0,4.9,4.7,6,6034


In [27]:
#Find all columns without "Sex" in column title
#[col for col in withoutlaborforce.columns if "SEX" not in col]

In [28]:
#Collect the titles that do not contain "SEX", then copy those columns into a new
#dataframe (89 columns according to the original note)
titles_without_sex = []
for title in withoutlaborforce.columns:
    if 'SEX' in title:
        continue
    titles_without_sex.append(title)
withoutsex = withoutlaborforce[titles_without_sex].copy()

In [29]:
#Preview the first five rows after removing the "SEX" columns
withoutsex.head()

Unnamed: 0,Total!!Estimate!!Population 16 years and over,Total!!Estimate!!AGE!!16 to 19 years,Total!!Estimate!!AGE!!20 to 24 years,Total!!Estimate!!AGE!!25 to 29 years,Total!!Estimate!!AGE!!30 to 34 years,Total!!Estimate!!AGE!!35 to 44 years,Total!!Estimate!!AGE!!45 to 54 years,Total!!Estimate!!AGE!!55 to 59 years,Total!!Estimate!!AGE!!60 to 64 years,Total!!Estimate!!AGE!!65 to 74 years,...,Unemployment rate!!Estimate!!POVERTY STATUS IN THE PAST 12 MONTHS!!Below poverty level,Unemployment rate!!Estimate!!POVERTY STATUS IN THE PAST 12 MONTHS!!At or above the poverty level,Unemployment rate!!Estimate!!DISABILITY STATUS!!With any disability,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Less than high school graduate,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency),Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher,STATE,SCHOOL DISTRICT (SECONDARY)
1,100100,7308,5693,6369,6206,12654,18953,7336,8319,14188,...,-999999999.0,-999999999.0,-999999999.0,2.1,15.3,3.1,1.6,2.0,6,1650
2,319085,24209,32503,34130,28933,51004,56739,25601,19631,26668,...,15.1,4.1,8.8,4.7,6.1,5.1,5.0,2.8,6,2630
3,287277,23883,26985,28820,24469,44246,53125,25136,21468,23753,...,26.0,5.5,13.3,7.4,12.9,8.0,5.9,4.4,6,2820
4,87007,15015,18335,5363,5360,7573,9788,5966,4730,7531,...,16.3,4.0,14.2,4.6,1.7,5.6,5.6,4.3,6,6019
5,69912,5313,3859,7947,5920,9700,9543,5673,4882,8358,...,16.2,4.7,15.0,4.9,6.1,5.0,4.9,4.7,6,6034


In [30]:
#Find all columns without "Poverty Status" in column title
#[col for col in withoutsex.columns if "POVERTY STATUS" not in col]

In [31]:
#Remove the columns whose title contains "POVERTY STATUS"; the remaining columns are
#copied into a new dataframe (83 columns according to the original note)
titles_without_poverty = [title for title in withoutsex.columns if not ('POVERTY STATUS' in title)]
withoutps = withoutsex.loc[:, titles_without_poverty].copy()

In [32]:
#Preview the first five rows after removing the "POVERTY STATUS" columns
withoutps.head()

Unnamed: 0,Total!!Estimate!!Population 16 years and over,Total!!Estimate!!AGE!!16 to 19 years,Total!!Estimate!!AGE!!20 to 24 years,Total!!Estimate!!AGE!!25 to 29 years,Total!!Estimate!!AGE!!30 to 34 years,Total!!Estimate!!AGE!!35 to 44 years,Total!!Estimate!!AGE!!45 to 54 years,Total!!Estimate!!AGE!!55 to 59 years,Total!!Estimate!!AGE!!60 to 64 years,Total!!Estimate!!AGE!!65 to 74 years,...,"Unemployment rate!!Estimate!!White alone, not Hispanic or Latino",Unemployment rate!!Estimate!!Population 20 to 64 years,Unemployment rate!!Estimate!!DISABILITY STATUS!!With any disability,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Less than high school graduate,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency),Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher,STATE,SCHOOL DISTRICT (SECONDARY)
1,100100,7308,5693,6369,6206,12654,18953,7336,8319,14188,...,3.2,2.0,-999999999.0,2.1,15.3,3.1,1.6,2.0,6,1650
2,319085,24209,32503,34130,28933,51004,56739,25601,19631,26668,...,5.4,5.0,8.8,4.7,6.1,5.1,5.0,2.8,6,2630
3,287277,23883,26985,28820,24469,44246,53125,25136,21468,23753,...,7.0,7.4,13.3,7.4,12.9,8.0,5.9,4.4,6,2820
4,87007,15015,18335,5363,5360,7573,9788,5966,4730,7531,...,9.5,5.9,14.2,4.6,1.7,5.6,5.6,4.3,6,6019
5,69912,5313,3859,7947,5920,9700,9543,5673,4882,8358,...,7.7,5.1,15.0,4.9,6.1,5.0,4.9,4.7,6,6034


In [33]:
#Find all columns without "Disability Status" in column title
#[col for col in withoutps.columns if "DISABILITY STATUS" not in col]

In [34]:
#Keep the columns whose title does not contain "DISABILITY STATUS" in a new dataframe
#(80 columns according to the original note)
titles_without_disability = list(
    filter(lambda title: 'DISABILITY STATUS' not in title, withoutps.columns)
)
withoutds = withoutps[titles_without_disability].copy()

In [35]:
#Preview the first five rows after removing the "DISABILITY STATUS" columns
withoutds.head()

Unnamed: 0,Total!!Estimate!!Population 16 years and over,Total!!Estimate!!AGE!!16 to 19 years,Total!!Estimate!!AGE!!20 to 24 years,Total!!Estimate!!AGE!!25 to 29 years,Total!!Estimate!!AGE!!30 to 34 years,Total!!Estimate!!AGE!!35 to 44 years,Total!!Estimate!!AGE!!45 to 54 years,Total!!Estimate!!AGE!!55 to 59 years,Total!!Estimate!!AGE!!60 to 64 years,Total!!Estimate!!AGE!!65 to 74 years,...,Unemployment rate!!Estimate!!Hispanic or Latino origin (of any race),"Unemployment rate!!Estimate!!White alone, not Hispanic or Latino",Unemployment rate!!Estimate!!Population 20 to 64 years,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Less than high school graduate,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!High school graduate (includes equivalency),Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Some college or associate's degree,Unemployment rate!!Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 to 64 years!!Bachelor's degree or higher,STATE,SCHOOL DISTRICT (SECONDARY)
1,100100,7308,5693,6369,6206,12654,18953,7336,8319,14188,...,-999999999.0,3.2,2.0,2.1,15.3,3.1,1.6,2.0,6,1650
2,319085,24209,32503,34130,28933,51004,56739,25601,19631,26668,...,5.8,5.4,5.0,4.7,6.1,5.1,5.0,2.8,6,2630
3,287277,23883,26985,28820,24469,44246,53125,25136,21468,23753,...,6.3,7.0,7.4,7.4,12.9,8.0,5.9,4.4,6,2820
4,87007,15015,18335,5363,5360,7573,9788,5966,4730,7531,...,6.8,9.5,5.9,4.6,1.7,5.6,5.6,4.3,6,6019
5,69912,5313,3859,7947,5920,9700,9543,5673,4882,8358,...,-999999999.0,7.7,5.1,4.9,6.1,5.0,4.9,4.7,6,6034


In [36]:
#Find all columns without "Educational Attainment" in column title
#[col for col in withoutds.columns if "EDUCATIONAL ATTAINMENT" not in col]

In [37]:
#Drop the "EDUCATIONAL ATTAINMENT" columns: gather the titles to keep first, then copy
#those columns into a new dataframe (65 columns according to the original note)
titles_without_education = []
for title in withoutds.columns:
    if 'EDUCATIONAL ATTAINMENT' not in title:
        titles_without_education.append(title)
withoutea = withoutds.loc[:, titles_without_education].copy()

In [38]:
#Preview the first five rows after removing the "EDUCATIONAL ATTAINMENT" columns
withoutea.head()

Unnamed: 0,Total!!Estimate!!Population 16 years and over,Total!!Estimate!!AGE!!16 to 19 years,Total!!Estimate!!AGE!!20 to 24 years,Total!!Estimate!!AGE!!25 to 29 years,Total!!Estimate!!AGE!!30 to 34 years,Total!!Estimate!!AGE!!35 to 44 years,Total!!Estimate!!AGE!!45 to 54 years,Total!!Estimate!!AGE!!55 to 59 years,Total!!Estimate!!AGE!!60 to 64 years,Total!!Estimate!!AGE!!65 to 74 years,...,Unemployment rate!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!American Indian and Alaska Native alone,Unemployment rate!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Asian alone,Unemployment rate!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Native Hawaiian and Other Pacific Islander alone,Unemployment rate!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Some other race alone,Unemployment rate!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Two or more races,Unemployment rate!!Estimate!!Hispanic or Latino origin (of any race),"Unemployment rate!!Estimate!!White alone, not Hispanic or Latino",Unemployment rate!!Estimate!!Population 20 to 64 years,STATE,SCHOOL DISTRICT (SECONDARY)
1,100100,7308,5693,6369,6206,12654,18953,7336,8319,14188,...,-999999999.0,0.9,-999999999.0,-999999999.0,-999999999.0,-999999999.0,3.2,2.0,6,1650
2,319085,24209,32503,34130,28933,51004,56739,25601,19631,26668,...,-999999999.0,4.3,-999999999.0,7.2,7.6,5.8,5.4,5.0,6,2630
3,287277,23883,26985,28820,24469,44246,53125,25136,21468,23753,...,-999999999.0,10.5,-999999999.0,7.2,11.3,6.3,7.0,7.4,6,2820
4,87007,15015,18335,5363,5360,7573,9788,5966,4730,7531,...,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,6.8,9.5,5.9,6,6019
5,69912,5313,3859,7947,5920,9700,9543,5673,4882,8358,...,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,7.7,5.1,6,6034


In [39]:
#Find all columns without "Age" in column title
#[col for col in withoutea.columns if "AGE" not in col]

In [40]:
#Keep the columns whose title does not contain the upper-case text "AGE" and copy them
#into a new dataframe (35 columns according to the original note)
titles_without_age = [title for title in withoutea.columns if 'AGE' not in title]
withoutage = withoutea[titles_without_age].copy()

In [41]:
#Display the dataframe that remains after all the column filters above
withoutage

Unnamed: 0,Total!!Estimate!!Population 16 years and over,Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone,Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Black or African American alone,Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!American Indian and Alaska Native alone,Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Asian alone,Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Native Hawaiian and Other Pacific Islander alone,Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Some other race alone,Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Two or more races,Total!!Estimate!!Hispanic or Latino origin (of any race),"Total!!Estimate!!White alone, not Hispanic or Latino",...,Unemployment rate!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!American Indian and Alaska Native alone,Unemployment rate!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Asian alone,Unemployment rate!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Native Hawaiian and Other Pacific Islander alone,Unemployment rate!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Some other race alone,Unemployment rate!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Two or more races,Unemployment rate!!Estimate!!Hispanic or Latino origin (of any race),"Unemployment rate!!Estimate!!White alone, not Hispanic or Latino",Unemployment rate!!Estimate!!Population 20 to 64 years,STATE,SCHOOL DISTRICT (SECONDARY)
1,100100,79869,-999999999,-999999999,13001,-999999999,-999999999,-999999999,-999999999,74800,...,-999999999.0,0.9,-999999999.0,-999999999.0,-999999999.0,-999999999.0,3.2,2.0,6,1650
2,319085,208406,-999999999,-999999999,68969,-999999999,19330,9666,151506,82185,...,-999999999.0,4.3,-999999999.0,7.2,7.6,5.8,5.4,5.0,6,2630
3,287277,167121,39528,-999999999,13655,-999999999,51603,12960,130322,97593,...,-999999999.0,10.5,-999999999.0,7.2,11.3,6.3,7.0,7.4,6,2820
4,87007,64169,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,19541,54665,...,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,6.8,9.5,5.9,6,6019
5,69912,44085,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,36089,...,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,7.7,5.1,6,6034
6,180987,110458,-999999999,-999999999,44126,-999999999,-999999999,-999999999,35931,89432,...,-999999999.0,3.3,-999999999.0,-999999999.0,-999999999.0,2.7,3.2,3.1,6,7230
7,119432,38338,16573,-999999999,-999999999,-999999999,43788,-999999999,74446,11519,...,-999999999.0,-999999999.0,-999999999.0,4.8,-999999999.0,5.4,7.4,5.7,6,7920
8,325335,182043,27357,-999999999,34503,-999999999,67271,-999999999,167540,89829,...,-999999999.0,2.5,-999999999.0,11.0,-999999999.0,10.4,3.9,7.3,6,8160
9,51366,36661,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,41226,-999999999,...,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,16.8,-999999999.0,16.0,6,10860
10,440448,133855,11327,-999999999,190395,-999999999,82523,17319,151523,77208,...,-999999999.0,5.4,-999999999.0,7.3,9.6,7.2,6.1,6.0,6,11820


In [42]:
#Format to rename School District Name
#def cleandistrict(NAME):
    #return NAME.replace(", California","")

In [43]:
#Apply formatting condition to School District Name
#withoutage['NAME'].apply(cleandistrict)

In [44]:
#Create new School District name column with formatting titled District
#withoutage['District']= withoutage['NAME'].apply(cleandistrict)

In [45]:
#Find length of Geography column contents, which is 16
#withoutage['Geography'].apply(len).unique()

#Pull a geography
#geo = "9700000US0622710"

#Find NCESDist ID
#geo.split("US")[1]

In [46]:
#Format to separate the Census Geography code and keep the part after "US" (the state and district ID combined)
#def splitGeo(geo):
    #return geo.split("US")[1]

In [47]:
#Apply formatting condition to Geography
#withoutage['Geography'].apply(splitGeo)

In [48]:
#Create new Geography name column with formatting titled NCESDist
#withoutage['NCESDist']= withoutage['Geography'].apply(splitGeo)

In [49]:
#Create new column with combined state and district ID.
#Both parts are converted to text and left-padded with zeros (2 characters for the state,
#5 for the district) so the result always has the 7-character form of the example ID in
#the commented-out cell above ("0622710"), even if a leading zero was lost upstream.
#Values that are already padded are left unchanged.
state_code = withoutage['STATE'].astype(str).str.zfill(2)
district_code = withoutage['SCHOOL DISTRICT (SECONDARY)'].astype(str).str.zfill(5)
withoutage['NCESDist'] = state_code + district_code

In [50]:
#Add a "year" column holding the text "2016" on every row
withoutage.loc[:, 'year'] = "2016"

In [51]:
#Preview the first five rows with the added NCESDist and year columns
withoutage.head()

Unnamed: 0,Total!!Estimate!!Population 16 years and over,Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!White alone,Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Black or African American alone,Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!American Indian and Alaska Native alone,Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Asian alone,Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Native Hawaiian and Other Pacific Islander alone,Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Some other race alone,Total!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Two or more races,Total!!Estimate!!Hispanic or Latino origin (of any race),"Total!!Estimate!!White alone, not Hispanic or Latino",...,Unemployment rate!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Native Hawaiian and Other Pacific Islander alone,Unemployment rate!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Some other race alone,Unemployment rate!!Estimate!!RACE AND HISPANIC OR LATINO ORIGIN!!Two or more races,Unemployment rate!!Estimate!!Hispanic or Latino origin (of any race),"Unemployment rate!!Estimate!!White alone, not Hispanic or Latino",Unemployment rate!!Estimate!!Population 20 to 64 years,STATE,SCHOOL DISTRICT (SECONDARY),NCESDist,year
1,100100,79869,-999999999,-999999999,13001,-999999999,-999999999,-999999999,-999999999,74800,...,-999999999.0,-999999999.0,-999999999.0,-999999999.0,3.2,2.0,6,1650,601650,2016
2,319085,208406,-999999999,-999999999,68969,-999999999,19330,9666,151506,82185,...,-999999999.0,7.2,7.6,5.8,5.4,5.0,6,2630,602630,2016
3,287277,167121,39528,-999999999,13655,-999999999,51603,12960,130322,97593,...,-999999999.0,7.2,11.3,6.3,7.0,7.4,6,2820,602820,2016
4,87007,64169,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,19541,54665,...,-999999999.0,-999999999.0,-999999999.0,6.8,9.5,5.9,6,6019,606019,2016
5,69912,44085,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,-999999999,36089,...,-999999999.0,-999999999.0,-999999999.0,-999999999.0,7.7,5.1,6,6034,606034,2016


In [52]:
#Write the final dataframe to Secondary2016.csv inside the project folder.
#The folder is located relative to the current user's home directory instead of a path
#tied to one user account; for the original author this is the same file as before.
#NOTE(review): the dataframe still contains the -999999999 placeholder values that are
#visible in the displayed output - confirm whether they should be replaced before export.
output_dir = Path.home() / "git" / "ca-school-enrollment-trend"
withoutage.to_csv(output_dir / "Secondary2016.csv")

In [53]:
#Rearrange columns in list
#['NCESDist','District'] + list(without_annotation_df.columns[2:])

In [54]:
#Rearrange columns in dataframe
#final = without_annotation_df[['NCESDist','District','Geography','NAME'] + list(without_annotation_df.columns[2:])]
#final