# Census Data Import and Cleaning

In [1]:
# Install Census data API
!pip install censusdata

Collecting censusdata
  Downloading CensusData-1.15.post1.tar.gz (26.6 MB)
[K     |████████████████████████████████| 26.6 MB 16.8 MB/s eta 0:00:01
Building wheels for collected packages: censusdata
  Building wheel for censusdata (setup.py) ... [?25ldone
[?25h  Created wheel for censusdata: filename=CensusData-1.15.post1-py3-none-any.whl size=28205763 sha256=81548fd912c58407801eae9077d6ff6a6012ecc3b7171f52e48ad535407c7702
  Stored in directory: /Users/davidfix/Library/Caches/pip/wheels/2f/72/16/5c8e185711751db7fa81da8c6329c8b3d82c0582a51a0f1e85
Successfully built censusdata
Installing collected packages: censusdata
Successfully installed censusdata-1.15.post1


In [2]:
# Import needed packages
import pandas as pd
import censusdata
import numpy as np

In [None]:
# Get all states Geo codes
# In order to access this data you may have to request a key to be sent to your email address from Key Signup: https://api.census.gov/data/key_signup.html
states = censusdata.geographies(censusdata.censusgeo([('state', '*')]), 'acs5', 2018,key='b15ade4ca040c6435803ef83605eccaa17abb8e0')

#select Michigan
statecode = states['Michigan']
statecode
# states

censusgeo((('state', '26'),))

In [None]:
# counties = censusdata.geographies(censusdata.censusgeo([('state', '*'),("county","*")]), 'acs5', 2018,key='b15ade4ca040c6435803ef83605eccaa17abb8e0')
# counties

In [None]:
# Get all school district names and codes in Michigan
schooldistricts = censusdata.geographies(censusdata.censusgeo([('state', '26'), ('school district (unified)', '*')]), 'acs5', 2018,key='b15ade4ca040c6435803ef83605eccaa17abb8e0')

#print sample
list(schooldistricts)[:2]
schooldistricts

{"L'Anse Area Schools, Michigan": censusgeo((('state', '26'), ('school district (unified)', '00018'))),
 'Superior Central Schools, Michigan': censusgeo((('state', '26'), ('school district (unified)', '00019'))),
 'Adams Township School District, Michigan': censusgeo((('state', '26'), ('school district (unified)', '01890'))),
 'Addison Community Schools, Michigan': censusgeo((('state', '26'), ('school district (unified)', '01920'))),
 'Adrian City School District, Michigan': censusgeo((('state', '26'), ('school district (unified)', '01950'))),
 'Airport Community School District, Michigan': censusgeo((('state', '26'), ('school district (unified)', '01980'))),
 'Akron-Fairgrove Schools, Michigan': censusgeo((('state', '26'), ('school district (unified)', '02010'))),
 'Alba Public Schools, Michigan': censusgeo((('state', '26'), ('school district (unified)', '02040'))),
 'Alcona Community Schools, Michigan': censusgeo((('state', '26'), ('school district (unified)', '02160'))),
 'Algonac C

In [None]:
# Get list of school districts geography codes
sd_nums = []
for schooldistrict in schooldistricts:
  sd_nums.append(schooldistricts[schooldistrict].params()[1][1])

In [None]:
# Print 5 sample district codes and length of codes
print(sd_nums[:5]) #print sample
len(sd_nums) #516 school districts

['00018', '00019', '01890', '01920', '01950']


516

In [None]:
df = pd.DataFrame(dict(censusdata.censustable('acs5', 2018, 'B14001'))).T.reset_index()
df.columns

df['label'] = df['label'].str.replace("!!",' ')
index_list = list(df["index"])
label_list = list(df["label"])
renamed_columns_dict= dict(zip(index_list,label_list))
example_of_returned_data = censusdata.download('acs5', 2018, 
            censusdata.censusgeo([('state', '26'),
                                  ('school district (unified)', "00018")]),
                                  # ('block group', '*')]),
            index_list, key='b15ade4ca040c6435803ef83605eccaa17abb8e0')
example_of_returned_data

Unnamed: 0,B14001_001E,B14001_002E,B14001_003E,B14001_004E,B14001_005E,B14001_006E,B14001_007E,B14001_008E,B14001_009E,B14001_010E
"L'Anse Area Schools, Michigan: Summary level: 970, state:26> school district (unified):00018",4522,1044,60,55,282,195,307,133,12,3478


# Identify table variables

In [None]:
#download available data elements and Table IDs
#looked through this in Excel

url = 'https://www2.census.gov/programs-surveys/acs/summary_file/2018/documentation/user_tools/ACS2018_Table_Shells.xlsx'
pd.set_option('display.max_rows', 50)
tableids = pd.read_excel(url, header=0)
tableids

Unnamed: 0,Table ID,Line,UniqueID,Stub,Data Release
0,,,,,
1,B00001,,,UNWEIGHTED SAMPLE COUNT OF THE POPULATION,15
2,B00001,,,Universe: Total population,
3,B00001,1,B00001_001,Total,
4,,,,,
...,...,...,...,...,...
40027,B99283,1.5,,Allocated:,
40028,B99283,2,B99283_002,Dial-up,
40029,B99283,3,B99283_003,"Broadband such as cable, fiberoptic, or DSL, s...",
40030,B99283,4,B99283_004,Cellular data plan,


In [None]:
# search for desired information

searched_tables = tableids[tableids['Stub'].str.contains("EDUCATIONAL ATTAINMENT", na=False)]
searched_tables
# list(searched_tables.Stub)

### First three in list
# SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE POPULATION 3 YEARS AND OVER
# SEX BY SCHOOL ENROLLMENT BY LEVEL OF SCHOOL BY TYPE OF SCHOOL FOR THE POPULATION 3 YEARS AND OVER
# SCHOOL ENROLLMENT BY LEVEL OF SCHOOL BY TYPE OF SCHOOL FOR THE POPULATION 3 YEARS AND OVER'
# ALLOCATION OF SCHOOL ENROLLMENT FOR THE POPULATION 3 YEARS AND OVER #### Last one 
### Considering both private and public schools?
# SCHOOL ENROLLMENT BY TYPE OF SCHOOL BY AGE FOR THE POPULATION 3 YEARS AND OVER


Unnamed: 0,Table ID,Line,UniqueID,Stub,Data Release
3190,B06009,,,PLACE OF BIRTH BY EDUCATIONAL ATTAINMENT IN TH...,15
3223,B06009PR,,,PLACE OF BIRTH BY EDUCATIONAL ATTAINMENT IN PU...,15
4244,B07009,,,GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY EDUC...,15
4283,B07009PR,,,GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY EDUC...,15
5437,B07409,,,GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY EDUC...,15
5470,B07409PR,,,GEOGRAPHICAL MOBILITY IN THE PAST YEAR BY EDUC...,15
11120,B13014,,,WOMEN 15 TO 50 YEARS WHO HAD A BIRTH IN THE PA...,15
11407,B14005,,,SEX BY SCHOOL ENROLLMENT BY EDUCATIONAL ATTAIN...,15
11439,C14005,,,SEX BY SCHOOL ENROLLMENT BY EDUCATIONAL ATTAIN...,1
11795,B15001,,,SEX BY AGE BY EDUCATIONAL ATTAINMENT FOR THE P...,15


In [None]:
censusdata.censustable('acs5', 2018, 'B14001')


OrderedDict([('B14001_001E',
              {'concept': 'SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE POPULATION 3 YEARS AND OVER',
               'label': 'Estimate!!Total',
               'predicateType': 'int'}),
             ('B14001_002E',
              {'concept': 'SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE POPULATION 3 YEARS AND OVER',
               'label': 'Estimate!!Total!!Enrolled in school',
               'predicateType': 'int'}),
             ('B14001_003E',
              {'concept': 'SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE POPULATION 3 YEARS AND OVER',
               'label': 'Estimate!!Total!!Enrolled in school!!Enrolled in nursery school, preschool',
               'predicateType': 'int'}),
             ('B14001_004E',
              {'concept': 'SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE POPULATION 3 YEARS AND OVER',
               'label': 'Estimate!!Total!!Enrolled in school!!Enrolled in kindergarten',
               'predicateType': 'int'}),
             ('

In [None]:
pd.DataFrame(dict(censusdata.censustable('acs5', 2018, 'B14001'))).T.reset_index()

Unnamed: 0,index,label,concept,predicateType
0,B14001_001E,Estimate!!Total,SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE P...,int
1,B14001_002E,Estimate!!Total!!Enrolled in school,SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE P...,int
2,B14001_003E,Estimate!!Total!!Enrolled in school!!Enrolled ...,SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE P...,int
3,B14001_004E,Estimate!!Total!!Enrolled in school!!Enrolled ...,SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE P...,int
4,B14001_005E,Estimate!!Total!!Enrolled in school!!Enrolled ...,SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE P...,int
5,B14001_006E,Estimate!!Total!!Enrolled in school!!Enrolled ...,SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE P...,int
6,B14001_007E,Estimate!!Total!!Enrolled in school!!Enrolled ...,SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE P...,int
7,B14001_008E,Estimate!!Total!!Enrolled in school!!Enrolled ...,SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE P...,int
8,B14001_009E,Estimate!!Total!!Enrolled in school!!Graduate ...,SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE P...,int
9,B14001_010E,Estimate!!Total!!Not enrolled in school,SCHOOL ENROLLMENT BY LEVEL OF SCHOOL FOR THE P...,int


In [None]:
#use table name to look up all columns in a table
# use 'Variable name' in query below
# variable_labels = censusdata.printtable(censusdata.censustable('acs5', 2018, 'B14001'))
# pd.DataFrame(variable_labels)
 
## ACS 5 year estimates, year 2018, 3rd parameter is the table ID from the searched tables mentioned above
df = pd.DataFrame(dict(censusdata.censustable('acs5', 2018, 'B14001'))).T.reset_index()
df.columns
df['label'] = df['label'].str.replace("!!",' ')
index_list = list(df["index"])
label_list = list(df["label"])
renamed_columns_dict= dict(zip(index_list,label_list))


df2 = pd.DataFrame(dict(censusdata.censustable('acs5', 2018, 'B25013'))).T.reset_index()
df2['label'] = df2['label'].str.replace("!!",' ')
index_list2 = list(df2["index"])
label_list2 = list(df2["label"])
renamed_columns_dict2= dict(zip(index_list2,label_list2))


df3 = pd.DataFrame(dict(censusdata.censustable('acs5', 2018, 'B15003'))).T.reset_index()
df3['label'] = df3['label'].str.replace("!!",' ')
index_list3 = list(df3["index"])
label_list3 = list(df3["label"])
renamed_columns_dict3= dict(zip(index_list3,label_list3))

renamed_columns_dict3["B15003_001E"] = "Estimated Population over 25"
renamed_columns_dict3

{'B15003_001E': 'Estimated Population over 25',
 'B15003_002E': 'Estimate Total No schooling completed',
 'B15003_003E': 'Estimate Total Nursery school',
 'B15003_004E': 'Estimate Total Kindergarten',
 'B15003_005E': 'Estimate Total 1st grade',
 'B15003_006E': 'Estimate Total 2nd grade',
 'B15003_007E': 'Estimate Total 3rd grade',
 'B15003_008E': 'Estimate Total 4th grade',
 'B15003_009E': 'Estimate Total 5th grade',
 'B15003_010E': 'Estimate Total 6th grade',
 'B15003_011E': 'Estimate Total 7th grade',
 'B15003_012E': 'Estimate Total 8th grade',
 'B15003_013E': 'Estimate Total 9th grade',
 'B15003_014E': 'Estimate Total 10th grade',
 'B15003_015E': 'Estimate Total 11th grade',
 'B15003_016E': 'Estimate Total 12th grade, no diploma',
 'B15003_017E': 'Estimate Total Regular high school diploma',
 'B15003_018E': 'Estimate Total GED or alternative credential',
 'B15003_019E': 'Estimate Total Some college, less than 1 year',
 'B15003_020E': 'Estimate Total Some college, 1 or more years, no

In [None]:
index_list3



['B15003_001E',
 'B15003_002E',
 'B15003_003E',
 'B15003_004E',
 'B15003_005E',
 'B15003_006E',
 'B15003_007E',
 'B15003_008E',
 'B15003_009E',
 'B15003_010E',
 'B15003_011E',
 'B15003_012E',
 'B15003_013E',
 'B15003_014E',
 'B15003_015E',
 'B15003_016E',
 'B15003_017E',
 'B15003_018E',
 'B15003_019E',
 'B15003_020E',
 'B15003_021E',
 'B15003_022E',
 'B15003_023E',
 'B15003_024E',
 'B15003_025E']

In [None]:
## Total Population
df = pd.DataFrame(dict(censusdata.censustable('acs1', 2018, 'B01003'))).T.reset_index()
df.columns
df['label'] = df['label'].str.replace("!!",' ')
index_list = list(df["index"])
label_list = list(df["label"])
renamed_columns_dict= dict(zip(index_list,label_list))
renamed_columns_dict


{'B01003_001E': 'Estimate Total'}

In [None]:
## Race
df = pd.DataFrame(dict(censusdata.censustable('acs1', 2018, 'B02001'))).T.reset_index()
df.columns
df['label'] = df['label'].str.replace("!!",' ')
index_list = list(df["index"])
label_list = list(df["label"])
renamed_columns_dict= dict(zip(index_list,label_list))
renamed_columns_dict

{'B02001_001E': 'Estimate Total',
 'B02001_002E': 'Estimate Total White alone',
 'B02001_003E': 'Estimate Total Black or African American alone',
 'B02001_004E': 'Estimate Total American Indian and Alaska Native alone',
 'B02001_005E': 'Estimate Total Asian alone',
 'B02001_006E': 'Estimate Total Native Hawaiian and Other Pacific Islander alone',
 'B02001_007E': 'Estimate Total Some other race alone',
 'B02001_008E': 'Estimate Total Two or more races',
 'B02001_009E': 'Estimate Total Two or more races Two races including Some other race',
 'B02001_010E': 'Estimate Total Two or more races Two races excluding Some other race, and three or more races'}

In [None]:
## MEDIAN HOUSEHOLD INCOME IN THE PAST 12 MONTHS 
df = pd.DataFrame(dict(censusdata.censustable('acs1', 2018, 'B19013'))).T.reset_index()
df.columns
df['label'] = df['label'].str.replace("!!",' ')
index_list = list(df["index"])
label_list = list(df["label"])
renamed_columns_dict= dict(zip(index_list,label_list))
renamed_columns_dict

{'B19013_001E': 'Estimate Median household income in the past 12 months (in 2018 inflation-adjusted dollars)'}

In [None]:
## SCHOOL ENROLLMENT BY DETAILED LEVEL OF SCHOOL FOR THE POPULATION 3 AND OVER 
df = pd.DataFrame(dict(censusdata.censustable('acs1', 2018, 'B14001'))).T.reset_index()
df.columns
df['label'] = df['label'].str.replace("!!",' ')
index_list = list(df["index"])
label_list = list(df["label"])
renamed_columns_dict= dict(zip(index_list,label_list))
renamed_columns_dict

{'B14001_001E': 'Estimate Total',
 'B14001_002E': 'Estimate Total Enrolled in school',
 'B14001_003E': 'Estimate Total Enrolled in school Enrolled in nursery school, preschool',
 'B14001_004E': 'Estimate Total Enrolled in school Enrolled in kindergarten',
 'B14001_005E': 'Estimate Total Enrolled in school Enrolled in grade 1 to grade 4',
 'B14001_006E': 'Estimate Total Enrolled in school Enrolled in grade 5 to grade 8',
 'B14001_007E': 'Estimate Total Enrolled in school Enrolled in grade 9 to grade 12',
 'B14001_008E': 'Estimate Total Enrolled in school Enrolled in college, undergraduate years',
 'B14001_009E': 'Estimate Total Enrolled in school Graduate or professional school',
 'B14001_010E': 'Estimate Total Not enrolled in school'}

In [None]:
## EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 AND OVER
df = pd.DataFrame(dict(censusdata.censustable('acs1', 2018, 'B15003'))).T.reset_index()
df.columns
df['label'] = df['label'].str.replace("!!",' ')
index_list = list(df["index"])
label_list = list(df["label"])
renamed_columns_dict= dict(zip(index_list,label_list))
renamed_columns_dict

{'B15003_001E': 'Estimate Total',
 'B15003_002E': 'Estimate Total No schooling completed',
 'B15003_003E': 'Estimate Total Nursery school',
 'B15003_004E': 'Estimate Total Kindergarten',
 'B15003_005E': 'Estimate Total 1st grade',
 'B15003_006E': 'Estimate Total 2nd grade',
 'B15003_007E': 'Estimate Total 3rd grade',
 'B15003_008E': 'Estimate Total 4th grade',
 'B15003_009E': 'Estimate Total 5th grade',
 'B15003_010E': 'Estimate Total 6th grade',
 'B15003_011E': 'Estimate Total 7th grade',
 'B15003_012E': 'Estimate Total 8th grade',
 'B15003_013E': 'Estimate Total 9th grade',
 'B15003_014E': 'Estimate Total 10th grade',
 'B15003_015E': 'Estimate Total 11th grade',
 'B15003_016E': 'Estimate Total 12th grade, no diploma',
 'B15003_017E': 'Estimate Total Regular high school diploma',
 'B15003_018E': 'Estimate Total GED or alternative credential',
 'B15003_019E': 'Estimate Total Some college, less than 1 year',
 'B15003_020E': 'Estimate Total Some college, 1 or more years, no degree',
 'B1

In [None]:
## MEDIAN AGE
df = pd.DataFrame(dict(censusdata.censustable('acs1', 2018, 'B01002'))).T.reset_index()
df.columns
df['label'] = df['label'].str.replace("!!",' ')
index_list = list(df["index"])
label_list = list(df["label"])
renamed_columns_dict= dict(zip(index_list,label_list))
renamed_columns_dict

{'B01002_001E': 'Estimate Median age -- Total',
 'B01002_002E': 'Estimate Median age -- Male',
 'B01002_003E': 'Estimate Median age -- Female'}

In [None]:
## RECEIPT OF SSI, FOOD STAMPS, ETC

df = pd.DataFrame(dict(censusdata.censustable('acs1', 2018, 'B09010'))).T.reset_index()
df.columns
df['label'] = df['label'].str.replace("!!",' ')
index_list = list(df["index"])
label_list = list(df["label"])
renamed_columns_dict= dict(zip(index_list,label_list))
renamed_columns_dict

{'B09010_001E': 'Estimate Total',
 'B09010_002E': 'Estimate Total Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months',
 'B09010_003E': 'Estimate Total Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months In family households',
 'B09010_004E': 'Estimate Total Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months In family households In married-couple family',
 'B09010_005E': 'Estimate Total Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months In family households In male householder, no wife present, family',
 'B09010_006E': 'Estimate Total Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 mo

In [None]:
## HOUSEHOLD TYPE

df = pd.DataFrame(dict(censusdata.censustable('acs1', 2018, 'B11001'))).T.reset_index()
df.columns
df['label'] = df['label'].str.replace("!!",' ')
index_list = list(df["index"])
label_list = list(df["label"])
renamed_columns_dict= dict(zip(index_list,label_list))
renamed_columns_dict

{'B11001_001E': 'Estimate Total',
 'B11001_002E': 'Estimate Total Family households',
 'B11001_003E': 'Estimate Total Family households Married-couple family',
 'B11001_004E': 'Estimate Total Family households Other family',
 'B11001_005E': 'Estimate Total Family households Other family Male householder, no wife present',
 'B11001_006E': 'Estimate Total Family households Other family Female householder, no husband present',
 'B11001_007E': 'Estimate Total Nonfamily households',
 'B11001_008E': 'Estimate Total Nonfamily households Householder living alone',
 'B11001_009E': 'Estimate Total Nonfamily households Householder not living alone'}

In [None]:
## Take 2
attr = {
        #  'B01003_001E': 'Estimate Total', 
         'B02001_002E': 'Estimate Total White alone',
         'B02001_003E': 'Estimate Total Black or African American alone',
         'B02001_004E': 'Estimate Total American Indian and Alaska Native alone',
         'B02001_005E': 'Estimate Total Asian alone',
         'B02001_006E': 'Estimate Total Native Hawaiian and Other Pacific Islander alone',
        #  'B02001_007E': 'Estimate Total Some other race alone',
         'B19013_001E': 'Estimate Median household income in the past 12 months (in 2018 inflation-adjusted dollars)',
        #  'B14001_005E': 'Estimate Total Enrolled in school Enrolled in grade 1 to grade 4',
        #  'B14001_006E': 'Estimate Total Enrolled in school Enrolled in grade 5 to grade 8',
        #  'B14001_007E': 'Estimate Total Enrolled in school Enrolled in grade 9 to grade 12',
         'B15003_017E': 'Estimate Total Regular high school diploma',
         'B15003_022E': "Estimate Total Bachelor's degree",
         'B15003_023E': "Estimate Total Master's degree",
        #  'B01002_001E': 'Estimate Median age -- Total',
        #  'B01002_002E': 'Estimate Median age -- Male',
        #  'B01002_003E': 'Estimate Median age -- Female',
         'B09010_002E': 'Estimate Total Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months',
         'B11001_002E': 'Estimate Total Family households',
         }

attr_keys = list(attr.keys())
attr_values = list(attr.values())

In [None]:
sd_list = ['28830',
 '20340',
 '35160',
 '35190',
 '11600',
 '14070',
 '02820',
 '28560',
 '09570',
 '21840',
 '35310',
 '12000',
 '28740',
 '21150',
 '29940',
 '33540',
 '32310',
 '14520',
 '16440',
 '19950',
 '21870',
 '00015',
 '04260',
 '34260',
 '34470',
 '33870']


In [None]:
allsddata_total = pd.DataFrame(columns=attr_values)

for year in range(2015,2020):
  allsddata = pd.DataFrame(columns=attr_values)

  for schooldistrict in sd_list:
    failed = []
    try:
      data = censusdata.download('acs1', year, 
              censusdata.censusgeo([('state', '26'),
                                    ('school district (unified)', schooldistrict)]),
                                    # ('block group', '*')]),
              attr_keys, key='b15ade4ca040c6435803ef83605eccaa17abb8e0')  #may need key to access, see report for details on how to obtain key
    except:
      failed.append(schooldistrict)
      # print(schooldistrict)
      continue
    data['GeoInformation'] = data.index
    data.reset_index(drop=True, inplace=True)
    data = data.rename(columns=attr)
    #print(data.columns)
    allsddata = allsddata.append(data)
    allsddata['Year'] = year

  allsddata['SchoolDistrict'] = allsddata['GeoInformation'].astype(str).str.split(', Michigan').str[0]
  cols = allsddata.columns.drop(['SchoolDistrict','GeoInformation'])

  allsddata[cols] = allsddata[cols].apply(pd.to_numeric, errors='coerce')
  allsddata = allsddata.reset_index()
  # allsddata_total.append(allsddata)
allsddata

Unnamed: 0,index,Estimate Total White alone,Estimate Total Black or African American alone,Estimate Total American Indian and Alaska Native alone,Estimate Total Asian alone,Estimate Total Native Hawaiian and Other Pacific Islander alone,Estimate Median household income in the past 12 months (in 2018 inflation-adjusted dollars),Estimate Total Regular high school diploma,Estimate Total Bachelor's degree,Estimate Total Master's degree,"Estimate Total Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months",Estimate Total Family households,GeoInformation,Year,SchoolDistrict
0,0,60038,4770,128,749,25,48709,12520,6704,3128,6000.0,17390,"Port Huron Area School District, Michigan: Sum...",2019,Port Huron Area School District
1,0,40385,13448,152,5999,0,59064,10814,8132,2291,3707.0,16348,"Kentwood Public Schools, Michigan: Summary lev...",2019,Kentwood Public Schools
2,0,84651,10028,288,7807,0,77364,11536,21006,11406,2072.0,27647,"Walled Lake Consolidated Schools, Michigan: Su...",2019,Walled Lake Consolidated Schools
3,0,89386,9197,457,13993,0,59608,26212,12755,7013,6307.0,30480,"Warren Consolidated Schools, Michigan: Summary...",2019,Warren Consolidated Schools
4,0,87929,3904,241,1831,56,53596,12245,9875,5760,14050.0,21017,"Dearborn City School District, Michigan: Summa...",2019,Dearborn City School District
5,0,53619,16391,217,14128,0,88329,6912,20251,14621,,20783,"Farmington Public School District, Michigan: S...",2019,Farmington Public School District
6,0,119679,14454,563,27241,0,77888,6817,28958,25074,1845.0,34202,"Ann Arbor Public Schools, Michigan: Summary le...",2019,Ann Arbor Public Schools
7,0,90981,6955,227,19403,0,91835,12236,25824,17418,1462.0,34002,"Plymouth-Canton Community Schools, Michigan: S...",2019,Plymouth-Canton Community Schools
8,0,85704,8937,279,2218,0,76245,17338,12337,6556,1786.0,25728,"Chippewa Valley Schools, Michigan: Summary lev...",2019,Chippewa Valley Schools
9,0,98647,6492,35,5496,0,73229,17434,20432,9517,2379.0,32148,"Livonia Public Schools, Michigan: Summary leve...",2019,Livonia Public Schools


In [None]:

allsddata2015 = pd.DataFrame(columns=attr_values)
# 2015 - 2021
# consider log median income
# limit covariates to list on google docs
# change acs 5 to acs 1
# consider averaging years for school year 
for schooldistrict in sd_list:
  failed = []
  try:
    data = censusdata.download('acs1', 2015, 
            censusdata.censusgeo([('state', '26'),
                                  ('school district (unified)', schooldistrict)]),
                                  # ('block group', '*')]),
            attr_keys, key='b15ade4ca040c6435803ef83605eccaa17abb8e0')  #may need key to access, see report for details on how to obtain key
  except:
    failed.append(schooldistrict)
    # print(schooldistrict)
    continue
  data['GeoInformation'] = data.index
  data.reset_index(drop=True, inplace=True)
  data = data.rename(columns=attr)
  #print(data.columns)
  
  allsddata2015 = allsddata2015.append(data)

  
allsddata2015['Year'] = 2015

allsddata2015['SchoolDistrict'] = allsddata2015['GeoInformation'].astype(str).str.split(', Michigan').str[0]
cols = allsddata2015.columns.drop(['SchoolDistrict','GeoInformation'])

allsddata2015[cols] = allsddata2015[cols].apply(pd.to_numeric, errors='coerce')
allsddata2015 = allsddata2015.reset_index()
allsddata2015


Unnamed: 0,index,Estimate Total White alone,Estimate Total Black or African American alone,Estimate Total American Indian and Alaska Native alone,Estimate Total Asian alone,Estimate Total Native Hawaiian and Other Pacific Islander alone,Estimate Median household income in the past 12 months (in 2018 inflation-adjusted dollars),Estimate Total Regular high school diploma,Estimate Total Bachelor's degree,Estimate Total Master's degree,"Estimate Total Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months",Estimate Total Family households,GeoInformation,Year,SchoolDistrict
0,0,61617.0,2784.0,61.0,520.0,0.0,43477,14722.0,4715.0,1590.0,5355.0,18020,"Port Huron Area School District, Michigan: Sum...",2015,Port Huron Area School District
1,0,44606.0,14081.0,186.0,3685.0,0.0,51477,8866.0,10140.0,2645.0,2138.0,16414,"Kentwood Public Schools, Michigan: Summary lev...",2015,Kentwood Public Schools
2,0,79063.0,12353.0,478.0,6234.0,0.0,70876,12483.0,20461.0,9460.0,2882.0,26037,"Walled Lake Consolidated Schools, Michigan: Su...",2015,Walled Lake Consolidated Schools
3,0,,,,,,52940,23300.0,14467.0,5963.0,4857.0,30837,"Warren Consolidated Schools, Michigan: Summary...",2015,Warren Consolidated Schools
4,0,92831.0,4467.0,185.0,2585.0,231.0,51433,11407.0,10927.0,4643.0,15100.0,22176,"Dearborn City School District, Michigan: Summa...",2015,Dearborn City School District
5,0,59170.0,13800.0,51.0,10127.0,0.0,72084,8278.0,19500.0,10720.0,3086.0,22464,"Farmington Public School District, Michigan: S...",2015,Farmington Public School District
6,0,120755.0,13187.0,1044.0,26888.0,118.0,60533,7370.0,31303.0,24240.0,3728.0,33327,"Ann Arbor Public Schools, Michigan: Summary le...",2015,Ann Arbor Public Schools
7,0,91343.0,8516.0,185.0,18212.0,0.0,86928,11107.0,23311.0,14458.0,2579.0,32745,"Plymouth-Canton Community Schools, Michigan: S...",2015,Plymouth-Canton Community Schools
8,0,92214.0,9241.0,147.0,2096.0,0.0,68595,17966.0,14675.0,6921.0,2469.0,27762,"Chippewa Valley Schools, Michigan: Summary lev...",2015,Chippewa Valley Schools
9,0,99950.0,6727.0,150.0,3273.0,0.0,65736,18652.0,17340.0,7708.0,2448.0,29717,"Livonia Public Schools, Michigan: Summary leve...",2015,Livonia Public Schools


In [None]:
allsddata2016 = pd.DataFrame(columns=attr_values)
## 2015 - 2021
## consider log median income
## limit covariates to list on google docs
## change acs 5 to acs 1
## consider averaging years for school year 
for schooldistrict in sd_list:
  failed = []
  try:
    data = censusdata.download('acs1', 2016, 
            censusdata.censusgeo([('state', '26'),
                                  ('school district (unified)', schooldistrict)]),
                                  # ('block group', '*')]),
            attr_keys, key='b15ade4ca040c6435803ef83605eccaa17abb8e0')  #may need key to access, see report for details on how to obtain key
  except:
    failed.append(schooldistrict)
    # print(schooldistrict)
    continue
  data['GeoInformation'] = data.index
  data.reset_index(drop=True, inplace=True)
  data = data.rename(columns=attr)
  #print(data.columns)
  allsddata2016 = allsddata2016.append(data)
allsddata2016['Year'] = 2016

allsddata2016['SchoolDistrict'] = allsddata2016['GeoInformation'].astype(str).str.split(', Michigan').str[0]
cols = allsddata2016.columns.drop(['SchoolDistrict','GeoInformation'])

allsddata2016[cols] = allsddata2016[cols].apply(pd.to_numeric, errors='coerce')
allsddata2016 = allsddata2016.reset_index()
allsddata2016

Unnamed: 0,index,Estimate Total White alone,Estimate Total Black or African American alone,Estimate Total American Indian and Alaska Native alone,Estimate Total Asian alone,Estimate Total Native Hawaiian and Other Pacific Islander alone,Estimate Median household income in the past 12 months (in 2018 inflation-adjusted dollars),Estimate Total Regular high school diploma,Estimate Total Bachelor's degree,Estimate Total Master's degree,"Estimate Total Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months",Estimate Total Family households,GeoInformation,Year,SchoolDistrict
0,0,59034.0,3193.0,205.0,1094.0,0.0,44605,13234.0,4903.0,1922.0,5543.0,16884,"Port Huron Area School District, Michigan: Sum...",2016,Port Huron Area School District
1,0,47391.0,12964.0,37.0,5716.0,0.0,53174,10664.0,8649.0,3143.0,6045.0,16587,"Kentwood Public Schools, Michigan: Summary lev...",2016,Kentwood Public Schools
2,0,84695.0,9805.0,275.0,5740.0,0.0,75317,11537.0,19915.0,10154.0,,25624,"Walled Lake Consolidated Schools, Michigan: Su...",2016,Walled Lake Consolidated Schools
3,0,93571.0,10949.0,79.0,12569.0,0.0,54673,25614.0,14125.0,5003.0,9845.0,31033,"Warren Consolidated Schools, Michigan: Summary...",2016,Warren Consolidated Schools
4,0,92742.0,4268.0,176.0,1454.0,0.0,48748,11723.0,9919.0,5083.0,15680.0,22529,"Dearborn City School District, Michigan: Summa...",2016,Dearborn City School District
5,0,60964.0,16469.0,40.0,9531.0,0.0,70944,,,,,22912,"Farmington Public School District, Michigan: S...",2016,Farmington Public School District
6,0,121732.0,12749.0,820.0,27169.0,179.0,66454,7081.0,29163.0,25386.0,3114.0,35645,"Ann Arbor Public Schools, Michigan: Summary le...",2016,Ann Arbor Public Schools
7,0,96683.0,6570.0,46.0,13804.0,0.0,89706,12057.0,24754.0,16546.0,1733.0,34410,"Plymouth-Canton Community Schools, Michigan: S...",2016,Plymouth-Canton Community Schools
8,0,86002.0,11474.0,283.0,4226.0,0.0,75486,19551.0,14698.0,6725.0,,28360,"Chippewa Valley Schools, Michigan: Summary lev...",2016,Chippewa Valley Schools
9,0,96776.0,6629.0,504.0,3621.0,62.0,66449,17094.0,17843.0,7484.0,2485.0,28818,"Livonia Public Schools, Michigan: Summary leve...",2016,Livonia Public Schools


In [None]:
allsddata2017 = pd.DataFrame(columns=attr_values)
## 2015 - 2021
## consider log median income
## limit covariates to list on google docs
## change acs 5 to acs 1
## consider averaging years for school year 
for schooldistrict in sd_list:
  failed = []
  try:
    data = censusdata.download('acs1', 2017, 
            censusdata.censusgeo([('state', '26'),
                                  ('school district (unified)', schooldistrict)]),
                                  # ('block group', '*')]),
            attr_keys, key='b15ade4ca040c6435803ef83605eccaa17abb8e0')  #may need key to access, see report for details on how to obtain key
  except:
    failed.append(schooldistrict)
    # print(schooldistrict)
    continue
  data['GeoInformation'] = data.index
  data.reset_index(drop=True, inplace=True)
  data = data.rename(columns=attr)
  #print(data.columns)
  allsddata2017 = allsddata2017.append(data)
allsddata2017['Year'] = 2017

allsddata2017['SchoolDistrict'] = allsddata2017['GeoInformation'].astype(str).str.split(', Michigan').str[0]
cols = allsddata2017.columns.drop(['SchoolDistrict','GeoInformation'])

allsddata2017[cols] = allsddata2017[cols].apply(pd.to_numeric, errors='coerce')
allsddata2017 = allsddata2017.reset_index()
allsddata2017

Unnamed: 0,index,Estimate Total White alone,Estimate Total Black or African American alone,Estimate Total American Indian and Alaska Native alone,Estimate Total Asian alone,Estimate Total Native Hawaiian and Other Pacific Islander alone,Estimate Median household income in the past 12 months (in 2018 inflation-adjusted dollars),Estimate Total Regular high school diploma,Estimate Total Bachelor's degree,Estimate Total Master's degree,"Estimate Total Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months",Estimate Total Family households,GeoInformation,Year,SchoolDistrict
0,0,63359.0,3362.0,39.0,343.0,60.0,47461,13147.0,5184.0,2781.0,4989.0,18319,"Port Huron Area School District, Michigan: Sum...",2017,Port Huron Area School District
1,0,43125.0,12099.0,318.0,7539.0,0.0,55378,11793.0,9554.0,4313.0,3634.0,16576,"Kentwood Public Schools, Michigan: Summary lev...",2017,Kentwood Public Schools
2,0,88518.0,10685.0,124.0,6090.0,0.0,75278,11992.0,21814.0,10039.0,3468.0,28722,"Walled Lake Consolidated Schools, Michigan: Su...",2017,Walled Lake Consolidated Schools
3,0,92536.0,8485.0,94.0,12908.0,0.0,55653,20648.0,16263.0,6200.0,9075.0,30982,"Warren Consolidated Schools, Michigan: Summary...",2017,Warren Consolidated Schools
4,0,90717.0,4627.0,40.0,1938.0,0.0,45487,12732.0,10808.0,4795.0,14947.0,22444,"Dearborn City School District, Michigan: Summa...",2017,Dearborn City School District
5,0,52375.0,13534.0,166.0,14698.0,30.0,82035,7557.0,18197.0,12888.0,,23220,"Farmington Public School District, Michigan: S...",2017,Farmington Public School District
6,0,120789.0,11739.0,482.0,30303.0,105.0,75568,7506.0,29540.0,27317.0,1584.0,33181,"Ann Arbor Public Schools, Michigan: Summary le...",2017,Ann Arbor Public Schools
7,0,90994.0,9007.0,114.0,18101.0,0.0,87379,14346.0,24657.0,16153.0,2026.0,31944,"Plymouth-Canton Community Schools, Michigan: S...",2017,Plymouth-Canton Community Schools
8,0,94834.0,9304.0,881.0,2970.0,0.0,68735,18506.0,14945.0,4533.0,,27897,"Chippewa Valley Schools, Michigan: Summary lev...",2017,Chippewa Valley Schools
9,0,95227.0,10414.0,224.0,3474.0,0.0,72588,15882.0,19823.0,7936.0,1517.0,30957,"Livonia Public Schools, Michigan: Summary leve...",2017,Livonia Public Schools


In [None]:
allsddata2018 = pd.DataFrame(columns=attr_values)
## 2015 - 2021
## consider log median income
## limit covariates to list on google docs
## change acs 5 to acs 1
## consider averaging years for school year 
for schooldistrict in sd_list:
  failed = []
  try:
    data = censusdata.download('acs1', 2018, 
            censusdata.censusgeo([('state', '26'),
                                  ('school district (unified)', schooldistrict)]),
                                  # ('block group', '*')]),
            attr_keys, key='b15ade4ca040c6435803ef83605eccaa17abb8e0')  #may need key to access, see report for details on how to obtain key
  except:
    failed.append(schooldistrict)
    # print(schooldistrict)
    continue
  data['GeoInformation'] = data.index
  data.reset_index(drop=True, inplace=True)
  data = data.rename(columns=attr)
  #print(data.columns)
  allsddata2018 = allsddata2018.append(data)
allsddata2018['Year'] = 2018

allsddata2018['SchoolDistrict'] = allsddata2018['GeoInformation'].astype(str).str.split(', Michigan').str[0]
cols = allsddata2018.columns.drop(['SchoolDistrict','GeoInformation'])

allsddata2018[cols] = allsddata2018[cols].apply(pd.to_numeric, errors='coerce')
allsddata2018 = allsddata2018.reset_index()
allsddata2018

Unnamed: 0,index,Estimate Total White alone,Estimate Total Black or African American alone,Estimate Total American Indian and Alaska Native alone,Estimate Total Asian alone,Estimate Total Native Hawaiian and Other Pacific Islander alone,Estimate Median household income in the past 12 months (in 2018 inflation-adjusted dollars),Estimate Total Regular high school diploma,Estimate Total Bachelor's degree,Estimate Total Master's degree,"Estimate Total Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months",Estimate Total Family households,GeoInformation,Year,SchoolDistrict
0,0,61326.0,3765.0,158.0,315.0,57.0,45829,14134,4852,2918,5394.0,17924,"Port Huron Area School District, Michigan: Sum...",2018,Port Huron Area School District
1,0,42190.0,16680.0,127.0,5907.0,166.0,53939,10884,11023,2760,,16658,"Kentwood Public Schools, Michigan: Summary lev...",2018,Kentwood Public Schools
2,0,82950.0,7483.0,211.0,11694.0,0.0,75854,12117,23701,10273,1310.0,26068,"Walled Lake Consolidated Schools, Michigan: Su...",2018,Walled Lake Consolidated Schools
3,0,92497.0,11214.0,571.0,9808.0,0.0,61732,20785,15613,5659,7522.0,29380,"Warren Consolidated Schools, Michigan: Summary...",2018,Warren Consolidated Schools
4,0,92917.0,2807.0,530.0,3098.0,0.0,55156,12710,12435,6519,13415.0,23243,"Dearborn City School District, Michigan: Summa...",2018,Dearborn City School District
5,0,53518.0,14574.0,470.0,12591.0,171.0,87015,8806,19955,12204,,21188,"Farmington Public School District, Michigan: S...",2018,Farmington Public School District
6,0,122124.0,11125.0,152.0,31813.0,92.0,72694,6783,29496,26961,2341.0,34173,"Ann Arbor Public Schools, Michigan: Summary le...",2018,Ann Arbor Public Schools
7,0,95261.0,7512.0,519.0,16106.0,0.0,91148,11730,27718,16059,2811.0,33289,"Plymouth-Canton Community Schools, Michigan: S...",2018,Plymouth-Canton Community Schools
8,0,90349.0,16257.0,709.0,2903.0,0.0,75895,18487,14601,6784,4376.0,29807,"Chippewa Valley Schools, Michigan: Summary lev...",2018,Chippewa Valley Schools
9,0,97318.0,6186.0,58.0,4692.0,0.0,72101,16268,19625,8054,2327.0,28582,"Livonia Public Schools, Michigan: Summary leve...",2018,Livonia Public Schools


In [None]:
allsddata2019 = pd.DataFrame(columns=attr_values)
## 2015 - 2021
## consider log median income
## limit covariates to list on google docs
## change acs 5 to acs 1
## consider averaging years for school year 
for schooldistrict in sd_list:
  failed = []
  try:
    data = censusdata.download('acs1', 2019, 
            censusdata.censusgeo([('state', '26'),
                                  ('school district (unified)', schooldistrict)]),
                                  # ('block group', '*')]),
            attr_keys, key='b15ade4ca040c6435803ef83605eccaa17abb8e0')  #may need key to access, see report for details on how to obtain key
  except:
    failed.append(schooldistrict)
    # print(schooldistrict)
    continue
  data['GeoInformation'] = data.index
  data.reset_index(drop=True, inplace=True)
  data = data.rename(columns=attr)
  #print(data.columns)
  allsddata2019 = allsddata2019.append(data)
allsddata2019['Year'] = 2019

allsddata2019['SchoolDistrict'] = allsddata2019['GeoInformation'].astype(str).str.split(', Michigan').str[0]
cols = allsddata2019.columns.drop(['SchoolDistrict','GeoInformation'])

allsddata2019[cols] = allsddata2019[cols].apply(pd.to_numeric, errors='coerce')
allsddata2019 = allsddata2019.reset_index()
allsddata2019

Unnamed: 0,index,Estimate Total White alone,Estimate Total Black or African American alone,Estimate Total American Indian and Alaska Native alone,Estimate Total Asian alone,Estimate Total Native Hawaiian and Other Pacific Islander alone,Estimate Median household income in the past 12 months (in 2018 inflation-adjusted dollars),Estimate Total Regular high school diploma,Estimate Total Bachelor's degree,Estimate Total Master's degree,"Estimate Total Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months",Estimate Total Family households,GeoInformation,Year,SchoolDistrict
0,0,60038,4770,128,749,25,48709,12520,6704,3128,6000.0,17390,"Port Huron Area School District, Michigan: Sum...",2019,Port Huron Area School District
1,0,40385,13448,152,5999,0,59064,10814,8132,2291,3707.0,16348,"Kentwood Public Schools, Michigan: Summary lev...",2019,Kentwood Public Schools
2,0,84651,10028,288,7807,0,77364,11536,21006,11406,2072.0,27647,"Walled Lake Consolidated Schools, Michigan: Su...",2019,Walled Lake Consolidated Schools
3,0,89386,9197,457,13993,0,59608,26212,12755,7013,6307.0,30480,"Warren Consolidated Schools, Michigan: Summary...",2019,Warren Consolidated Schools
4,0,87929,3904,241,1831,56,53596,12245,9875,5760,14050.0,21017,"Dearborn City School District, Michigan: Summa...",2019,Dearborn City School District
5,0,53619,16391,217,14128,0,88329,6912,20251,14621,,20783,"Farmington Public School District, Michigan: S...",2019,Farmington Public School District
6,0,119679,14454,563,27241,0,77888,6817,28958,25074,1845.0,34202,"Ann Arbor Public Schools, Michigan: Summary le...",2019,Ann Arbor Public Schools
7,0,90981,6955,227,19403,0,91835,12236,25824,17418,1462.0,34002,"Plymouth-Canton Community Schools, Michigan: S...",2019,Plymouth-Canton Community Schools
8,0,85704,8937,279,2218,0,76245,17338,12337,6556,1786.0,25728,"Chippewa Valley Schools, Michigan: Summary lev...",2019,Chippewa Valley Schools
9,0,98647,6492,35,5496,0,73229,17434,20432,9517,2379.0,32148,"Livonia Public Schools, Michigan: Summary leve...",2019,Livonia Public Schools


In [None]:
allsddata = pd.concat([allsddata2015, allsddata2016, allsddata2017, allsddata2018, allsddata2019], ignore_index=True)

pd.options.display.max_columns = 50
scott_data_reduced = allsddata
scott_data_reduced.drop(columns=['GeoInformation', 'index'], inplace = True)


In [None]:
scott_data_reduced = scott_data_reduced.set_index('SchoolDistrict')
scott_data_reduced

Unnamed: 0_level_0,Estimate Total White alone,Estimate Total Black or African American alone,Estimate Total American Indian and Alaska Native alone,Estimate Total Asian alone,Estimate Total Native Hawaiian and Other Pacific Islander alone,Estimate Median household income in the past 12 months (in 2018 inflation-adjusted dollars),Estimate Total Regular high school diploma,Estimate Total Bachelor's degree,Estimate Total Master's degree,"Estimate Total Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months",Estimate Total Family households,Year
SchoolDistrict,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Port Huron Area School District,61617.0,2784.0,61.0,520.0,0.0,43477,14722.0,4715.0,1590.0,5355.0,18020,2015
Kentwood Public Schools,44606.0,14081.0,186.0,3685.0,0.0,51477,8866.0,10140.0,2645.0,2138.0,16414,2015
Walled Lake Consolidated Schools,79063.0,12353.0,478.0,6234.0,0.0,70876,12483.0,20461.0,9460.0,2882.0,26037,2015
Warren Consolidated Schools,,,,,,52940,23300.0,14467.0,5963.0,4857.0,30837,2015
Dearborn City School District,92831.0,4467.0,185.0,2585.0,231.0,51433,11407.0,10927.0,4643.0,15100.0,22176,2015
...,...,...,...,...,...,...,...,...,...,...,...,...
Wayne-Westland Community School District,61084.0,18747.0,424.0,1196.0,0.0,52867,18146.0,6317.0,2598.0,6069.0,20447,2019
Bay City School District,66652.0,1584.0,128.0,406.0,0.0,49668,14096.0,8241.0,2728.0,2995.0,17738,2019
Troy School District,42072.0,2188.0,0.0,20297.0,0.0,112437,4145.0,15473.0,10858.0,1095.0,18377,2019
Utica Community Schools,170436.0,6804.0,151.0,9621.0,16.0,76070,32450.0,28220.0,13706.0,5643.0,49903,2019


In [None]:
allsddata = pd.DataFrame(columns=attr_values)

for schooldistrict in sd_nums:
  failed = []
  try:
    data = censusdata.download('acs5', 2020, 
            censusdata.censusgeo([('state', '26'),
                                  ('school district (unified)', schooldistrict)]),
                                  # ('block group', '*')]),
            attr_keys, key='b15ade4ca040c6435803ef83605eccaa17abb8e0')  #may need key to access, see report for details on how to obtain key
  except:
    failed.append(schooldistrict)
    # print(schooldistrict)
    continue
  data['GeoInformation'] = data.index
  data.reset_index(drop=True, inplace=True)
  data = data.rename(columns=attr)
  #print(data.columns)
  allsddata = allsddata.append(data)

allsddata['SchoolDistrict'] = allsddata['GeoInformation'].astype(str).str.split(', Michigan').str[0]
cols = allsddata.columns.drop(['SchoolDistrict','GeoInformation'])

allsddata[cols] = allsddata[cols].apply(pd.to_numeric, errors='coerce')
allsddata = allsddata.reset_index()
allsddata

Unnamed: 0,index,Estimate Total White alone,Estimate Total Black or African American alone,Estimate Total American Indian and Alaska Native alone,Estimate Total Asian alone,Estimate Total Native Hawaiian and Other Pacific Islander alone,Estimate Median household income in the past 12 months (in 2018 inflation-adjusted dollars),Estimate Total Regular high school diploma,Estimate Total Bachelor's degree,Estimate Total Master's degree,"Estimate Total Living in household with Supplemental Security Income (SSI), cash public assistance income, or Food Stamps/SNAP in the past 12 months",Estimate Total Family households,GeoInformation,SchoolDistrict
0,0,3942,42,329,17,0,45000,1273,367,137,278,1251,"L'Anse Area Schools, Michigan: Summary level: ...",L'Anse Area Schools
1,0,1968,4,9,43,0,51453,670,224,66,96,518,"Superior Central Schools, Michigan: Summary le...",Superior Central Schools
2,0,2968,10,0,91,0,53716,454,228,166,272,695,"Adams Township School District, Michigan: Summ...",Adams Township School District
3,0,7960,0,19,6,0,67762,1733,1580,663,138,2527,"Addison Community Schools, Michigan: Summary l...",Addison Community Schools
4,0,27048,895,99,74,10,48250,6317,2474,1551,1632,6881,"Adrian City School District, Michigan: Summary...",Adrian City School District
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
509,0,26379,1830,106,826,0,59597,6853,3091,1234,1249,7671,"Southgate Community School District, Michigan:...",Southgate Community School District
510,0,13954,69,30,259,0,73054,3190,1563,826,427,3976,"Sparta Area Schools, Michigan: Summary level: ...",Sparta Area Schools
511,0,12133,18,54,76,0,81333,1605,2630,1037,62,3635,"Spring Lake Public Schools, Michigan: Summary ...",Spring Lake Public Schools
512,0,5111,95,82,8,0,61667,1299,354,135,256,1435,"Springport Public Schools, Michigan: Summary l...",Springport Public Schools


In [None]:
allsddata.drop(columns=['GeoInformation', 'index'], inplace = True)
scott_data = allsddata.set_index('SchoolDistrict')


In [None]:
from google.colab import  drive
drive.mount('/drive')
scott_data.to_csv('/drive/My Drive/SIADS_697_698_Capstone/4_Colabs/3_Data_Final/Demographic_Data.csv')

Drive already mounted at /drive; to attempt to forcibly remount, call drive.mount("/drive", force_remount=True).


END OF CAPSTONE CODE FOR DEMOGRAPHIC DATA
