In [1]:
import seaborn as sns
import metapack as mp
import pandas as pd
import numpy as np
import geoid
import matplotlib.pyplot as plt
from IPython.display import display 
from itertools import islice

%matplotlib inline
# Use seaborn's 'notebook' plotting context for any figures in this notebook.
sns.set_context('notebook')
# Run metapack's Jupyter initialisation hook.
# NOTE(review): what init() configures is not visible from this notebook.
mp.jupyter.init()


In [2]:
# Open the package that the later cells read their reference tables from,
# using the source-package loader. The open_package() variant on the next
# line is left disabled.
#pkg = mp.jupyter.open_package()
pkg = mp.jupyter.open_source_package()
# Show the package as this cell's output.
pkg

In [3]:
# List the census summary level names for the three types of school districts:
# elementary, secondary, and unified.
display([name for name in geoid.core.names.keys() if name.startswith('sd')])

# Create a dataframe of all school districts from the Census.
# B01003 is the smallest census table; we just need the names and geoids.
# Each frame is tagged with its district type before the three are stacked.
sdelm = pkg.reference('sdelm').dataframe()
sdelm['sd_type'] = 'elem'
sdsec = pkg.reference('sdsec').dataframe()
sdsec['sd_type'] = 'sec'
sduni = pkg.reference('sduni').dataframe()
sduni['sd_type'] = 'uni'

sd = pd.concat([sdelm, sdsec, sduni]).rename(columns={'name': 'district_name'})

# Now make a map keyed by the NCES code for the district, which is the last
# 7 characters of the district geoid (the index value of each row in `sd`).
# Geoids ending in '0699999' are for 'Remainder of California', and are not
# real districts, so they are skipped.
census_sd = {
    str(k)[-7:]: {
        'sd_type': v.sd_type,
        'district_name': v.district_name,
        'geoid': str(k),
    }
    for k, v in sd.iterrows()
    if str(k)[-7:] != '0699999'
}


['sdelm', 'sdsec', 'sduni']

In [4]:
# Load the public schools and school districts reference tables as dataframes.
public_schools_df, school_districts_df = (
    pkg.reference(ref_name).dataframe()
    for ref_name in ('public_schools', 'school_districts')
)

# Hold the CDS code as text so it can be sliced by character position.
public_schools_df['cdscode'] = public_schools_df.cdscode.astype(str)

from collections import namedtuple

# The three parts of a CDS code: county, district and school.
CDSCode = namedtuple('CDSCode', 'county district school')


def expand_cds(cdscode):
    """Split a CDS code into its county, district and school parts.

    The code is converted to a string first. Codes that are 6 or 13
    characters long are left-padded with one '0', because a code that has
    been through an integer conversion loses its leading zero.

    Args:
        cdscode: The code to split, as a string or an integer.

    Returns:
        A ``CDSCode`` namedtuple:

        * 14 characters: county (first 2), district (next 5) and school
          (last 7).
        * 7 characters: county (first 2) and district (next 5); ``school``
          is ``None``.

        ``None`` is returned when the code has any other length.
    """
    cs = str(cdscode)
    if len(cs) in (6, 13):
        # Conversion to int strips the leading zero; put it back.
        cs = '0' + cs

    if len(cs) == 14:
        # The school part is the trailing 7 characters, not the leading 7.
        return CDSCode(cs[0:2], cs[2:7], cs[7:])

    if len(cs) == 7:
        # District-only code: slice by absolute position so the district
        # part is not empty.
        return CDSCode(cs[0:2], cs[2:7], None)

    # Unrecognised length: callers receive None, as before.
    return None

# Store the parsed (county, district, school) tuple for every school row.
public_schools_df['cds_parts'] = public_schools_df['cdscode'].apply(expand_cds)

# cd_code joins the first two parsed fields (county and district) into one
# string.
public_schools_df['cd_code'] = public_schools_df['cdscode'].apply(
    lambda code: ''.join(expand_cds(code)[:2])
)


In [5]:
# Unique (cd_code, ncesdist) pairs from the public schools table.
# NOTE(review): 'No Data' is replaced with the *string* 'None', not the None
# object, so replaced values are still truthy -- confirm this is intended.
district_codes = (
    public_schools_df[['cd_code', 'ncesdist']]
    .drop_duplicates()
    .replace({'No Data': 'None'})
)

# Keep only the pairs whose NCES district code is present in census_sd, and
# combine each with the census geoid, name and district type.
rows = []
for _, school in district_codes.iterrows():
    census_entry = census_sd.get(school.ncesdist)
    if not census_entry:
        continue
    # The census_sd entry itself is updated here, so this loop modifies
    # census_sd as a side effect. An empty cd_code is stored as None.
    census_entry['cd_code'] = school.cd_code or None
    rows.append((
        school.ncesdist,
        census_entry['geoid'],
        census_entry['cd_code'],
        census_entry['district_name'],
        census_entry['sd_type'],
    ))

df = pd.DataFrame(rows, columns='nces_code geoid cd_code district_name sd_type'.split())
df.head()

Unnamed: 0,nces_code,geoid,cd_code,district_name,sd_type
0,601770,97000US0601770,161119,"Alameda City Unified School District, California",uni
1,601860,97000US0601860,161127,"Albany City Unified School District, California",uni
2,604740,97000US0604740,161143,"Berkeley Unified School District, California",uni
3,607800,97000US0607800,161150,"Castro Valley Unified School District, California",uni
4,612630,97000US0612630,161168,"Emery Unified School District, California",uni


In [6]:
# Show the 'county_codes' reference as the cell output, to inspect its column
# headers, types and descriptions.
pkg.reference('county_codes')

Header,Type,Description
notary_code,string,County code used by the California notary and Secretary of State
fips_code,string,County code used by the US Census
simple_name,string,Simple county name
census_name,string,County name in the US Census


In [7]:
# Load the county code table as a dataframe. It pairs the county code used by
# the California notary / Secretary of State (notary_code) with the county
# code used by the US Census (fips_code).
cc = pkg.reference('county_codes').dataframe()
# Preview the first rows.
cc.head()

Unnamed: 0,notary_code,fips_code,simple_name,census_name
0,1,1,Alameda,"Alameda County, California"
1,2,3,Alpine,"Alpine County, California"
2,3,5,Amador,"Amador County, California"
3,4,7,Butte,"Butte County, California"
4,5,9,Calaveras,"Calaveras County, California"


In [8]:
# Lookup from notary county code to the Census (FIPS) county code.
cc_map = dict(zip(cc['notary_code'], cc['fips_code']))

In [14]:
# The notary county code is the first two characters of cd_code; rows with an
# empty or missing cd_code get None.
df['notary_county_code'] = df['cd_code'].apply(
    lambda code: code[:2] if code else None
)

# Translate the notary county code to the Census (FIPS) county code through
# cc_map; codes that are missing, or absent from cc_map, give None.
df['fips_county_code'] = df['notary_county_code'].apply(
    lambda county: cc_map.get(county) if county else None
)
df

Unnamed: 0,nces_code,geoid,cd_code,district_name,sd_type,notary_county_code,fips_county_code
0,0601770,97000US0601770,0161119,"Alameda City Unified School District, California",uni,01,01
1,0601860,97000US0601860,0161127,"Albany City Unified School District, California",uni,01,01
2,0604740,97000US0604740,0161143,"Berkeley Unified School District, California",uni,01,01
3,0607800,97000US0607800,0161150,"Castro Valley Unified School District, California",uni,01,01
4,0612630,97000US0612630,0161168,"Emery Unified School District, California",uni,01,01
5,0614400,97000US0614400,0161176,"Fremont Unified School District, California",uni,01,01
6,0616740,97000US0616740,0161192,"Hayward Unified School District, California",uni,01,01
7,0622110,97000US0622110,0161200,Livermore Valley Joint Unified School District...,uni,01,01
8,0626130,95000US0626130,0161218,"Mountain House Elementary School District, Cal...",elem,01,01
9,0627060,97000US0627060,0161234,"Newark Unified School District, California",uni,01,01
