# School Mapping
## Analyzer

This notebook builds a normalized dataset mapping California Department of Education (CDE) school metadata to SFUSD school metadata. There is little algorithmic work here; most of the matching is done manually.

***

### `TODO:`

* Extend the dataset to include TK/EES sites (currently the data is joined on lottery application data)
* Extend the dataset to include charters
* Figure out what's happening with Ida B Wells

In [80]:
import pandas as pd
import re

# SFUSD school metadata is read from the lottery directory for now: that copy is
# reliable at the moment, though it should eventually be pulled from elsewhere.
sfusd_mapping_path = '../../lottery/merged/sfusd-school-id-mapping.csv'
df_sfusd = pd.read_csv( sfusd_mapping_path )

# CDE school metadata is read from the enrollment directory (tab-separated file).
cde_schools_path = '../../enrollment/raw/pubschls.txt'
df_cde = pd.read_csv( cde_schools_path, sep = '\t' )

In [None]:
# Hand-maintained name overrides for schools whose names do not line up automatically.
#   key   -> CDE school name after the Elementary/Middle/High -> ES/MS/HS abbreviation
#   value -> the corresponding `school_name` in the SFUSD mapping file
# TODO: consider moving this table out of the notebook and into a CSV.
school_map = {
    "Academy (The)- SF @McAteer": "The Academy - SF @ McAteer HS",
    "Asawa (Ruth) SF Sch of the Arts, A Public School": "Asawa (Ruth) SOTA HS",
    "Brown Jr. (Willie L) MS": "Brown Jr. (Willie) MS",
    "Buena Vista/ Horace Mann K-8": "Buena Vista Horace Mann K-8",
    "Burton (Phillip and Sala) Academic HS": "Burton (Phillip and Sala) HS",
    "Carmichael (Bessie)/FEC": "Carmichael (Bessie) K-8",
    "Carver (George Washington) ES": "Carver (Dr George W) ES",
    "Chinese Immersion School at DeAvila": "Chinese Immersion School at DeAvila ES",
    "Clarendon Alternative ES": "Clarendon ES",
    "Cobb (William L.) ES": "Cobb (Dr William L) ES",
    "Drew (Charles) College Preparatory Academy": "Drew (Dr Charles) College Preparatory Academy ES",
    "Flynn (Leonard R.) ES": "Flynn (Leonard R) ES",
    "Havard (Leola M.) Early Education": "Havard Tk",
    "International Studies Academy": "Int'L Studies Acad",
    "Jordan (June) School for Equity": "Jordan (June) HS",
    "King Jr. (Martin Luther) Academic MS": "King Jr (Dr Martin L) MS",
    "Lakeshore Alternative ES": "Lakeshore ES",
    "Lau (Gordon J.) ES": "Lau (Gordon J) ES",
    "Lawton Alternative": "Lawton K-8",
    "Lee (Edwin and Anita) Newcomer": "Lee (Edwin and Anita) Newcomer ES",
    "Lilienthal (Claire) ES": "Lilienthal (Claire) K-8",
    "Malcolm X Academy": "Malcolm X Academy ES",
    "Milk (Harvey) Civil Rights ES": "Milk (Harvey) Civil Right ES",
    "Mission Education Center": "Mission Education Center ES",
    "Moscone (George R.) ES": "Moscone (George R) ES",
    "Noriega Children Center": "Noriega EES",
    "Presidio Early Ed.": "Presidio Tk",
    "Revere (Paul) ES": "Revere (Paul) K-8",
    "Rooftop ES": "Rooftop K-8",
    "S.F. International HS": "SF International HS",
    "San Francisco Community Alternative": "SF Community K-8",
    "San Francisco Public Montessori": "SF Public Montessori ES",
    "Spring Valley ES": "Spring Valley Science ES",
    "Stockton (Commodore) Children Center": "Stockton (Commodore) EES",
    "Taylor (Edward R.) ES": "Taylor (Edward R) ES",
    "Tenderloin Community": "Tenderloin Community ES",
    "Tule Elk Park Children Center": "Tule Elk Park EES",
    "Wallenberg (Raoul) Traditional HS": "Wallenberg (Raoul) HS",
    "Yick Wo ES": "Wo (Yick) ES",
    "Yu (Alice Fong) ES": "Yu (Alice Fong) K-8",
}

In [85]:
# Print the mapping sorted by key, one dict-literal entry per line (presumably so the
# sorted listing can be pasted back into the `school_map` definition).
# `!r` lets Python choose the quoting, so an entry stays a valid string literal even
# when a name contains an apostrophe (hard-coded single quotes produced
# 'Int'L Studies Acad', which is not valid Python).
for cde_name, sfusd_name in sorted( school_map.items() ):
    print( f"{cde_name!r}: {sfusd_name!r}," )

'Academy (The)- SF @McAteer': 'The Academy - SF @ McAteer HS',
'Asawa (Ruth) SF Sch of the Arts, A Public School': 'Asawa (Ruth) SOTA HS',
'Brown Jr. (Willie L) MS': 'Brown Jr. (Willie) MS',
'Buena Vista/ Horace Mann K-8': 'Buena Vista Horace Mann K-8',
'Burton (Phillip and Sala) Academic HS': 'Burton (Phillip and Sala) HS',
'Carmichael (Bessie)/FEC': 'Carmichael (Bessie) K-8',
'Carver (George Washington) ES': 'Carver (Dr George W) ES',
'Chinese Immersion School at DeAvila': 'Chinese Immersion School at DeAvila ES',
'Clarendon Alternative ES': 'Clarendon ES',
'Cobb (William L.) ES': 'Cobb (Dr William L) ES',
'Drew (Charles) College Preparatory Academy': 'Drew (Dr Charles) College Preparatory Academy ES',
'Flynn (Leonard R.) ES': 'Flynn (Leonard R) ES',
'Havard (Leola M.) Early Education': 'Havard Tk',
'International Studies Academy': 'Int'L Studies Acad',
'Jordan (June) School for Equity': 'Jordan (June) HS',
'King Jr. (Martin Luther) Academic MS': 'King Jr (Dr Martin L) MS',
'Lakeshor

In [84]:
# Build the CDE <-> SFUSD school mapping and write it to ./school-id-mapping.csv.
#
# 1. Keep only CDE rows for San Francisco Unified that are non-charter, non-virtual
#    and not preschools.
# 2. Derive `school_modified` from the CDE `School` name: abbreviate the school level,
#    then apply the manual overrides in `school_map`.
# 3. Outer-merge against the SFUSD mapping (minus Tk / EES sites) on the school name,
#    so unmatched rows from either side are retained in the export.

# --- 1. Filter the CDE directory ------------------------------------------------
is_sfusd = df_cde['District'] == 'San Francisco Unified'
is_non_charter = df_cde['Charter'] == 'N'
is_non_virtual = df_cde['Virtual'] == 'N'
is_not_preschool = df_cde['SOCType'] != 'Preschool'

df = df_cde \
    [is_sfusd & is_non_charter & is_non_virtual & is_not_preschool] \
    .assign( school_modified = lambda _df: _df['School'] )

# --- 2. Normalise the school names ----------------------------------------------
# Vectorised `.str.replace` passes missing names through (the previous per-row
# `re.sub` raised on them). The `\b` anchors restrict each substitution to whole
# words, so a name such as "Highland" is not rewritten to "HSland".
level_abbreviations = {
    r'\bElementary\b': 'ES',
    r'\bMiddle\b': 'MS',
    r'\bHigh\b': 'HS',
}

school_modified = df['school_modified']
for level_pattern, abbreviation in level_abbreviations.items():
    school_modified = school_modified.str.replace( level_pattern, abbreviation, regex = True )

# Manual overrides win; names without an override keep their abbreviated form.
school_modified = school_modified.map( school_map ).fillna( school_modified )
df = df.assign( school_modified = school_modified )

# --- 3. Join against SFUSD and export -------------------------------------------
# Tk / EES sites are excluded from the SFUSD side of the join.
# `na = False` treats a missing school_name as "does not contain", so the boolean
# negation cannot fail on a missing value.
sfusd_names = df_sfusd['school_name']
df_sfusd_joinable = df_sfusd[
    ~sfusd_names.str.contains( 'Tk', na = False )
    & ~sfusd_names.str.contains( 'EES', na = False )
]

# NOTE: the row index is still written as the first (unnamed) CSV column, exactly as
# before, so the layout of the exported file is unchanged.
df \
    .merge(
        df_sfusd_joinable,
        left_on = 'school_modified',
        right_on = 'school_name',
        how = 'outer'
    ) \
    .reset_index( drop = True ) \
    .to_csv( './school-id-mapping.csv' )