In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from my_functions import combokey_converter

%matplotlib inline
sns.set_style('whitegrid')
plt.rc('axes', titlesize = 14, titleweight = 'bold', labelweight = 'bold')

In [2]:
raw = pd.read_csv('../filtered_data/00_crdc_1516_initial.csv', 
                        dtype = {'LEAID':np.object})

In [3]:
filtered = pd.read_csv('../filtered_data/04_filter_final.csv', dtype = {'LEAID':np.object})

In [4]:
raw['COMBOKEY'] = combokey_converter.convert(raw, 'LEAID', 'SCHID')

In [5]:
filtered_combo = filtered[['COMBOKEY','LAT1516']]

In [6]:
filtered_raw_joined = pd.merge(raw, filtered_combo, on = 'COMBOKEY', how = 'left')
filtered_from_raw = filtered_raw_joined.copy()
filtered_from_raw = filtered_from_raw[filtered_from_raw.LAT1516.isnull()]

In [7]:
len(filtered_from_raw)

80635

In [8]:
filtered_from_raw.columns.values

array(['LEA_STATE', 'LEA_STATE_NAME', 'LEAID', 'LEA_NAME', 'SCHID',
       'SCH_NAME', 'COMBOKEY', 'JJ', 'SCH_GRADE_PS', 'SCH_GRADE_KG',
       'SCH_GRADE_G01', 'SCH_GRADE_G02', 'SCH_GRADE_G03', 'SCH_GRADE_G04',
       'SCH_GRADE_G05', 'SCH_GRADE_G06', 'SCH_GRADE_G07', 'SCH_GRADE_G08',
       'SCH_GRADE_G09', 'SCH_GRADE_G10', 'SCH_GRADE_G11', 'SCH_GRADE_G12',
       'SCH_GRADE_UG', 'SCH_UGDETAIL_HS', 'SCH_STATUS_SPED',
       'SCH_STATUS_MAGNET', 'SCH_STATUS_CHARTER', 'SCH_STATUS_ALT',
       'SCH_ENR_HI_M', 'SCH_ENR_HI_F', 'SCH_ENR_AM_M', 'SCH_ENR_AM_F',
       'SCH_ENR_AS_M', 'SCH_ENR_AS_F', 'SCH_ENR_HP_M', 'SCH_ENR_HP_F',
       'SCH_ENR_BL_M', 'SCH_ENR_BL_F', 'SCH_ENR_WH_M', 'SCH_ENR_WH_F',
       'SCH_ENR_TR_M', 'SCH_ENR_TR_F', 'TOT_ENR_M', 'TOT_ENR_F',
       'SCH_ENR_LEP_M', 'SCH_ENR_LEP_F', 'SCH_ENR_IDEA_M',
       'SCH_ENR_IDEA_F', 'SCH_DUAL_IND', 'SCH_DUALENR_HI_M',
       'SCH_DUALENR_HI_F', 'SCH_DUALENR_AM_M', 'SCH_DUALENR_AM_F',
       'SCH_DUALENR_AS_M', 'SCH_DUALENR_AS_F

# Analysis

In [9]:
def missing_value_mapper(value):
    """Converts any negative number into 0, as these negative numbers represent missing/null values"""
    if isinstance(value, int):
        if value < 0:
            return 0
    return value

filtered_from_raw = filtered_from_raw.applymap(missing_value_mapper)

In [10]:
filtered_from_raw['de_total_enrollment'] = filtered_from_raw['TOT_DUALENR_M'] + filtered_from_raw['TOT_DUALENR_F']

### DE

In [11]:
schools_with_de_students = filtered_from_raw.copy()
schools_with_de_students = schools_with_de_students[schools_with_de_students.de_total_enrollment > 0]

** Fiiltered-Schools with DE students **

In [12]:
first_columns = ['SCH_NAME', 'de_total_enrollment']
reorder = first_columns + [c for c in schools_with_de_students.columns if c not in first_columns]
schools_with_de_students = schools_with_de_students[reorder].sort_values('de_total_enrollment', ascending = False)
schools_with_de_students

Unnamed: 0,SCH_NAME,de_total_enrollment,LEA_STATE,LEA_STATE_NAME,LEAID,LEA_NAME,SCHID,COMBOKEY,JJ,SCH_GRADE_PS,...,SCH_IBENR_WH_F,SCH_IBENR_TR_M,SCH_IBENR_TR_F,TOT_IBENR_M,TOT_IBENR_F,SCH_IBENR_LEP_M,SCH_IBENR_LEP_F,SCH_IBENR_IDEA_M,SCH_IBENR_IDEA_F,LAT1516
59112,BROOKLYN TECHNICAL HIGH SCHOOL,3778,NY,NEW YORK,3620580,NEW YORK CITY PUBLIC SCHOOLS,1928,='362058001928',No,No,...,0,0,0,0,0,0,0,0,0,
55306,Passaic County Technical Institute,1463,NJ,NEW JERSEY,3412630,Passsaic County Vocational School District,4860,='341263004860',No,No,...,0,0,0,0,0,0,0,0,0,
2671,Basha High School,1278,AZ,ARIZONA,401870,Chandler Unified District #80,2250,='040187002250',No,No,...,0,0,0,0,0,0,0,0,0,
31069,McKenzie Career Center,1205,IN,INDIANA,1805670,M S D Lawrence Township,725,='180567000725',No,No,...,5,0,0,2,9,0,0,0,0,
53807,Bergen County Academies,1014,NJ,NEW JERSEY,3401470,Bergen County Vocational Technical School Dist...,250,='340147000250',No,No,...,20,5,5,71,74,0,0,0,0,
92581,South Kitsap High School,752,WA,WASHINGTON,5308160,South Kitsap School District,1357,='530816001357',No,No,...,0,0,0,0,0,0,0,0,0,
68466,Penta Career Center - on campus,686,OH,OHIO,3905135,Penta Career Center - District,4080,='390513504080',No,No,...,0,0,0,0,0,0,0,0,0,
91522,Puget Sound Skills Center,682,WA,WASHINGTON,5303540,Highline School District,2103,='530354002103',No,No,...,0,0,0,0,0,0,0,0,0,
53809,Bergen County Technical High School - Teterboro,631,NJ,NEW JERSEY,3401470,Bergen County Vocational Technical School Dist...,262,='340147000262',No,No,...,0,0,0,0,0,0,0,0,0,
15147,COLORADO EARLY COLLEGE FORT COLLINS,603,CO,COLORADO,800020,State Charter School Institute,6542,='080002006542',No,No,...,0,0,0,0,0,0,0,0,0,


**Total DE Students**

In [13]:
"""Total DE Students"""
schools_with_de_students.de_total_enrollment.sum()

104229

**Juvenile Justice**

In [14]:
"""Juvenile Justice Schools - How many schools and DE students?"""
print(schools_with_de_students.JJ.value_counts())
print()
print(str(schools_with_de_students[schools_with_de_students.JJ == 'Yes'].de_total_enrollment.sum()), "DE students.")

No     2678
Yes      15
Name: JJ, dtype: int64

388 DE students.


**Alternative**

In [15]:
"""Alternative Schools - How many schools and DE students?"""
print(schools_with_de_students.SCH_STATUS_ALT.value_counts())
print()
print(str(schools_with_de_students[schools_with_de_students.SCH_STATUS_ALT == 'Yes'].de_total_enrollment.sum()), "DE students.")

No     2340
Yes     353
Name: SCH_STATUS_ALT, dtype: int64

7555 DE students.


**Special Education**

In [16]:
"""Special Education - How many schools and DE students?"""
print(schools_with_de_students.SCH_STATUS_SPED.value_counts())
print()
print(str(schools_with_de_students[schools_with_de_students.SCH_STATUS_SPED == 'Yes'].de_total_enrollment.sum()), "DE students.")

No     2610
Yes      83
Name: SCH_STATUS_SPED, dtype: int64

2903 DE students.


**Grade Ranges**

In [37]:
def school_grade_range(df):
    """Calculates the number of schools in a given DataFrame (df) of a particular distribution of grades:
        Example:  If a school (in df) offers grades 9,10,11,12 exclusively, they are placed in the '9-12' 
                    category. """
    df_grade_range = []
    for index, row in df.iterrows():
        grade_range = []

        if row.SCH_GRADE_G12 == 'Yes':
            grade_range.append(12)
        if row.SCH_GRADE_G11 == 'Yes':
            grade_range.append(11)
        if row.SCH_GRADE_G10 == 'Yes':
            grade_range.append(10)
        if row.SCH_GRADE_G09 == 'Yes':
            grade_range.append(9)
        if row.SCH_GRADE_G08 == 'Yes':
            grade_range.append(8)
        if row.SCH_GRADE_G07 == 'Yes':
            grade_range.append(7)
        if row.SCH_GRADE_G06 == 'Yes':
            grade_range.append(6)
        if row.SCH_GRADE_G05 == 'Yes':
            grade_range.append(5)
        if row.SCH_GRADE_G04 == 'Yes':
            grade_range.append(4)
        if row.SCH_GRADE_G03 == 'Yes':
            grade_range.append(3)
        if row.SCH_GRADE_G02 == 'Yes':
            grade_range.append(2)
        if row.SCH_GRADE_G01 == 'Yes':
            grade_range.append(1)
        if row.SCH_GRADE_KG == 'Yes':
            grade_range.append('kg')
        if row.SCH_GRADE_PS == 'Yes':
            grade_range.append('pk')
        
        if grade_range == [12,11,10,9,8,7,6,5,4,3,2,1,'kg','pk']:
            df_grade_range.append('pk-12')
        elif grade_range == [12,11,10,9,8,7,6,5,4,3,2,1,'kg']:
            df_grade_range.append('kg-12')
        elif grade_range == [12,11,10,9,8,7,6,5]:
            df_grade_range.append('05-12')
        elif grade_range == [12,11,10,9,8,7,6]:
            df_grade_range.append('06-12')
        elif grade_range == [12,11,10,9,8,7]:
            df_grade_range.append('07-12')
        elif grade_range == [12,11,10,9,8]:
            df_grade_range.append('08-12')
        elif grade_range == [12,11,10,9]:
            df_grade_range.append('09-12')
        elif grade_range == [11,10,9]:
            df_grade_range.append('09-11')
        elif grade_range == [10,9]:
            df_grade_range.append('09-10')
        elif grade_range == [12,11,10]:
            df_grade_range.append('10-12')
        elif grade_range == [12,11]:
            df_grade_range.append('11-12')
        elif grade_range == [9]:
            df_grade_range.append('9-only')
        elif grade_range == [10]:
            df_grade_range.append('10-only')
        elif grade_range == [11]:
            df_grade_range.append('11-only')
        elif grade_range == [12]:
            df_grade_range.append('12-only')
        else:
            df_grade_range.append('other')
    return pd.DataFrame(df_grade_range, columns=['grade_range'])

In [38]:
de_stud_school_grade_range_list = school_grade_range(schools_with_de_students).join(schools_with_de_students['de_total_enrollment'].reset_index(drop = True), how = 'outer')

de_stud_school_grade_range_dist = de_stud_school_grade_range_list['grade_range'].value_counts().reset_index().sort_values('index').set_index('index')
de_stud_school_grade_range_dist['pct_of_schools'] = round(de_stud_school_grade_range_dist['grade_range'] / len(de_stud_school_grade_range_list), 3)

de_stud_school_grade_range_enrollments = pd.DataFrame(de_stud_school_grade_range_list.groupby('grade_range')['de_total_enrollment'].sum())

de_stud_school_grade_range_dist.join(de_stud_school_grade_range_enrollments).rename({'grade_range': '# schools'}, axis = 1)

Unnamed: 0_level_0,# schools,pct_of_schools,de_total_enrollment
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
05-12,34,0.013,1516
06-12,666,0.247,25242
07-12,99,0.037,2722
08-12,27,0.01,1399
09-10,15,0.006,975
09-11,22,0.008,1185
09-12,548,0.203,36343
10-12,77,0.029,4366
11-12,54,0.02,3331
11-only,1,0.0,32


## Get NCES information and join with schools_with_de_students

In [21]:
schools_with_de_students = schools_with_de_students.drop(['LAT1516'], axis = 1)

In [22]:
nces_1516_full = pd.read_csv('../filtered_data/01_nces_1516_initial_combined_ccd.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [23]:
schools_with_de_students_nces = pd.merge(schools_with_de_students, nces_1516_full, left_on = 'COMBOKEY', right_on = 'combokey', how = 'left', suffixes = ('', '_nces'))

In [24]:
non_matching_schools_with_de = schools_with_de_students_nces[schools_with_de_students_nces.LEVEL.isnull()]
match_schools_with_de = schools_with_de_students_nces[schools_with_de_students_nces.LEVEL.notnull()]

In [25]:
print(match_schools_with_de.de_total_enrollment.sum())
print(non_matching_schools_with_de.de_total_enrollment.sum())

93901
10328


**Recover some of the non-nces-matching schools**

In [26]:
"""Passing in my recovered_schools from 00_initial_filter"""
%store -r recovered_schools_all

In [27]:
recovered_schools_all = recovered_schools_all.reset_index()
nces_columns = ['COMBOKEY', 'SCH_TYPE', 'LEVEL', 'VIRTUAL', 'GSLO', 'GSHI',
       'NMCNTY15', 'LOCALE15', 'LAT1516', 'LON1516']
recovered_schools_all_nces = recovered_schools_all[nces_columns]

In [28]:
non_matching_schools_with_de = non_matching_schools_with_de.drop(['SCH_TYPE', 'LEVEL', 'VIRTUAL', 'GSLO', 'GSHI',
       'NMCNTY15', 'LOCALE15', 'LAT1516', 'LON1516'],axis = 1)

In [29]:
recovered_non_matchings = pd.merge(non_matching_schools_with_de, recovered_schools_all_nces, on='COMBOKEY')

In [30]:
print(recovered_non_matchings.de_total_enrollment.sum(), "DE students Recovered.")

7914 DE students Recovered.


In [31]:
match_schools_with_de = match_schools_with_de.append(recovered_non_matchings)

In [32]:
print(format(match_schools_with_de.de_total_enrollment.sum(), ",d"), "out of", format(schools_with_de_students.de_total_enrollment.sum(),',d'), 
     "DE Students Accounted for.")

101,815 out of 104,229 DE Students Accounted for.


**Filtered Schools that match with NCES**

**LEVEL**<br>
1 = Primary (low grade = PK through 03; high grade = PK through 08), 2 = Middle (low grade = 04 through 07; high grade = 04 through 09), 3 = High (low grade = 07 through 12; high grade = 12 only), 4 = Other (any other configuration not falling within the above three categories;including ungraded), N = Not applicable


In [33]:
"""How many schools and de students in each LEVEL of school"""
print(match_schools_with_de.LEVEL.value_counts().reset_index().sort_values('index').set_index('index'))
print()
print(str(match_schools_with_de.groupby('LEVEL').de_total_enrollment.sum()), 'DE students.')

       LEVEL
index       
1         22
2         24
3        656
4       1883
N         25

LEVEL
1      291
2     1605
3    38937
4    59953
N     1029
Name: de_total_enrollment, dtype: int64 DE students.


In [72]:
"""Looking just at the level_4 schools, where are most of the students?  Notice, the grade_range (index) comes
    from CRDC grades -- Several inconsistencies"""
"""Basically these are the schools there are filtered out due to being LEVEL=4, regardless of their CRDC grade range"""
level_4_schools = match_schools_with_de[match_schools_with_de.LEVEL == '4'].reset_index(drop=True)

de_stud_school_grade_range_list = school_grade_range(level_4_schools).join(level_4_schools['de_total_enrollment'].reset_index(drop = True), how = 'outer')

de_stud_school_grade_range_dist = de_stud_school_grade_range_list['grade_range'].value_counts().reset_index().sort_values('index').set_index('index')
de_stud_school_grade_range_dist['pct_of_schools'] = round(de_stud_school_grade_range_dist['grade_range'] / len(de_stud_school_grade_range_list), 3)

de_stud_school_grade_range_enrollments = pd.DataFrame(de_stud_school_grade_range_list.groupby('grade_range')['de_total_enrollment'].sum())

de_stud_school_grade_range_dist.join(de_stud_school_grade_range_enrollments).rename({'grade_range': '# schools'}, axis = 1)

Unnamed: 0_level_0,# schools,pct_of_schools,de_total_enrollment
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
05-12,33,0.018,1514
06-12,649,0.345,24785
07-12,50,0.027,1282
08-12,6,0.003,216
09-10,12,0.006,931
09-11,22,0.012,1185
09-12,51,0.027,5431
10-12,4,0.002,792
9-only,14,0.007,1076
kg-12,430,0.228,10295


In [73]:
"""Misreported 9-12 schools"""
level_4_school_with_grade_range = pd.concat([level_4_schools, de_stud_school_grade_range_list], axis=1,)
level_4_school_with_grade_range[level_4_school_with_grade_range.grade_range == '09-12']
with pd.option_context('display.max_columns',150):
    display(level_4_school_with_grade_range[level_4_school_with_grade_range.grade_range == 'other'])

Unnamed: 0,SCH_NAME,de_total_enrollment,LEA_STATE,LEA_STATE_NAME,LEAID,LEA_NAME,SCHID,COMBOKEY,JJ,SCH_GRADE_PS,SCH_GRADE_KG,SCH_GRADE_G01,SCH_GRADE_G02,SCH_GRADE_G03,SCH_GRADE_G04,SCH_GRADE_G05,SCH_GRADE_G06,SCH_GRADE_G07,SCH_GRADE_G08,SCH_GRADE_G09,SCH_GRADE_G10,SCH_GRADE_G11,SCH_GRADE_G12,SCH_GRADE_UG,SCH_UGDETAIL_HS,SCH_STATUS_SPED,SCH_STATUS_MAGNET,SCH_STATUS_CHARTER,SCH_STATUS_ALT,SCH_ENR_HI_M,SCH_ENR_HI_F,SCH_ENR_AM_M,SCH_ENR_AM_F,SCH_ENR_AS_M,SCH_ENR_AS_F,SCH_ENR_HP_M,SCH_ENR_HP_F,SCH_ENR_BL_M,SCH_ENR_BL_F,SCH_ENR_WH_M,SCH_ENR_WH_F,SCH_ENR_TR_M,SCH_ENR_TR_F,TOT_ENR_M,TOT_ENR_F,SCH_ENR_LEP_M,SCH_ENR_LEP_F,SCH_ENR_IDEA_M,SCH_ENR_IDEA_F,SCH_DUAL_IND,SCH_DUALENR_HI_M,SCH_DUALENR_HI_F,SCH_DUALENR_AM_M,SCH_DUALENR_AM_F,SCH_DUALENR_AS_M,SCH_DUALENR_AS_F,SCH_DUALENR_HP_M,SCH_DUALENR_HP_F,SCH_DUALENR_BL_M,SCH_DUALENR_BL_F,SCH_DUALENR_WH_M,SCH_DUALENR_WH_F,SCH_DUALENR_TR_M,SCH_DUALENR_TR_F,TOT_DUALENR_M,TOT_DUALENR_F,SCH_DUALENR_LEP_M,SCH_DUALENR_LEP_F,SCH_DUALENR_IDEA_M,SCH_DUALENR_IDEA_F,SCH_APENR_IND,SCH_APENR_HI_M,SCH_APENR_HI_F,SCH_APENR_AM_M,SCH_APENR_AM_F,SCH_APENR_AS_M,SCH_APENR_AS_F,SCH_APENR_HP_M,SCH_APENR_HP_F,SCH_APENR_BL_M,SCH_APENR_BL_F,SCH_APENR_WH_M,SCH_APENR_WH_F,SCH_APENR_TR_M,SCH_APENR_TR_F,TOT_APENR_M,TOT_APENR_F,SCH_APENR_LEP_M,SCH_APENR_LEP_F,SCH_APENR_IDEA_M,SCH_APENR_IDEA_F,SCH_IBENR_IND,SCH_IBENR_HI_M,SCH_IBENR_HI_F,SCH_IBENR_AM_M,SCH_IBENR_AM_F,SCH_IBENR_AS_M,SCH_IBENR_AS_F,SCH_IBENR_HP_M,SCH_IBENR_HP_F,SCH_IBENR_BL_M,SCH_IBENR_BL_F,SCH_IBENR_WH_M,SCH_IBENR_WH_F,SCH_IBENR_TR_M,SCH_IBENR_TR_F,TOT_IBENR_M,TOT_IBENR_F,SCH_IBENR_LEP_M,SCH_IBENR_LEP_F,SCH_IBENR_IDEA_M,SCH_IBENR_IDEA_F,combokey,TITLEI,LEAID_nces,LEA_NAME_nces,SCHID_nces,SCH_NAME_nces,SCH_TYPE_TEXT,SCH_TYPE,LEVEL,VIRTUAL,GSLO,GSHI,NMCNTY15,LOCALE15,LAT1516,LON1516,grade_range,de_total_enrollment.1
9,Sherman Oaks Center for Enriched Studies,321,CA,CALIFORNIA,622710,Los Angeles Unified,9151,='062271009151',No,No,No,No,No,No,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,No,-9,No,Yes,No,No,359,377,2,5,200,182,8,5,38,38,413,413,23,26,1043,1046,32,29,55,31,Yes,47,59,2,0,23,41,2,2,5,5,62,71,0,2,141,180,0,0,0,8,Yes,62,62,2,2,50,50,5,2,8,5,83,80,0,2,210,203,2,0,2,2,No,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,='062271009151',Yes,622710.0,Los Angeles Unified,9151.0,Sherman Oaks Center for Enriched Studies,Alternative Education School,4,4,No,4,12,Los Angeles County,11,34.1849,-118.538,other,321
11,WOODGROVE HIGH,295,VA,VIRGINIA,5102250,LOUDOUN CO PBLC SCHS,2812,='510225002812',No,Yes,No,No,No,No,No,No,No,No,No,Yes,Yes,Yes,Yes,No,-9,No,No,No,No,53,50,2,2,29,23,2,0,35,23,716,581,20,14,857,693,5,5,133,76,Yes,5,5,0,2,5,5,0,0,5,2,149,110,2,5,166,129,0,0,11,5,Yes,14,11,2,2,8,14,0,0,8,5,299,284,8,8,339,324,0,0,11,5,No,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,='510225002812',No,5102250.0,LOUDOUN CO PBLC SCHS,2812.0,WOODGROVE HIGH,Regular School,1,4,No,PK,12,Loudoun County,41,39.1542,-77.7271,other,295
12,HERITAGE HIGH,280,VA,VIRGINIA,5102250,LOUDOUN CO PBLC SCHS,2285,='510225002285',No,Yes,No,No,No,No,No,No,No,No,No,Yes,Yes,Yes,Yes,No,-9,No,No,No,No,116,122,2,2,83,77,0,0,65,71,374,404,41,41,681,717,44,29,136,70,Yes,14,20,2,0,17,26,0,0,8,5,83,89,8,8,132,148,0,0,8,5,Yes,20,35,2,0,29,44,0,0,8,20,128,182,20,17,207,298,0,2,8,5,No,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,='510225002285',No,5102250.0,LOUDOUN CO PBLC SCHS,2285.0,HERITAGE HIGH,Regular School,1,4,No,PK,12,Loudoun County,41,39.0832,-77.57,other,280
18,Benjamin Franklin Senior High,228,CA,CALIFORNIA,622710,Los Angeles Unified,3021,='062271003021',No,No,No,No,No,No,No,No,Yes,Yes,No,Yes,Yes,Yes,Yes,No,-9,No,No,No,No,671,611,5,5,32,44,0,0,5,8,20,8,2,2,735,678,104,65,121,64,Yes,65,146,0,0,5,8,0,0,0,0,2,2,0,0,72,156,0,2,8,2,Yes,119,158,0,0,11,20,0,0,2,0,2,2,0,0,134,180,2,5,2,8,No,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,='062271003021',Yes,622710.0,Los Angeles Unified,3021.0,Benjamin Franklin Senior High,Regular School,1,4,No,6,12,Los Angeles County,11,34.116,-118.199,other,228
24,PINE VIEW SCHOOL,201,FL,FLORIDA,1201680,SARASOTA,1836,='120168001836',No,No,No,No,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,No,-9,Yes,No,No,No,107,101,2,2,110,158,0,0,5,8,716,833,68,68,1008,1170,2,0,43,16,Yes,11,8,0,0,8,14,0,0,0,2,71,74,8,5,98,103,0,0,0,0,Yes,32,29,0,2,26,50,0,0,2,2,209,266,17,23,286,372,0,0,2,2,No,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,='120168001836',No,1201680.0,SARASOTA,1836.0,PINE VIEW SCHOOL,Special Education School,2,4,No,2,12,Sarasota County,21,27.186,-82.4799,other,201
42,TUSCARORA HIGH,164,VA,VIRGINIA,5102250,LOUDOUN CO PBLC SCHS,2817,='510225002817',No,Yes,No,No,No,No,No,No,No,No,No,Yes,Yes,Yes,Yes,No,-9,No,No,No,No,212,155,2,2,56,62,0,2,80,71,425,470,44,50,819,812,74,35,112,67,Yes,11,5,0,0,5,11,0,0,2,8,50,65,2,5,70,94,2,0,2,5,Yes,44,41,2,2,32,35,0,2,17,29,194,287,14,26,303,422,2,0,14,8,No,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,='510225002817',No,5102250.0,LOUDOUN CO PBLC SCHS,2817.0,TUSCARORA HIGH,Regular School,1,4,No,PK,12,Loudoun County,41,39.1321,-77.5571,other,164
49,ROCK RIDGE HIGH,152,VA,VIRGINIA,5102250,LOUDOUN CO PBLC SCHS,2969,='510225002969',No,Yes,No,No,No,No,No,No,No,No,No,Yes,Yes,Yes,Yes,No,-9,No,No,No,No,59,71,0,2,221,221,0,0,92,68,266,263,29,38,667,663,26,14,79,43,Yes,8,8,0,0,35,35,0,0,2,8,26,26,2,2,73,79,0,2,2,0,Yes,11,23,0,2,104,113,0,0,17,20,80,92,8,17,220,267,2,2,5,2,No,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,='510225002969',No,5102250.0,LOUDOUN CO PBLC SCHS,2969.0,ROCK RIDGE HIGH,Regular School,1,4,No,PK,12,Loudoun County,41,38.9763,-77.5025,other,152
53,Cardinal Heights Upper Middle,146,WI,WISCONSIN,5514640,Sun Prairie Area School District,2810,='551464002810',No,No,No,No,No,No,No,No,No,No,Yes,Yes,No,No,No,No,-9,No,No,No,No,59,50,0,2,47,29,0,0,62,62,419,371,41,44,628,558,35,20,82,40,Yes,5,11,0,0,8,5,0,0,2,5,35,65,5,5,55,91,2,0,5,0,No,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,No,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,='551464002810',Yes,5514640.0,Sun Prairie Area School District,2810.0,Cardinal Heights Upper Middle,Regular School,1,4,No,8,9,Dane County,21,43.1785,-89.2192,other,146
56,Johnston Middle School,137,IA,IOWA,1915450,Johnston Comm School District,926,='191545000926',No,No,No,No,No,No,No,No,No,No,Yes,Yes,No,No,No,No,-9,No,No,No,No,29,29,2,0,29,41,0,0,26,20,413,407,26,23,525,520,23,23,55,40,Yes,2,2,0,0,8,5,0,0,2,2,83,29,2,2,97,40,2,0,2,0,No,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,No,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,='191545000926',No,1915450.0,Johnston Comm School District,926.0,Johnston Middle School,Regular School,1,4,No,8,9,Polk County,41,41.675,-93.7019,other,137
65,RIVER RIDGE HIGH SCHOOL,121,FL,FLORIDA,1201530,PASCO,2882,='120153002882',No,Yes,No,No,No,No,No,No,Yes,Yes,Yes,Yes,Yes,Yes,Yes,No,-9,No,No,No,No,74,77,5,5,14,17,2,0,17,17,665,671,26,35,803,822,5,5,109,76,Yes,2,5,0,2,0,2,0,0,2,2,50,56,0,0,54,67,0,0,2,0,Yes,11,23,2,2,5,8,2,0,0,2,140,191,5,8,165,234,2,0,2,2,No,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,='120153002882',Yes,1201530.0,PASCO,2882.0,RIVER RIDGE HIGH SCHOOL,Regular School,1,4,No,6,12,Pasco County,21,28.2671,-82.6226,other,121


**Virtual Schools**

In [34]:
print(match_schools_with_de.VIRTUAL.value_counts())
print()
print(match_schools_with_de.groupby('VIRTUAL').de_total_enrollment.sum())

No         2016
Missing     446
Yes         148
Name: VIRTUAL, dtype: int64

VIRTUAL
Missing    17423
No         81591
Yes         2801
Name: de_total_enrollment, dtype: int64


**School Type**<br>
1 = Regular school, 2 = Special education school, 3 = Vocational school, 4 = Other/alternative school


In [35]:
print(match_schools_with_de.SCH_TYPE.value_counts())
print()
print(match_schools_with_de.groupby('SCH_TYPE').de_total_enrollment.sum())

1.0    1885
4.0     464
3.0     212
2.0      49
Name: SCH_TYPE, dtype: int64

SCH_TYPE
1.0    62845
2.0     1167
3.0    26577
4.0    11226
Name: de_total_enrollment, dtype: int64


In [68]:
vocational_schools = match_schools_with_de[match_schools_with_de.SCH_TYPE == 3.0]
vocational_schools.head()

Unnamed: 0,SCH_NAME,de_total_enrollment,LEA_STATE,LEA_STATE_NAME,LEAID,LEA_NAME,SCHID,COMBOKEY,JJ,SCH_GRADE_PS,...,SCH_TYPE_TEXT,SCH_TYPE,LEVEL,VIRTUAL,GSLO,GSHI,NMCNTY15,LOCALE15,LAT1516,LON1516
1,Passaic County Technical Institute,1463,NJ,NEW JERSEY,3412630,Passsaic County Vocational School District,4860,='341263004860',No,No,...,Vocational Education School,3,3,No,9,12,Passaic County,21,40.9298,-74.2036
3,McKenzie Career Center,1205,IN,INDIANA,1805670,M S D Lawrence Township,725,='180567000725',No,No,...,Vocational Education School,3,4,No,PK,12,Marion County,11,39.8923,-86.0411
4,Bergen County Academies,1014,NJ,NEW JERSEY,3401470,Bergen County Vocational Technical School Dist...,250,='340147000250',No,No,...,Vocational Education School,3,3,No,9,12,Bergen County,21,40.9022,-74.0343
6,Penta Career Center - on campus,686,OH,OHIO,3905135,Penta Career Center - District,4080,='390513504080',No,No,...,Vocational Education School,3,3,No,10,12,Wood County,21,41.5837,-83.5721
7,Puget Sound Skills Center,682,WA,WASHINGTON,5303540,Highline School District,2103,='530354002103',No,No,...,Vocational Education School,3,3,Missing,10,12,King County,21,47.4411,-122.323


**Filtered schools with the most DE-students**

In [36]:
match_schools_with_de.sort_values('de_total_enrollment', ascending=False).head(125)

Unnamed: 0,SCH_NAME,de_total_enrollment,LEA_STATE,LEA_STATE_NAME,LEAID,LEA_NAME,SCHID,COMBOKEY,JJ,SCH_GRADE_PS,...,SCH_TYPE_TEXT,SCH_TYPE,LEVEL,VIRTUAL,GSLO,GSHI,NMCNTY15,LOCALE15,LAT1516,LON1516
0,BROOKLYN TECHNICAL HIGH SCHOOL,3778,NY,NEW YORK,3620580,NEW YORK CITY PUBLIC SCHOOLS,1928,='362058001928',No,No,...,,3,3,No,9,12,Kings County,11,40.6889,-73.9766
1,Passaic County Technical Institute,1463,NJ,NEW JERSEY,3412630,Passsaic County Vocational School District,4860,='341263004860',No,No,...,Vocational Education School,3,3,No,9,12,Passaic County,21,40.9298,-74.2036
2,Basha High School,1278,AZ,ARIZONA,401870,Chandler Unified District #80,2250,='040187002250',No,No,...,Regular School,1,4,No,6,12,Maricopa County,21,33.2212,-111.758
3,McKenzie Career Center,1205,IN,INDIANA,1805670,M S D Lawrence Township,725,='180567000725',No,No,...,Vocational Education School,3,4,No,PK,12,Marion County,11,39.8923,-86.0411
4,Bergen County Academies,1014,NJ,NEW JERSEY,3401470,Bergen County Vocational Technical School Dist...,250,='340147000250',No,No,...,Vocational Education School,3,3,No,9,12,Bergen County,21,40.9022,-74.0343
5,South Kitsap High School,752,WA,WASHINGTON,5308160,South Kitsap School District,1357,='530816001357',No,No,...,Regular School,1,4,Missing,3,12,Kitsap County,22,47.5385,-122.626
6,Penta Career Center - on campus,686,OH,OHIO,3905135,Penta Career Center - District,4080,='390513504080',No,No,...,Vocational Education School,3,3,No,10,12,Wood County,21,41.5837,-83.5721
7,Puget Sound Skills Center,682,WA,WASHINGTON,5303540,Highline School District,2103,='530354002103',No,No,...,Vocational Education School,3,3,Missing,10,12,King County,21,47.4411,-122.323
8,Bergen County Technical High School - Teterboro,631,NJ,NEW JERSEY,3401470,Bergen County Vocational Technical School Dist...,262,='340147000262',No,No,...,Vocational Education School,3,3,No,9,12,Bergen County,21,40.8594,-74.0543
9,COLORADO EARLY COLLEGE FORT COLLINS,603,CO,COLORADO,800020,State Charter School Institute,6542,='080002006542',No,No,...,Regular School,1,4,No,6,12,Larimer County,12,40.5198,-105.053
