In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from my_functions import combokey_converter

%matplotlib inline
sns.set_style('whitegrid')
plt.rc('axes', titlesize = 14, titleweight = 'bold', labelweight = 'bold')

In [9]:
raw = pd.read_csv('../filtered_data/00_crdc_1516_initial.csv', 
                        dtype = {'LEAID':np.object})

In [26]:
filtered = pd.read_csv('../filtered_data/04_filter_final.csv', dtype = {'LEAID':np.object})

In [27]:
raw['COMBOKEY'] = combokey_converter.convert(raw, 'LEAID', 'SCHID')

In [54]:
filtered_combo = filtered[['COMBOKEY','LAT1516']]

In [58]:
filtered_raw_joined = pd.merge(raw, filtered_combo, on = 'COMBOKEY', how = 'left')
filteded_from_raw = filtered_raw_joined.copy()
filtered_from_raw = filtered_from_raw[filtered_from_raw.LAT1516.isnull()]

In [59]:
len(filtered_from_raw)

80635

In [55]:
filtered_from_raw.columns.values

array(['LEA_STATE', 'LEA_STATE_NAME', 'LEAID', 'LEA_NAME', 'SCHID',
       'SCH_NAME', 'COMBOKEY', 'JJ', 'SCH_GRADE_PS', 'SCH_GRADE_KG',
       'SCH_GRADE_G01', 'SCH_GRADE_G02', 'SCH_GRADE_G03', 'SCH_GRADE_G04',
       'SCH_GRADE_G05', 'SCH_GRADE_G06', 'SCH_GRADE_G07', 'SCH_GRADE_G08',
       'SCH_GRADE_G09', 'SCH_GRADE_G10', 'SCH_GRADE_G11', 'SCH_GRADE_G12',
       'SCH_GRADE_UG', 'SCH_UGDETAIL_HS', 'SCH_STATUS_SPED',
       'SCH_STATUS_MAGNET', 'SCH_STATUS_CHARTER', 'SCH_STATUS_ALT',
       'SCH_ENR_HI_M', 'SCH_ENR_HI_F', 'SCH_ENR_AM_M', 'SCH_ENR_AM_F',
       'SCH_ENR_AS_M', 'SCH_ENR_AS_F', 'SCH_ENR_HP_M', 'SCH_ENR_HP_F',
       'SCH_ENR_BL_M', 'SCH_ENR_BL_F', 'SCH_ENR_WH_M', 'SCH_ENR_WH_F',
       'SCH_ENR_TR_M', 'SCH_ENR_TR_F', 'TOT_ENR_M', 'TOT_ENR_F',
       'SCH_ENR_LEP_M', 'SCH_ENR_LEP_F', 'SCH_ENR_IDEA_M',
       'SCH_ENR_IDEA_F', 'SCH_DUAL_IND', 'SCH_DUALENR_HI_M',
       'SCH_DUALENR_HI_F', 'SCH_DUALENR_AM_M', 'SCH_DUALENR_AM_F',
       'SCH_DUALENR_AS_M', 'SCH_DUALENR_AS_F

# Analysis

In [62]:
def missing_value_mapper(value):
    """Converts any negative number into 0, as these negative numbers represent missing/null values"""
    if isinstance(value, int):
        if value < 0:
            return 0
    return value

filtered_from_raw = filtered_from_raw.applymap(missing_value_mapper)

In [63]:
filtered_from_raw['de_total_enrollment'] = filtered_from_raw['TOT_DUALENR_M'] + filtered_from_raw['TOT_DUALENR_F']

### DE

In [68]:
schools_with_de_students = filtered_from_raw.copy()
schools_with_de_students = schools_with_de_students[schools_with_de_students.de_total_enrollment > 0]

** Fiiltered-Schools with DE students **

In [97]:
first_columns = ['SCH_NAME', 'de_total_enrollment']
reorder = first_columns + [c for c in schools_with_de_students.columns if c not in first_columns]
schools_with_de_students = schools_with_de_students[reorder].sort_values('de_total_enrollment', ascending = False)
schools_with_de_students

Unnamed: 0,SCH_NAME,de_total_enrollment,LEA_STATE,LEA_STATE_NAME,LEAID,LEA_NAME,SCHID,COMBOKEY,JJ,SCH_GRADE_PS,...,TOT_IBENR_M,TOT_IBENR_F,SCH_IBENR_LEP_M,SCH_IBENR_LEP_F,SCH_IBENR_IDEA_M,SCH_IBENR_IDEA_F,LAT1516,LON1516,LOCALE15,NMCNTY15
59112,BROOKLYN TECHNICAL HIGH SCHOOL,3778,NY,NEW YORK,3620580,NEW YORK CITY PUBLIC SCHOOLS,1928,='362058001928',No,No,...,0,0,0,0,0,0,,,,
55306,Passaic County Technical Institute,1463,NJ,NEW JERSEY,3412630,Passsaic County Vocational School District,4860,='341263004860',No,No,...,0,0,0,0,0,0,,,,
2671,Basha High School,1278,AZ,ARIZONA,401870,Chandler Unified District #80,2250,='040187002250',No,No,...,0,0,0,0,0,0,,,,
31069,McKenzie Career Center,1205,IN,INDIANA,1805670,M S D Lawrence Township,725,='180567000725',No,No,...,2,9,0,0,0,0,,,,
53807,Bergen County Academies,1014,NJ,NEW JERSEY,3401470,Bergen County Vocational Technical School Dist...,250,='340147000250',No,No,...,71,74,0,0,0,0,,,,
92581,South Kitsap High School,752,WA,WASHINGTON,5308160,South Kitsap School District,1357,='530816001357',No,No,...,0,0,0,0,0,0,,,,
68466,Penta Career Center - on campus,686,OH,OHIO,3905135,Penta Career Center - District,4080,='390513504080',No,No,...,0,0,0,0,0,0,,,,
91522,Puget Sound Skills Center,682,WA,WASHINGTON,5303540,Highline School District,2103,='530354002103',No,No,...,0,0,0,0,0,0,,,,
53809,Bergen County Technical High School - Teterboro,631,NJ,NEW JERSEY,3401470,Bergen County Vocational Technical School Dist...,262,='340147000262',No,No,...,0,0,0,0,0,0,,,,
15147,COLORADO EARLY COLLEGE FORT COLLINS,603,CO,COLORADO,800020,State Charter School Institute,6542,='080002006542',No,No,...,0,0,0,0,0,0,,,,


**Total DE Students**

In [132]:
"""Total DE Students"""
schools_with_de_students.de_total_enrollment.sum()

104229

**Juvenile Justice**

In [153]:
"""Juvenile Justice Schools - How many schools and DE students?"""
print(schools_with_de_students.JJ.value_counts())
print()
print(str(schools_with_de_students[schools_with_de_students.JJ == 'Yes'].de_total_enrollment.sum()), "DE students.")

No     2678
Yes      15
Name: JJ, dtype: int64

388 DE students.


**Alternative**

In [154]:
"""Alternative Schools - How many schools and DE students?"""
print(schools_with_de_students.SCH_STATUS_ALT.value_counts())
print()
print(str(schools_with_de_students[schools_with_de_students.SCH_STATUS_ALT == 'Yes'].de_total_enrollment.sum()), "DE students.")

No     2340
Yes     353
Name: SCH_STATUS_ALT, dtype: int64

7555 DE students.


**Special Education**

In [156]:
"""Special Education - How many schools and DE students?"""
print(schools_with_de_students.SCH_STATUS_SPED.value_counts())
print()
print(str(schools_with_de_students[schools_with_de_students.SCH_STATUS_SPED == 'Yes'].de_total_enrollment.sum()), "DE students.")

No     2610
Yes      83
Name: SCH_STATUS_SPED, dtype: int64

2903 DE students.


**Grade Ranges**

In [238]:
def school_grade_range(df):
    """Calculates the number of schools in a given DataFrame (df) of a particular distribution of grades:
        Example:  If a school (in df) offers grades 9,10,11,12 exclusively, they are placed in the '9-12' 
                    category. """
    df_grade_range = []
    for index, row in df.iterrows():
        grade_range = []

        if row.SCH_GRADE_G12 == 'Yes':
            grade_range.append(12)
        if row.SCH_GRADE_G11 == 'Yes':
            grade_range.append(11)
        if row.SCH_GRADE_G10 == 'Yes':
            grade_range.append(10)
        if row.SCH_GRADE_G09 == 'Yes':
            grade_range.append(9)
        if row.SCH_GRADE_G08 == 'Yes':
            grade_range.append(8)
        if row.SCH_GRADE_G07 == 'Yes':
            grade_range.append(7)
        if row.SCH_GRADE_G06 == 'Yes':
            grade_range.append(6)
        if row.SCH_GRADE_G05 == 'Yes':
            grade_range.append(5)
        if row.SCH_GRADE_G04 == 'Yes':
            grade_range.append(4)
        if row.SCH_GRADE_G03 == 'Yes':
            grade_range.append(3)
        if row.SCH_GRADE_G02 == 'Yes':
            grade_range.append(2)
        if row.SCH_GRADE_G01 == 'Yes':
            grade_range.append(1)
        if row.SCH_GRADE_KG == 'Yes':
            grade_range.append('kg')
        if row.SCH_GRADE_PS == 'Yes':
            grade_range.append('pk')
        
        if grade_range == [12,11,10,9,8,7,6,5,4,3,2,1,'kg','pk']:
            df_grade_range.append('pk-12')
        elif grade_range == [12,11,10,9,8,7,6,5,4,3,2,1,'kg']:
            df_grade_range.append('kg-12')
        elif grade_range == [12,11,10,9,8,7,6,5]:
            df_grade_range.append('05-12')
        elif grade_range == [12,11,10,9,8,7,6]:
            df_grade_range.append('06-12')
        elif grade_range == [12,11,10,9,8,7]:
            df_grade_range.append('07-12')
        elif grade_range == [12,11,10,9,8]:
            df_grade_range.append('08-12')
        elif grade_range == [12,11,10,9]:
            df_grade_range.append('09-12')
        elif grade_range == [11,10,9]:
            df_grade_range.append('09-11')
        elif grade_range == [10,9]:
            df_grade_range.append('09-10')
        elif grade_range == [12,11,10]:
            df_grade_range.append('10-12')
        elif grade_range == [12,11]:
            df_grade_range.append('11-12')
        elif grade_range == [12]:
            df_grade_range.append('12-only')
        else:
            df_grade_range.append('other')
    return pd.DataFrame(df_grade_range, columns=['grade_range'])

In [244]:
de_stud_school_grade_range_list = school_grade_range(schools_with_de_students).join(schools_with_de_students['de_total_enrollment'].reset_index(drop = True), how = 'outer')

de_stud_school_grade_range_dist = de_stud_school_grade_range_list['grade_range'].value_counts().reset_index().sort_values('index').set_index('index')
de_stud_school_grade_range_dist['pct_of_schools'] = round(de_stud_school_grade_range_dist['grade_range'] / len(de_stud_school_grade_range_list), 3)

de_stud_school_grade_range_enrollments = pd.DataFrame(de_stud_school_grade_range_list.groupby('grade_range')['de_total_enrollment'].sum())

de_stud_school_grade_range_dist.join(de_stud_school_grade_range_enrollments).rename({'grade_range': '# schools'}, axis = 1)

Unnamed: 0_level_0,# schools,pct_of_schools,de_total_enrollment
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
05-12,34,0.013,1516
06-12,666,0.247,25242
07-12,99,0.037,2722
08-12,27,0.01,1399
09-10,15,0.006,975
09-11,22,0.008,1185
09-12,548,0.203,36343
10-12,77,0.029,4366
11-12,54,0.02,3331
12-only,8,0.003,49


## Get NCES information and join with schools_with_de_students

In [162]:
schools_with_de_students = schools_with_de_students.drop(['LAT1516', 'LON1516', 'LOCALE15', 'NMCNTY15'], axis = 1)

In [158]:
nces_1516_full = pd.read_csv('../filtered_data/01_nces_1516_initial_combined_ccd.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [186]:
schools_with_de_students_nces = pd.merge(schools_with_de_students, nces_1516_full, left_on = 'COMBOKEY', right_on = 'combokey', how = 'left', suffixes = ('', '_nces'))

In [247]:
non_matching_schools_with_de = schools_with_de_students_nces[schools_with_de_students_nces.LEVEL.isnull()]
match_schools_with_de = schools_with_de_students_nces[schools_with_de_students_nces.LEVEL.notnull()]

In [248]:
print(match_schools_with_de.de_total_enrollment.sum())
print(non_matching_schools_with_de.de_total_enrollment.sum())

93901
10328


**Recover some of the non-nces-matching schools**

In [249]:
"""Passing in my recovered_schools from 00_initial_filter"""
%store -r recovered_schools_all

In [250]:
recovered_schools_all = recovered_schools_all.reset_index()
nces_columns = ['COMBOKEY', 'SCH_TYPE', 'LEVEL', 'VIRTUAL', 'GSLO', 'GSHI',
       'NMCNTY15', 'LOCALE15', 'LAT1516', 'LON1516']
recovered_schools_all_nces = recovered_schools_all[nces_columns]

In [251]:
non_matching_schools_with_de = non_matching_schools_with_de.drop(['SCH_TYPE', 'LEVEL', 'VIRTUAL', 'GSLO', 'GSHI',
       'NMCNTY15', 'LOCALE15', 'LAT1516', 'LON1516'],axis = 1)

In [252]:
recovered_non_matchings = pd.merge(non_matching_schools_with_de, recovered_schools_all_nces, on='COMBOKEY')

In [256]:
print(recovered_non_matchings.de_total_enrollment.sum(), "DE students Recovered.")

7914 DE students Recovered.


In [257]:
match_schools_with_de = match_schools_with_de.append(recovered_non_matchings)

In [258]:
print(format(match_schools_with_de.de_total_enrollment.sum(), ",d"), "out of", format(schools_with_de_students.de_total_enrollment.sum(),',d'), 
     "DE Students Accounted for.")

109,729 out of 104,229 DE Students Accounted for.


**Filtered Schools that match with NCES**

**LEVEL**<br>
1 = Primary (low grade = PK through 03; high grade = PK through 08), 2 = Middle (low grade = 04 through 07; high grade = 04 through 09), 3 = High (low grade = 07 through 12; high grade = 12 only), 4 = Other (any other configuration not falling within the above three categories;including ungraded), N = Not applicable


In [259]:
"""How many schools and de students in each LEVEL of school"""
print(match_schools_with_de.LEVEL.value_counts().reset_index().sort_values('index').set_index('index'))
print()
print(str(match_schools_with_de.groupby('LEVEL').de_total_enrollment.sum()), 'DE students.')

       LEVEL
index       
1         24
2         26
3        668
4       1943
N         25

LEVEL
1      295
2     1609
3    43559
4    63237
N     1029
Name: de_total_enrollment, dtype: int64 DE students.


**Virtual Schools**

In [260]:
print(match_schools_with_de.VIRTUAL.value_counts())
print()
print(match_schools_with_de.groupby('VIRTUAL').de_total_enrollment.sum())

No         2085
Missing     449
Yes         152
Name: VIRTUAL, dtype: int64

VIRTUAL
Missing    17433
No         89460
Yes         2836
Name: de_total_enrollment, dtype: int64


**School Type**<br>
1 = Regular school, 2 = Special education school, 3 = Vocational school, 4 = Other/alternative school


In [261]:
print(match_schools_with_de.SCH_TYPE.value_counts())
print()
print(match_schools_with_de.groupby('SCH_TYPE').de_total_enrollment.sum())

1.0    1949
4.0     467
3.0     221
2.0      49
Name: SCH_TYPE, dtype: int64

SCH_TYPE
1.0    66152
2.0     1167
3.0    31129
4.0    11281
Name: de_total_enrollment, dtype: int64


In [263]:
match_schools_with_de.sort_values('de_total_enrollment', ascending=False).head(100)

Unnamed: 0,SCH_NAME,de_total_enrollment,LEA_STATE,LEA_STATE_NAME,LEAID,LEA_NAME,SCHID,COMBOKEY,JJ,SCH_GRADE_PS,...,SCH_TYPE_TEXT,SCH_TYPE,LEVEL,VIRTUAL,GSLO,GSHI,NMCNTY15,LOCALE15,LAT1516,LON1516
0,brooklyn technical high school,3778,NY,NEW YORK,3620580,NEW YORK CITY PUBLIC SCHOOLS,1928,='362058001928',No,No,...,,3,3,No,9,12,Kings County,11,40.6889,-73.9766
0,brooklyn technical high school,3778,NY,NEW YORK,3620580,NEW YORK CITY PUBLIC SCHOOLS,1928,='362058001928',No,No,...,,3,3,No,9,12,Kings County,11,40.6889,-73.9766
1,passaic county technical institute,1463,NJ,NEW JERSEY,3412630,Passsaic County Vocational School District,4860,='341263004860',No,No,...,Vocational Education School,3,3,No,9,12,Passaic County,21,40.9298,-74.2036
2,basha high school,1278,AZ,ARIZONA,401870,Chandler Unified District #80,2250,='040187002250',No,No,...,Regular School,1,4,No,6,12,Maricopa County,21,33.2212,-111.758
3,mckenzie career center,1205,IN,INDIANA,1805670,M S D Lawrence Township,725,='180567000725',No,No,...,Vocational Education School,3,4,No,PK,12,Marion County,11,39.8923,-86.0411
4,bergen county academies,1014,NJ,NEW JERSEY,3401470,Bergen County Vocational Technical School Dist...,250,='340147000250',No,No,...,Vocational Education School,3,3,No,9,12,Bergen County,21,40.9022,-74.0343
5,south kitsap high school,752,WA,WASHINGTON,5308160,South Kitsap School District,1357,='530816001357',No,No,...,Regular School,1,4,Missing,3,12,Kitsap County,22,47.5385,-122.626
6,penta career center - on campus,686,OH,OHIO,3905135,Penta Career Center - District,4080,='390513504080',No,No,...,Vocational Education School,3,3,No,10,12,Wood County,21,41.5837,-83.5721
7,puget sound skills center,682,WA,WASHINGTON,5303540,Highline School District,2103,='530354002103',No,No,...,Vocational Education School,3,3,Missing,10,12,King County,21,47.4411,-122.323
8,bergen county technical high school - teterboro,631,NJ,NEW JERSEY,3401470,Bergen County Vocational Technical School Dist...,262,='340147000262',No,No,...,Vocational Education School,3,3,No,9,12,Bergen County,21,40.8594,-74.0543
