# <u> NACEP </u>
## 2015-16 CRDC
## Filtered Schools Analysis
#### Alijah O'Connor - 2018
------------------------------------------------------------------------------------
---

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from my_functions import combokey_converter

%matplotlib inline
sns.set_style('whitegrid')
plt.rc('axes', titlesize = 14, titleweight = 'bold', labelweight = 'bold')

In [2]:
# Load the initial 2015-16 CRDC extract.
# LEAID is read with the builtin `object` dtype so it is kept as text rather
# than parsed as a number; `np.object` (the alias previously used here) was
# deprecated in NumPy 1.20 and removed in 1.24.
raw = pd.read_csv('../filtered_data/00_crdc_1516_initial.csv',
                  dtype = {'LEAID': object})

In [3]:
# Load the final filtered school list.
# LEAID is read with the builtin `object` dtype so it is kept as text;
# `np.object` was deprecated in NumPy 1.20 and removed in 1.24.
filtered = pd.read_csv('../filtered_data/04_filter_final.csv', dtype = {'LEAID': object})

In [4]:
"""How long is the raw dataset?"""
# Store the key produced by combokey_converter for the LEAID / SCHID columns,
# then display the number of rows in the raw frame.
raw['COMBOKEY'] = combokey_converter.convert(raw, 'LEAID', 'SCHID')
raw.shape[0]

96360

In [5]:
# Keep only the join key and the latitude column from the filtered school list.
filtered_combo = filtered.loc[:, ['COMBOKEY', 'LAT1516']]

In [6]:
# Left-join so every raw CRDC row is kept; LAT1516 is carried over from the
# filtered list for rows whose COMBOKEY appears there.
filtered_raw_joined = pd.merge(raw, filtered_combo, on = 'COMBOKEY', how = 'left')

# A school was filtered out when its key is absent from the filtered list.
# Testing key membership directly (instead of checking for a null LAT1516)
# avoids misclassifying a retained school whose latitude happens to be missing.
was_retained = filtered_raw_joined['COMBOKEY'].isin(filtered_combo['COMBOKEY'])
filtered_from_raw = filtered_raw_joined[~was_retained].drop('LAT1516', axis = 1)

# Set Up

In [7]:
from my_functions.extra_functions import students_in_11_or_12


def _students_in_11_or_12_for_row(row):
    """Call students_in_11_or_12 with one row's SCH_GRADE_G11 and SCH_GRADE_G12 values."""
    return students_in_11_or_12(row['SCH_GRADE_G11'], row['SCH_GRADE_G12'])


# Row-wise evaluation; the result is stored as a new column on the frame.
filtered_from_raw['Students_in_11_12'] = filtered_from_raw.apply(_students_in_11_or_12_for_row,
                                                                 axis = 1)

In [8]:
def missing_value_mapper(value):
    """Converts any negative integer into 0, as these negative numbers represent missing/null values.

    Both Python ints and NumPy integer scalars (e.g. ``np.int64``) are handled;
    NumPy integers are not ``int`` subclasses, so they need an explicit check.
    Every other value (strings, floats, NaN, non-negative integers) is returned
    unchanged.

    Parameters
    ----------
    value : object
        A single cell value.

    Returns
    -------
    object
        ``0`` when ``value`` is a negative integer, otherwise ``value`` itself.
    """
    if isinstance(value, (int, np.integer)) and value < 0:
        return 0
    return value

# Run missing_value_mapper on every cell of the frame and rebind the name to
# the mapped result. NOTE(review): this reassigns filtered_from_raw in place
# of the earlier frame, so the pre-mapping values are no longer available
# under this name after the cell runs.
filtered_from_raw = filtered_from_raw.applymap(missing_value_mapper)

In [9]:
from my_functions.extra_functions import hs_enrollment_averager

# Total enrollment: male count plus female count.
filtered_from_raw['total_enrollment'] = filtered_from_raw['TOT_ENR_M'].add(filtered_from_raw['TOT_ENR_F'])

# Computed after total_enrollment exists, matching the original statement order
# (the helper receives the whole frame).
filtered_from_raw['total_hs_enrollment'] = hs_enrollment_averager(filtered_from_raw)

# Dual-enrollment total: male count plus female count.
filtered_from_raw['de_total_enrollment'] = filtered_from_raw['TOT_DUALENR_M'].add(filtered_from_raw['TOT_DUALENR_F'])

In [86]:
"""How many total filtered out schools?"""
# Row count of the filtered-out frame.
filtered_from_raw.shape[0]

77693

## Get NCES information

In [11]:
# NCES columns used by this notebook.
nces_important_columns = ['combokey', 'SCH_TYPE', 'LEVEL', 'VIRTUAL', 'GSLO', 'GSHI',
       'NMCNTY15', 'LOCALE15', 'LAT1516', 'LON1516']

# Read only the needed columns. `low_memory=False` makes pandas infer each
# column's dtype from the whole column rather than chunk by chunk, which avoids
# the mixed-type DtypeWarning; LEVEL is pinned to str because later cells
# compare it against the string codes '1', '2' and 'N'.
nces_1516_full = pd.read_csv('../filtered_data/01_nces_1516_initial_combined_ccd.csv',
                             usecols = nces_important_columns,
                             dtype = {'LEVEL': str},
                             low_memory = False)

# `usecols` returns columns in file order; re-select to keep the listed order.
nces_1516_full = nces_1516_full[nces_important_columns]

  interactivity=interactivity, compiler=compiler, result=result)


In [12]:
# Left-join the NCES columns onto the filtered-out schools; the key is named
# COMBOKEY on the left and combokey on the right. Overlapping NCES column
# names receive the '_nces' suffix.
filtered_from_raw_with_nces = filtered_from_raw.merge(
    nces_1516_full,
    how = 'left',
    left_on = 'COMBOKEY',
    right_on = 'combokey',
    suffixes = ('', '_nces'),
)
# filtered_from_raw_with_nces.to_csv('../filtered_data/04_filtered_out_schools.csv')

In [13]:
# A row counts as NCES-matching when the merged LEVEL column is populated.
has_nces_level = filtered_from_raw_with_nces['LEVEL'].notnull()
matching_schools_total = filtered_from_raw_with_nces[has_nces_level]
non_matching_total = filtered_from_raw_with_nces[~has_nces_level]

# Analysis

### - Filtered-out Schools

In [72]:
"""Breakdown of All schools with matching NCES tags"""
# Subsets flagged through CRDC columns.
total_jj_schools = matching_schools_total[matching_schools_total.JJ == 'Yes']
total_alt_schools = matching_schools_total[matching_schools_total.SCH_STATUS_ALT == 'Yes']
total_sped_schools = matching_schools_total[matching_schools_total.SCH_STATUS_SPED == 'Yes']
total_without_11_12_schools = matching_schools_total[matching_schools_total.Students_in_11_12 == 'No']

# Subsets flagged through NCES columns.
total_non_hs_voc_schools = matching_schools_total[matching_schools_total.LEVEL.isin(['1', '2', 'N'])]
total_virtual_schools = matching_schools_total[matching_schools_total.VIRTUAL == 'Yes']
total_nces_sped_schools = matching_schools_total[matching_schools_total.SCH_TYPE == 2]
total_nces_alt_other_schools = matching_schools_total[matching_schools_total.SCH_TYPE == 4]

In [73]:
"""Number of Schools in Each Category"""
# Overall and NCES match / non-match row counts.
total_filtered_schools = len(filtered_from_raw.index)
total_nces_matching_schools = len(matching_schools_total.index)
total_nces_non_matching_schools = len(non_matching_total.index)

# Row counts for each category subset.
total_jj_school_count = len(total_jj_schools.index)
total_alt_school_count = len(total_alt_schools.index)
total_sped_school_count = len(total_sped_schools.index)
total_non_hs_voc_school_count = len(total_non_hs_voc_schools.index)
total_virtual_school_count = len(total_virtual_schools.index)
total_nces_sped_school_count = len(total_nces_sped_schools.index)
total_nces_alt_other_school_count = len(total_nces_alt_other_schools.index)
total_without_11_12_school_count = len(total_without_11_12_schools.index)

### - Filtered Out HS Students

In [84]:
"""Number of High Schoolers in those Schools"""
# Sum of total_hs_enrollment overall and for the NCES match / non-match split.
total_hs = filtered_from_raw_with_nces['total_hs_enrollment'].sum()
matching_hs = matching_schools_total['total_hs_enrollment'].sum()
non_matching_hs = non_matching_total['total_hs_enrollment'].sum()

# Sum of total_hs_enrollment within each category subset.
total_jj_school_hs = total_jj_schools['total_hs_enrollment'].sum()
total_alt_school_hs = total_alt_schools['total_hs_enrollment'].sum()
total_sped_school_hs = total_sped_schools['total_hs_enrollment'].sum()
total_non_hs_voc_school_hs = total_non_hs_voc_schools['total_hs_enrollment'].sum()
total_virtual_school_hs = total_virtual_schools['total_hs_enrollment'].sum()
total_nces_sped_school_hs = total_nces_sped_schools['total_hs_enrollment'].sum()
total_nces_alt_other_school_hs = total_nces_alt_other_schools['total_hs_enrollment'].sum()
total_without_11_12_school_hs = total_without_11_12_schools['total_hs_enrollment'].sum()

### - Filtered Out DE Students

In [88]:
# NCES-matching schools that report at least one dual-enrollment student,
# taken from a copy of the matching frame.
match_schools_with_de = (
    matching_schools_total
    .copy()
    .loc[lambda schools: schools['de_total_enrollment'] > 0]
)

In [90]:
"""Filtered out DE Students"""
def _de_students_where(mask):
    """Sum de_total_enrollment over the rows of match_schools_with_de selected by a boolean mask."""
    return match_schools_with_de.loc[mask, 'de_total_enrollment'].sum()


# Overall totals; the non-matching figure is the remainder after the matching schools.
total_filtered_out_de_students = filtered_from_raw_with_nces['de_total_enrollment'].sum()
de_students_in_NCES_matching_schools = match_schools_with_de['de_total_enrollment'].sum()
de_students_in_NCES_non_matching_schools = total_filtered_out_de_students - de_students_in_NCES_matching_schools

# Categories flagged through CRDC columns.
jj_de_students = _de_students_where(match_schools_with_de['JJ'] == 'Yes')
alt_de_students_crdc = _de_students_where(match_schools_with_de['SCH_STATUS_ALT'] == 'Yes')
sped_de_students_crdc = _de_students_where(match_schools_with_de['SCH_STATUS_SPED'] == 'Yes')
de_students_schools_without_11_12 = _de_students_where(match_schools_with_de['Students_in_11_12'] == 'No')

# Categories flagged through NCES columns.
level_1_2_N_DE_Students = _de_students_where(match_schools_with_de['LEVEL'].isin(['1', '2', 'N']))
virtual_de_students = _de_students_where(match_schools_with_de['VIRTUAL'] == 'Yes')
sped_de_students_nces = _de_students_where(match_schools_with_de['SCH_TYPE'] == 2)
alt_de_students_nces = _de_students_where(match_schools_with_de['SCH_TYPE'] == 4)

## Where are the Filtered Students?

In [91]:
# Row labels; the three value lists below follow this same order.
filtered_list = [
    'Total Filtered Out',
    'NCES-Matching Schools',
    'NCES-Non-Matching Schools',
    'Juvenile Justice (CRDC)',
    'Alternative Education (CRDC)',
    'Special Education (CRDC)',
    'Schools w/o 11th/12th grade (CRDC)',
    'Virtual (NCES)',
    'Special Education (NCES)',
    'Alternative/Other (NCES)',
    'Schools Reporting as Elementary/Middle/Other (NCES)',
]

# Dual-enrollment student totals per row.
de_student_list = [
    total_filtered_out_de_students,
    de_students_in_NCES_matching_schools,
    de_students_in_NCES_non_matching_schools,
    jj_de_students,
    alt_de_students_crdc,
    sped_de_students_crdc,
    de_students_schools_without_11_12,
    virtual_de_students,
    sped_de_students_nces,
    alt_de_students_nces,
    level_1_2_N_DE_Students,
]

# School counts per row.
filtered_out_schools = [
    total_filtered_schools,
    total_nces_matching_schools,
    total_nces_non_matching_schools,
    total_jj_school_count,
    total_alt_school_count,
    total_sped_school_count,
    total_without_11_12_school_count,
    total_virtual_school_count,
    total_nces_sped_school_count,
    total_nces_alt_other_school_count,
    total_non_hs_voc_school_count,
]

# High-school enrollment totals per row.
total_hs_list = [
    total_hs,
    matching_hs,
    non_matching_hs,
    total_jj_school_hs,
    total_alt_school_hs,
    total_sped_school_hs,
    total_without_11_12_school_hs,
    total_virtual_school_hs,
    total_nces_sped_school_hs,
    total_nces_alt_other_school_hs,
    total_non_hs_voc_school_hs,
]

# Last expression of the cell: the summary table is displayed as the output.
pd.DataFrame(
    {
        'DE Students': de_student_list,
        'Schools': filtered_out_schools,
        'HS Students': total_hs_list,
    },
    index = filtered_list,
)

Unnamed: 0,DE Students,HS Students,Schools
Total Filtered Out,28384,970253,77693
NCES-Matching Schools,25810,847505,74493
NCES-Non-Matching Schools,2574,122748,3200
Juvenile Justice (CRDC),363,16667,487
Alternative Education (CRDC),7406,260122,3018
Special Education (CRDC),2827,81562,1833
Schools w/o 11th/12th grade (CRDC),4553,233974,68925
Virtual (NCES),2766,100467,486
Special Education (NCES),1167,55229,1487
Alternative/Other (NCES),11171,362789,4296
