# <u> NACEP </u>
## 2015-16 CRDC
## AP- & DE-Only Schools:   Flag Analysis

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode,iplot
init_notebook_mode(connected = True)

%matplotlib inline
sns.set_style('whitegrid')
plt.rc('axes', titlesize = 14, titleweight = 'bold', labelweight = 'bold')

In [2]:
hs = pd.read_csv('../filtered_data/04_filter_final.csv', dtype = {'LEAID':np.object})

In [3]:
hs.columns.values

array(['COMBOKEY', 'SCH_NAME', 'GSHI', 'GSLO', 'JJ', 'LAT1516', 'LEAID',
       'LEA_NAME', 'LEA_STATE', 'LEA_STATE_NAME', 'LEVEL', 'LOCALE15',
       'LON1516', 'NMCNTY15', 'SCHID', 'SCH_APENR_AM_F', 'SCH_APENR_AM_M',
       'SCH_APENR_AS_F', 'SCH_APENR_AS_M', 'SCH_APENR_BL_F',
       'SCH_APENR_BL_M', 'SCH_APENR_HI_F', 'SCH_APENR_HI_M',
       'SCH_APENR_HP_F', 'SCH_APENR_HP_M', 'SCH_APENR_IDEA_F',
       'SCH_APENR_IDEA_M', 'SCH_APENR_IND', 'SCH_APENR_LEP_F',
       'SCH_APENR_LEP_M', 'SCH_APENR_TR_F', 'SCH_APENR_TR_M',
       'SCH_APENR_WH_F', 'SCH_APENR_WH_M', 'SCH_DUALENR_AM_F',
       'SCH_DUALENR_AM_M', 'SCH_DUALENR_AS_F', 'SCH_DUALENR_AS_M',
       'SCH_DUALENR_BL_F', 'SCH_DUALENR_BL_M', 'SCH_DUALENR_HI_F',
       'SCH_DUALENR_HI_M', 'SCH_DUALENR_HP_F', 'SCH_DUALENR_HP_M',
       'SCH_DUALENR_IDEA_F', 'SCH_DUALENR_IDEA_M', 'SCH_DUALENR_LEP_F',
       'SCH_DUALENR_LEP_M', 'SCH_DUALENR_TR_F', 'SCH_DUALENR_TR_M',
       'SCH_DUALENR_WH_F', 'SCH_DUALENR_WH_M', 'SCH_DUAL_IND',
    

In [4]:
"""Total Enrollment Column"""
hs['total_enrollment'] = hs['TOT_ENR_M'] + hs['TOT_ENR_F']

In [5]:
def ap_only(ap_flag, de_flag):
    if ap_flag == 'Yes' and de_flag == 'No':
        return 1
    return 0

def de_only(ap_flag, de_flag):
    if ap_flag =='No' and de_flag == 'Yes':
        return 1
    return 0

def neither(ap_flag, de_flag):
    if ap_flag == 'No' and de_flag == 'No':
        return 1
    return 0

def both(ap_flag, de_flag):
    if ap_flag == 'Yes' and de_flag == 'Yes':
        return 1
    return 0

In [6]:
hs['ap_only'] = hs.apply(lambda row: ap_only(row['SCH_APENR_IND'],row['SCH_DUAL_IND']), axis=1)
hs['de_only'] = hs.apply(lambda row: de_only(row['SCH_APENR_IND'],row['SCH_DUAL_IND']), axis=1)
hs['neither_ap_de'] = hs.apply(lambda row: neither(row['SCH_APENR_IND'],row['SCH_DUAL_IND']), axis=1)
hs['both_ap_de'] = hs.apply(lambda row: both(row['SCH_APENR_IND'],row['SCH_DUAL_IND']), axis=1)

In [7]:
hs.ap_only.value_counts()

0    15369
1     3298
Name: ap_only, dtype: int64

In [8]:
hs.de_only.value_counts()

0    14795
1     3872
Name: de_only, dtype: int64

In [9]:
hs.neither_ap_de.value_counts()

0    16628
1     2039
Name: neither_ap_de, dtype: int64

In [10]:
hs.both_ap_de.value_counts()

1    9458
0    9209
Name: both_ap_de, dtype: int64

In [11]:
hs_ap_only = hs.copy()
hs_ap_only = hs_ap_only[hs_ap_only.ap_only == 1]

In [12]:
hs_de_only = hs.copy()
hs_de_only = hs_de_only[hs_de_only.de_only == 1]

In [13]:
hs_neither_ap_de = hs.copy()
hs_neither_ap_de = hs_neither_ap_de[hs_neither_ap_de.neither_ap_de == 1]

In [14]:
hs_both_ap_de = hs.copy()
hs_both_ap_de = hs_both_ap_de[hs_both_ap_de.both_ap_de == 1]

# By State

In [15]:
"""Which states have many schools offering DE but not AP?"""
hs_de_only_state = hs_de_only.groupby('LEA_STATE')['LEAID'].count().sort_values(ascending = False)
hs_de_only_state = hs_de_only_state.rename('DE_Only')

In [16]:
"""Which states have many schools offering AP but not DE?"""
hs_ap_only_state = hs_ap_only.groupby('LEA_STATE')['LEAID'].count().sort_values(ascending = False)
hs_ap_only_state = hs_ap_only_state.rename('AP_Only')

In [17]:
"""Which states have many schools NOT offering AP or DE?"""
hs_neither_ap_de_state = hs_neither_ap_de.groupby('LEA_STATE')['LEAID'].count().sort_values(ascending = False)
hs_neither_ap_de_state = hs_neither_ap_de_state.rename('Neither_AP_DE')

In [18]:
"""Which states have many schools offering AP and DE?"""
hs_both_ap_de_state = hs_both_ap_de.groupby('LEA_STATE')['LEAID'].count().sort_values(ascending = False)
hs_both_ap_de_state = hs_both_ap_de_state.rename('Both_AP_DE')

In [19]:
by_state = pd.concat([hs_de_only_state, hs_ap_only_state, hs_neither_ap_de_state, hs_both_ap_de_state], axis = 1)
by_state.fillna(0).astype(int).sort_values('DE_Only', ascending = False)

Unnamed: 0,DE_Only,AP_Only,Neither_AP_DE,Both_AP_DE
TX,348,135,61,943
MO,281,23,45,200
OH,232,51,162,450
KS,200,14,37,80
IL,179,102,51,339
MI,166,52,61,396
NE,155,3,37,64
MN,154,47,87,144
IA,154,1,8,167
OK,125,56,80,184


# By Size

In [20]:
def school_sizer(enroll):
    if enroll < 100:
        return 1
    elif 100 <= enroll < 500:
        return 2
    elif 500 <= enroll < 1200:
        return 3
    return 4

In [21]:
hs_ap_only['size_group'] = hs_ap_only['total_enrollment'].apply(lambda x: school_sizer(x))
hs_de_only['size_group'] = hs_de_only['total_enrollment'].apply(lambda x: school_sizer(x))
hs_neither_ap_de['size_group'] = hs_neither_ap_de['total_enrollment'].apply(lambda x: school_sizer(x))
hs_both_ap_de['size_group'] = hs_both_ap_de['total_enrollment'].apply(lambda x: school_sizer(x))

In [22]:
hs_de_only_size = hs_de_only.groupby('size_group')['LEAID'].count()
hs_de_only_size.rename('DE_Only', inplace=True);

In [23]:
hs_ap_only_size = hs_ap_only.groupby('size_group')['LEAID'].count()
hs_ap_only_size.rename('AP_Only', inplace=True);

In [24]:
hs_neither_ap_de_size = hs_neither_ap_de.groupby('size_group')['LEAID'].count()
hs_neither_ap_de_size.rename('Neither_AP_DE', inplace=True);

In [25]:
hs_both_ap_de_size = hs_both_ap_de.groupby('size_group')['LEAID'].count()
hs_both_ap_de_size.rename('Both_AP_DE', inplace=True);

In [26]:
by_size = pd.concat([hs_de_only_size, hs_ap_only_size, hs_neither_ap_de_size, hs_both_ap_de_size], axis = 1)
by_size.rename({1:'<100', 2:'100-499', 3:'500-1199', 4:'>1200'})

Unnamed: 0_level_0,DE_Only,AP_Only,Neither_AP_DE,Both_AP_DE
size_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<100,610,77,676,99
100-499,2600,986,1103,2598
500-1199,584,1180,214,3328
>1200,78,1055,46,3433


# By Region

In [27]:
"""
1:  Middle States Commission on Higher Education - New York, New Jersey, Pennsylvania, Delaware, Maryland, the District of Columbia

2:  New England Association of Schools and Colleges - Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, and Vermont

3:  Higher Learning Commission - Arkansas, Arizona, Colorado, Iowa, Illinois, Indiana, Kansas, Michigan, Minnesota, Missouri, 
    North Dakota, Nebraska, New Mexico, Ohio, Oklahoma, South Dakota, Wisconsin, West Virginia, and Wyoming

4:  Northwest Commission on Colleges and Universities - Alaska, Idaho, Montana, Nevada, Oregon, Utah, and Washington

5:  Southern Association of Colleges and Schools - Alabama, Florida, Georgia, Kentucky, Louisiana, Mississippi, 
    North Carolina, South Carolina, Tennessee, Texas and Virginia

6:  Western Association of Schools and Colleges - California, Hawaii
"""

middle_states = "NY,NJ,PA,DE,MD,DC".split(',')
new_england = "CT,ME,MA,NH,RI,VT".split(',')
higher_learning_commission = "AR,AZ,CO,IA,IL,IN,KS,MI,MN,MO,ND,NE,NM,OH,OK,SD,WI,WV,WY".split(',')
northwest_commission = "AK,ID,MT,NV,OR,UT,WA".split(',')
southern_association = "AL,FL,GA,KY,LA,MS,NC,SC,TN,TX,VA".split(',')
western_association = "CA,HI".split(',')

In [28]:
def region_mapper(state):
    if state in middle_states:
        return "MSCHE"
    elif state in new_england:
        return "NEASC"
    elif state in higher_learning_commission:
        return "HLC"
    elif state in northwest_commission:
        return "NWCCU"
    elif state in southern_association:
        return "SACS"
    elif state in western_association:
        return "WASC"
    else:
        return "Messed Up"
    
hs_de_only['region_accred'] = hs_de_only['LEA_STATE'].apply(lambda x: region_mapper(x))
hs_ap_only['region_accred'] = hs_ap_only['LEA_STATE'].apply(lambda x: region_mapper(x))
hs_neither_ap_de['region_accred'] = hs_neither_ap_de['LEA_STATE'].apply(lambda x: region_mapper(x))
hs_both_ap_de['region_accred'] = hs_both_ap_de['LEA_STATE'].apply(lambda x: region_mapper(x))

In [29]:
hs_de_only_region = hs_de_only.groupby('region_accred')['LEAID'].count()
hs_de_only_region.rename('DE_Only', inplace=True);

In [30]:
hs_ap_only_region = hs_ap_only.groupby('region_accred')['LEAID'].count()
hs_ap_only_region.rename('AP_Only', inplace=True);

In [31]:
hs_neither_ap_de_region = hs_neither_ap_de.groupby('region_accred')['LEAID'].count()
hs_neither_ap_de_region.rename('Neither_AP_DE', inplace=True);

In [32]:
hs_both_ap_de_region = hs_both_ap_de.groupby('region_accred')['LEAID'].count()
hs_both_ap_de_region.rename('Both_AP_DE', inplace=True);

In [33]:
pd.concat([hs_de_only_region, hs_ap_only_region, hs_neither_ap_de_region, hs_both_ap_de_region], axis = 1)

Unnamed: 0_level_0,DE_Only,AP_Only,Neither_AP_DE,Both_AP_DE
region_accred,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
HLC,2172,661,899,3424
MSCHE,195,876,262,1257
NEASC,79,309,69,427
NWCCU,381,222,322,543
SACS,958,483,284,3349
WASC,87,747,203,458


# By Locale

In [34]:
locale_map = {11: 'City', 12: 'City', 13: 'City', 14: 'City',
              21: 'Suburban', 22: 'Suburban', 23: 'Suburban', 24: 'Suburban',
              31: 'Town', 32: 'Town', 33: 'Town', 34: 'Town',
              41: 'Rural', 42: 'Rural', 43: 'Rural', 44: 'Rural'}

In [35]:
hs_de_only['locale'] = hs_de_only['LOCALE15'].map(locale_map)
hs_ap_only['locale'] = hs_ap_only['LOCALE15'].map(locale_map)
hs_neither_ap_de['locale'] = hs_neither_ap_de['LOCALE15'].map(locale_map)
hs_both_ap_de['locale'] = hs_both_ap_de['LOCALE15'].map(locale_map)

In [36]:
hs_de_only_locale = hs_de_only.groupby('locale')['LEAID'].count()
hs_de_only_locale.rename('DE_Only', inplace=True);

In [37]:
hs_ap_only_locale = hs_ap_only.groupby('locale')['LEAID'].count()
hs_ap_only_locale.rename('AP_Only', inplace=True);

In [38]:
hs_neither_ap_de_locale = hs_ap_only.groupby('locale')['LEAID'].count()
hs_neither_ap_de_locale.rename('Neither_AP_DE', inplace=True);

In [39]:
hs_both_ap_de_locale = hs_both_ap_de.groupby('locale')['LEAID'].count()
hs_both_ap_de_locale.rename('Both_DE_AP', inplace=True);

In [40]:
pd.concat([hs_de_only_locale, hs_ap_only_locale, hs_neither_ap_de_locale, hs_both_ap_de_locale], axis = 1)

Unnamed: 0_level_0,DE_Only,AP_Only,Neither_AP_DE,Both_DE_AP
locale,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
City,335,1186,1186,1966
Rural,2718,692,692,3135
Suburban,268,1067,1067,2858
Town,551,353,353,1499


# By Ethncity 

In [41]:
hs_de_only['pct_eth'] = (hs_de_only['total_enrollment'] - hs_de_only['SCH_ENR_WH_F'] - hs_de_only['SCH_ENR_WH_M']) / hs_de_only['total_enrollment'] 
hs_ap_only['pct_eth'] = (hs_ap_only['total_enrollment'] - hs_ap_only['SCH_ENR_WH_F'] - hs_ap_only['SCH_ENR_WH_M']) / hs_ap_only['total_enrollment'] 
hs_neither_ap_de['pct_eth'] = (hs_neither_ap_de['total_enrollment'] - hs_neither_ap_de['SCH_ENR_WH_F'] - hs_neither_ap_de['SCH_ENR_WH_M']) / hs_neither_ap_de['total_enrollment'] 
hs_both_ap_de['pct_eth'] = (hs_both_ap_de['total_enrollment'] - hs_both_ap_de['SCH_ENR_WH_F'] - hs_both_ap_de['SCH_ENR_WH_M']) / hs_both_ap_de['total_enrollment'] 

In [42]:
def eth_grouper(pct):
    if pct <= .20:
        return 1
    elif .20 < pct <= .4:
        return 2
    elif .4 < pct <= 0.6:
        return 3
    elif .6 < pct <= .8:
        return 4
    return 5

In [43]:
hs_de_only['eth_quintile'] = hs_de_only['pct_eth'].apply(lambda x: eth_grouper(x))
hs_ap_only['eth_quintile'] = hs_ap_only['pct_eth'].apply(lambda x: eth_grouper(x))
hs_neither_ap_de['eth_quintile'] = hs_neither_ap_de['pct_eth'].apply(lambda x: eth_grouper(x))
hs_both_ap_de['eth_quintile'] = hs_both_ap_de['pct_eth'].apply(lambda x: eth_grouper(x))

In [44]:
hs_de_only_eth = hs_de_only.groupby('eth_quintile')['LEAID'].count()
hs_de_only_eth.rename('DE_Only', inplace = True);

In [45]:
hs_ap_only_eth = hs_ap_only.groupby('eth_quintile')['LEAID'].count()
hs_ap_only_eth.rename('AP_Only', inplace = True);

In [46]:
hs_neither_ap_de_eth = hs_neither_ap_de.groupby('eth_quintile')['LEAID'].count()
hs_neither_ap_de_eth.rename('Neither_AP_DE', inplace = True);

In [47]:
hs_both_ap_de_eth = hs_both_ap_de.groupby('eth_quintile')['LEAID'].count()
hs_both_ap_de_eth.rename('Both_AP_DE', inplace = True);

In [48]:
pd.concat([hs_de_only_eth, hs_ap_only_eth, hs_neither_ap_de_eth, hs_both_ap_de_eth], axis = 1)

Unnamed: 0_level_0,DE_Only,AP_Only,Neither_AP_DE,Both_AP_DE
eth_quintile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2134,842,603,3543
2,699,508,310,1985
3,349,422,237,1406
4,239,413,218,954
5,451,1113,671,1570
