# <u> The National Alliance of Concurrent Enrollment Partnerships </u>
## 2015-16 Civil Rights Data Collection (CRDC)
## Advanced Placement (AP) v. Dual Enrollment (DE)
### Raw Data Augmenting
#### Alijah O'Connor - 2018
--------------------------------------------------------------------------
Use this file for creating/changing the raw data files (e.g. adding fields to the dataset).

In [1]:
from IPython.display import HTML

# Markup for a button that shows/hides every notebook input cell.  The script
# registers code_toggle to run once when the document is ready, and the form
# button calls it again on each click.
_toggle_markup = '''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>'''

# Left as the cell's final expression so the notebook renders the widget.
HTML(_toggle_markup)

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode,iplot
init_notebook_mode(connected = True)

%matplotlib inline
sns.set_style('whitegrid')
plt.rc('axes', titlesize = 14, titleweight = 'bold', labelweight = 'bold')

In [3]:
# Load the filtered CRDC extract.  LEAID is forced to the generic object dtype
# so pandas does not infer a numeric type for it (presumably to keep the IDs
# as text -- confirm against the upstream filter step).
# The builtin `object` replaces the `np.object` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24; the two name the same dtype.
crdc_1516 = pd.read_csv('../filtered_data/04_filter_final.csv', dtype={'LEAID': object})

# Clean Missing Values / Add Enrollment columns
Clean the missing values, specifically those in the integer columns: they are recorded as negative numbers, which would interfere with the enrollment calculations.<br>

**This section sets up most of the calculations needed for the analyses in the report.**

In [4]:
# Work on an independent (deep) copy so the frame as loaded stays untouched.
crdc_1516_cleaned = crdc_1516.copy(deep=True)

In [5]:
from my_functions.extra_functions import missing_value_mapper

# Run missing_value_mapper over every cell of the frame.
# DataFrame.applymap was deprecated in pandas 2.1 in favour of the equivalent
# DataFrame.map, so use the new name when it exists and fall back to applymap
# on older pandas releases.
if hasattr(pd.DataFrame, 'map'):
    crdc_1516_cleaned = crdc_1516_cleaned.map(missing_value_mapper)
else:
    crdc_1516_cleaned = crdc_1516_cleaned.applymap(missing_value_mapper)

In [6]:
"""Total Enrollments"""
# Whole-school head count: the male and female totals added together.
crdc_1516_cleaned['total_enrollment'] = (
    crdc_1516_cleaned['TOT_ENR_M'] + crdc_1516_cleaned['TOT_ENR_F']
)

# White enrollment is summed directly; non-white is whatever remains of the
# school total.
crdc_1516_cleaned['total_white_enrollment'] = (
    crdc_1516_cleaned['SCH_ENR_WH_M'] + crdc_1516_cleaned['SCH_ENR_WH_F']
)
crdc_1516_cleaned['total_nonwhite_enrollment'] = (
    crdc_1516_cleaned['total_enrollment'] - crdc_1516_cleaned['total_white_enrollment']
)

# Every remaining subgroup follows the same pattern:
#   total_<code>_enrollment = SCH_ENR_<CODE>_M + SCH_ENR_<CODE>_F
# The tuple order fixes the order in which the new columns are appended.
for _code in ('HI', 'AM', 'AS', 'BL', 'HP', 'TR', 'LEP', 'IDEA'):
    _male = crdc_1516_cleaned['SCH_ENR_{}_M'.format(_code)]
    _female = crdc_1516_cleaned['SCH_ENR_{}_F'.format(_code)]
    crdc_1516_cleaned['total_{}_enrollment'.format(_code.lower())] = _male + _female

In [7]:
"""DE Columns"""
# Dual-enrollment head count: the male and female totals added together.
crdc_1516_cleaned['de_total_enrollment'] = (
    crdc_1516_cleaned['TOT_DUALENR_M'] + crdc_1516_cleaned['TOT_DUALENR_F']
)

# White DE participants are summed directly; non-white is the remainder of
# the DE total.
crdc_1516_cleaned['de_white_enrollment'] = (
    crdc_1516_cleaned['SCH_DUALENR_WH_F'] + crdc_1516_cleaned['SCH_DUALENR_WH_M']
)
crdc_1516_cleaned['de_nonwhite_enrollment'] = (
    crdc_1516_cleaned['de_total_enrollment'] - crdc_1516_cleaned['de_white_enrollment']
)

# Every remaining subgroup follows the same pattern:
#   de_<code>_enrollment = SCH_DUALENR_<CODE>_M + SCH_DUALENR_<CODE>_F
# The tuple order fixes the order in which the new columns are appended.
for _code in ('HI', 'AM', 'AS', 'BL', 'HP', 'TR', 'LEP', 'IDEA'):
    _male = crdc_1516_cleaned['SCH_DUALENR_{}_M'.format(_code)]
    _female = crdc_1516_cleaned['SCH_DUALENR_{}_F'.format(_code)]
    crdc_1516_cleaned['de_{}_enrollment'.format(_code.lower())] = _male + _female

In [8]:
"""AP Columns"""
# AP head count: the male and female totals added together.
crdc_1516_cleaned['ap_total_enrollment'] = (
    crdc_1516_cleaned['TOT_APENR_M'] + crdc_1516_cleaned['TOT_APENR_F']
)

# White AP participants are summed directly; non-white is the remainder of
# the AP total.
crdc_1516_cleaned['ap_white_enrollment'] = (
    crdc_1516_cleaned['SCH_APENR_WH_F'] + crdc_1516_cleaned['SCH_APENR_WH_M']
)
crdc_1516_cleaned['ap_nonwhite_enrollment'] = (
    crdc_1516_cleaned['ap_total_enrollment'] - crdc_1516_cleaned['ap_white_enrollment']
)

# Every remaining subgroup follows the same pattern:
#   ap_<code>_enrollment = SCH_APENR_<CODE>_M + SCH_APENR_<CODE>_F
# The tuple order fixes the order in which the new columns are appended.
for _code in ('HI', 'AM', 'AS', 'BL', 'HP', 'TR', 'LEP', 'IDEA'):
    _male = crdc_1516_cleaned['SCH_APENR_{}_M'.format(_code)]
    _female = crdc_1516_cleaned['SCH_APENR_{}_F'.format(_code)]
    crdc_1516_cleaned['ap_{}_enrollment'.format(_code.lower())] = _male + _female

___________________________________________________________________________________________________________________

In [9]:
"""Helper Functions"""
from my_functions.extra_functions import (hs_enrollment_averager, school_sizer, 
                                          region_mapper, eth_grouper, locale_map, region_mapper_nacep, 
                                          region_mapper_census)

In [10]:
"""HS-Averaged Column"""
# hs_enrollment_averager receives the whole frame; its result is stored as the
# per-school high-school enrollment figure used by the later hs_* columns.
_hs_estimate = hs_enrollment_averager(crdc_1516_cleaned)
crdc_1516_cleaned['hs_total_enrollment'] = _hs_estimate

In [11]:
"""HS Category Breakdown"""
# Each hs_* column takes a group's share of the school's total enrollment,
# applies that share to hs_total_enrollment, and rounds to a whole number:
#   hs_<group> = round((<group count> / total_enrollment) * hs_total_enrollment)
_school_total = crdc_1516_cleaned['total_enrollment']
_hs_total = crdc_1516_cleaned['hs_total_enrollment']

# (new column, source count column); list order is the column creation order.
_hs_breakdown = [
    ('hs_male', 'TOT_ENR_M'),
    ('hs_female', 'TOT_ENR_F'),
    ('hs_white', 'total_white_enrollment'),
    ('hs_nonwhite', 'total_nonwhite_enrollment'),
    ('hs_hi', 'total_hi_enrollment'),
    ('hs_am', 'total_am_enrollment'),
    ('hs_as', 'total_as_enrollment'),
    ('hs_bl', 'total_bl_enrollment'),
    ('hs_hp', 'total_hp_enrollment'),
    ('hs_tr', 'total_tr_enrollment'),
    ('hs_idea', 'total_idea_enrollment'),
    ('hs_lep', 'total_lep_enrollment'),
]

for _target, _source in _hs_breakdown:
    _share = crdc_1516_cleaned[_source] / _school_total
    crdc_1516_cleaned[_target] = (_share * _hs_total).round(0)

In [12]:
"""Add HS Enrollment-Grouped Column"""
# Label each school by passing its hs_total_enrollment value to school_sizer.
_hs_sizes = crdc_1516_cleaned['hs_total_enrollment']
crdc_1516_cleaned['size_group'] = _hs_sizes.apply(school_sizer)

In [13]:
"""Add Region Columns"""
# Three region columns, each derived from LEA_STATE by its own mapper.
# Tuple order is the order in which the columns are appended.
for _column, _mapper in (
    ('region_accred', region_mapper),
    ('region_census', region_mapper_census),
    ('region_nacep', region_mapper_nacep),
):
    crdc_1516_cleaned[_column] = crdc_1516_cleaned['LEA_STATE'].apply(_mapper)

In [14]:
"""Add Ethnicity Percentage/Quintile"""
# Non-white share of total enrollment.  Despite the "pct" name it is stored as
# a plain ratio; nothing here multiplies it by 100.
_nonwhite_share = (
    crdc_1516_cleaned['total_nonwhite_enrollment'] / crdc_1516_cleaned['total_enrollment']
)
crdc_1516_cleaned['pct_eth'] = _nonwhite_share

# eth_grouper assigns each school's ratio to its group label.
crdc_1516_cleaned['eth_quintile'] = crdc_1516_cleaned['pct_eth'].apply(eth_grouper)

In [15]:
"""Add Locale Column"""
# Translate each LOCALE15 value through locale_map to get its locale group.
_locale_codes = crdc_1516_cleaned['LOCALE15']
crdc_1516_cleaned['locale_group'] = _locale_codes.map(locale_map)

In [17]:
# Persist the augmented dataset.  to_csv is left at its defaults, so the row
# index is written out as the first column of the file.
_augmented_csv_path = '../filtered_data/06_filter_final_full.csv'
crdc_1516_cleaned.to_csv(_augmented_csv_path)