# Perkins V - Calculating Performance Indicators by CIP

In [1]:
# Author: Matthew Fikes
# Modified: 3/123/22
import pandas as pd


## This code will calculate the Perkins 1P1, 2P1, and 3P1 performance indicators for Chart 3 of the CLNA.

#### If your file is not named CTEA.xlsx the code will prompt you to enter the filename. This assumes values in CTEA file are numeric and not descriptions (e.g. non-completion status codes are 4 and 6). This was designed only for the CTEA-1 but can be modified.

In [2]:
file = 'CTEA.xlsx'

try:
    data = pd.read_excel(file)
    print('File {0} loaded successfully'.format(file))
except:
    file = input('CTEA.xlsx not found. Please enter the full filename: ')
    try:
        data = pd.read_excel(file)
        print('File {0} loaded successfully'.format(file))
    except:
        print('No file found')
        exit
    

File CTEA.xlsx loaded successfully


In [3]:
data.rename(columns=lambda x: x.split(' / ')[1],inplace=True)

### Static Variables
These can be modified to suit your individual file. Targets for 1P1, 2P1, and 3P1 ar also included so they can be easily changed.

In [4]:
target_1 = .4975
target_2 = .2075
target_3 = .1675

In [5]:
# fields to use for calculations from merged CTEA 1A/B - can be renamed if your fields are different
cip_field = 'CIP Code'
status_field = 'YRENDSTAT_ID'
credit_field = 'Credits Earned'
emp_field = 'EMPSTAT_ID'
educ_field = 'EDUCSTAT_ID'
id_field = 'Student ID'
gender_field = 'Gender'

In [6]:
# special population fields
disab_field = 'Disabled'
displ_field = 'DISPLACED_HM'
econdis_field = 'Economic Disadvantage'
homels_field = 'Homeless'
lim_eng_field = 'Limited English'
migr_field = 'Migrant'
sparnt_field = 'Single Parent'
youth_ao_field = 'Youth Aged Out'
youth_af_field = 'Youth Armed Forces'
spop_fields = [disab_field,displ_field,econdis_field,homels_field,lim_eng_field,migr_field,sparnt_field,youth_ao_field,youth_af_field]

### Get Non-traditional Crosswalk

In [7]:
xwalk_url = 'https://s3.amazonaws.com/PCRN/docs/REVISED_FINAL-2020-Nontraditional-Crosswalk-6-9-2021.xlsx'
try:
    print('Downloading Non-traditional crosswalk from ',xwalk_url)
    nontrad_xwalk = pd.read_excel(xwalk_url)
    print('Loaded successfully')
except:
    print('Unable to connect to ',xwalk_url)

Downloading Non-traditional crosswalk from  https://s3.amazonaws.com/PCRN/docs/REVISED_FINAL-2020-Nontraditional-Crosswalk-6-9-2021.xlsx
Loaded successfully


### Join CTEA to crosswalk and separate completers

In [8]:
# merge crosswalk with loaded CTEA file, joined by CIP
new_data = nontrad_xwalk[['CIP 6 2020','Female','Male']].merge(data,left_on='CIP 6 2020',right_on=cip_field,how='inner')

In [9]:
cip_list = pd.DataFrame(new_data['CIP 6 2020'].unique().tolist(),columns={'CIP'})
cip_list['CIP Code'] = cip_list['CIP'].map('{:.4f}'.format)

In [10]:
# splits completers based on status codes 4 and 6 and total credits >=12
completer_set = new_data[(new_data[status_field].isin([4,6])) | (new_data[credit_field]>=12)] #make numerator data

In [11]:
# uncomment to make dataframe of noncompleters
#non_completers = pd.merge(new_data,completer_set,how='outer',on=id_field,indicator=True)
#noncomp_df = non_completers.loc[non_completers._merge == 'left_only']

## 1P1 Performance Indicator

In [12]:
num_1p1 = completer_set[(completer_set[status_field].isin([4,6])) & 
                        ((completer_set[emp_field].isin([1,2,3,7,8,9]))|
                          (completer_set[educ_field]==1))]
den_1p1 = completer_set[(completer_set[status_field].isin([4,6]))]

In [13]:
# Calculate performance for each special population by CIP, return values below target
rows_1p1 = []
for i in spop_fields:

    lim_num = pd.DataFrame(num_1p1[(num_1p1[i]==1)])
    lim_den = pd.DataFrame(den_1p1[(den_1p1[i]==1)])

    num = pd.DataFrame(lim_num).filter([cip_field,i]).groupby(cip_field).count()
    dem = pd.DataFrame(lim_den).filter([cip_field,i]).groupby(cip_field).count()
    pct = num.divide(dem)
    underperf = pct[(pct.values<=target_1)]
    
    rows_1p1.append(underperf.reset_index())
    
spop_1p1 = pd.concat(rows_1p1).dropna(axis=1,how='all').groupby('CIP Code').max().reset_index()

In [14]:
num_grp_1 = num_1p1.groupby(cip_field)[id_field].count()
den_grp_1 = den_1p1.groupby(cip_field)[id_field].count()
percents_1p1 = num_grp_1.divide(den_grp_1,fill_value=0)

In [15]:
percents_1p1

CIP Code
11.0201    0.333333
11.0401    0.800000
11.0701    0.666667
11.1003    0.750000
15.0201    0.888889
15.0303    0.666667
15.0407    0.000000
15.0801    1.000000
15.0805    0.333333
15.1302    0.500000
31.0101    0.625000
43.0103    0.366667
43.0107    0.312500
43.0114    0.500000
43.0204    0.700000
44.0000    0.705882
45.0702    1.000000
47.0101    0.555556
47.0201    0.000000
48.0508    0.369565
48.0510    0.500000
50.0102    0.230769
50.0406    0.500000
50.0409    0.600000
50.0410    0.210526
51.0000    0.333333
51.0707    0.555556
51.0907    0.625000
51.0908    0.166667
51.1501    0.722222
51.3801    0.437500
52.0201    0.605505
52.0205    0.692308
52.0302    0.454545
52.0401    0.400000
52.0801    0.800000
Name: Student ID, dtype: float64

### MOST RECENT PERFORMANCE DATA BY PROGRAM

In [16]:
pf1p1a = zip(percents_1p1.index,percents_1p1.values)
for i in pf1p1a:
    print('{0:.4f}---{1:.2f}%'.format(i[0],i[1]*100))

11.0201---33.33%
11.0401---80.00%
11.0701---66.67%
11.1003---75.00%
15.0201---88.89%
15.0303---66.67%
15.0407---0.00%
15.0801---100.00%
15.0805---33.33%
15.1302---50.00%
31.0101---62.50%
43.0103---36.67%
43.0107---31.25%
43.0114---50.00%
43.0204---70.00%
44.0000---70.59%
45.0702---100.00%
47.0101---55.56%
47.0201---0.00%
48.0508---36.96%
48.0510---50.00%
50.0102---23.08%
50.0406---50.00%
50.0409---60.00%
50.0410---21.05%
51.0000---33.33%
51.0707---55.56%
51.0907---62.50%
51.0908---16.67%
51.1501---72.22%
51.3801---43.75%
52.0201---60.55%
52.0205---69.23%
52.0302---45.45%
52.0401---40.00%
52.0801---80.00%


### PROGRAMS NOT MEETING TARGET

In [17]:
p_unmet_1p1 = pd.DataFrame(percents_1p1[(percents_1p1.values<=target_1)]).reset_index().set_index('CIP Code')
p_unmet_1p1_pf = percents_1p1[(percents_1p1.values<=target_1)]

In [18]:
pf1p1b = zip(p_unmet_1p1_pf.index,p_unmet_1p1_pf.values)
for i in pf1p1b:
    print('{0:.4f}---{1:.2f}%'.format(i[0],i[1]*100))

11.0201---33.33%
15.0407---0.00%
15.0805---33.33%
43.0103---36.67%
43.0107---31.25%
47.0201---0.00%
48.0508---36.96%
50.0102---23.08%
50.0410---21.05%
51.0000---33.33%
51.0908---16.67%
51.3801---43.75%
52.0302---45.45%
52.0401---40.00%


### SPECIAL POPULATIONS NOT MEETING TARGET WITHIN PROGRAMS

In [19]:
# Special Pops falling below 1P1 target, by CIP
unmet_spop_1p1 = spop_1p1.set_index('CIP Code')


In [20]:
for items in rows_1p1:
    try:
        print(items.columns[1])
        for v in items.values:
            cip = v[0]
            meas = v[1]
            print('{0:.4f}---{1:.2f}%'.format(cip,meas*100))

    except:
        print('NA')
    

Disabled
50.0102---40.00%
50.0410---33.33%
52.0201---33.33%
DISPLACED_HM
51.3801---33.33%
Economic Disadvantage
15.0805---25.00%
43.0103---37.50%
43.0107---35.71%
48.0508---33.33%
50.0102---20.00%
50.0410---20.00%
51.3801---48.65%
52.0401---28.57%
Homeless
Limited English
52.0302---25.00%
Migrant
Single Parent
51.3801---25.00%
52.0201---42.86%
Youth Aged Out
Youth Armed Forces


## 2P1 Performance Indicator

In [21]:
num_2p1 = completer_set[(completer_set[status_field].isin([4,6]))]
den_2p1 = completer_set[(completer_set[status_field].isin([4,5,6]))]
num_grp_2 = num_2p1.groupby(cip_field)[id_field].count()
den_grp_2 = den_2p1.groupby(cip_field)[id_field].count()

In [22]:
# Calculate performance for each special population by CIP, return values below target
rows_2p1 = []
for i in spop_fields:

    lim_num = pd.DataFrame(num_2p1[(num_2p1[i]==1)])
    lim_den = pd.DataFrame(den_2p1[(den_2p1[i]==1)])

    num = pd.DataFrame(lim_num).filter([cip_field,i]).groupby(cip_field).count()
    den = pd.DataFrame(lim_den).filter([cip_field,i]).groupby(cip_field).count()
    pct = num.divide(den)
    
    underperf = pct[(pct.values<=target_2)]
    if len(underperf)>0:
        rows_2p1.append(underperf.reset_index())
    
try:
    spop_2p1 = pd.concat(rows_2p1).dropna(axis=1,how='all').groupby('CIP Code').max().reset_index()
except:
    
    spop_2p1 = pd.DataFrame(columns={'CIP Code'})

In [23]:
percents_2p1 = num_grp_2.divide(den_grp_2,fill_value=0)
percents_2p1

CIP Code
11.0201    0.666667
11.0401    0.454545
11.0701    0.545455
11.1003    0.727273
15.0201    0.900000
15.0303    0.461538
15.0407    0.666667
15.0616    0.000000
15.0801    0.800000
15.0805    0.461538
15.1302    1.000000
31.0101    0.888889
43.0103    0.566038
43.0107    0.800000
43.0114    1.000000
43.0204    0.416667
44.0000    0.666667
45.0702    1.000000
47.0101    0.782609
47.0201    0.700000
48.0508    0.901961
48.0510    0.909091
50.0102    0.684211
50.0406    0.769231
50.0409    0.535714
50.0410    0.612903
51.0000    0.103448
51.0707    0.500000
51.0907    1.000000
51.0908    0.800000
51.1501    0.750000
51.3801    0.872727
52.0201    0.731544
52.0205    0.866667
52.0302    0.628571
52.0401    0.769231
52.0703    0.000000
52.0801    0.909091
Name: Student ID, dtype: float64

### MOST RECENT PERFORMANCE DATA BY PROGRAM

In [24]:
pf2p1a = zip(percents_2p1.index,percents_2p1.values)
for i in pf2p1a:
    print('{0:.4f}---{1:.2f}%'.format(i[0],i[1]*100))

11.0201---66.67%
11.0401---45.45%
11.0701---54.55%
11.1003---72.73%
15.0201---90.00%
15.0303---46.15%
15.0407---66.67%
15.0616---0.00%
15.0801---80.00%
15.0805---46.15%
15.1302---100.00%
31.0101---88.89%
43.0103---56.60%
43.0107---80.00%
43.0114---100.00%
43.0204---41.67%
44.0000---66.67%
45.0702---100.00%
47.0101---78.26%
47.0201---70.00%
48.0508---90.20%
48.0510---90.91%
50.0102---68.42%
50.0406---76.92%
50.0409---53.57%
50.0410---61.29%
51.0000---10.34%
51.0707---50.00%
51.0907---100.00%
51.0908---80.00%
51.1501---75.00%
51.3801---87.27%
52.0201---73.15%
52.0205---86.67%
52.0302---62.86%
52.0401---76.92%
52.0703---0.00%
52.0801---90.91%


### PROGRAMS NOT MEETING TARGET

In [25]:
p_unmet_2p1 = pd.DataFrame(percents_2p1[(percents_2p1.values<=target_2)]).reset_index().set_index('CIP Code')
p_unmet_2p1_pf = percents_2p1[(percents_2p1.values<=target_2)]

In [26]:
pf2p1b = zip(p_unmet_2p1_pf.index,p_unmet_2p1_pf.values)
for i in pf2p1b:
    print('{0:.4f}---{1:.2f}%'.format(i[0],i[1]*100))

15.0616---0.00%
51.0000---10.34%
52.0703---0.00%


### SPECIAL POPULATIONS NOT MEETING TARGET WITHIN PROGRAMS

In [27]:
# Special Pops falling below 1P1 target, by CIP
unmet_spop_2p1 = spop_2p1.set_index('CIP Code')

In [28]:
for items in rows_2p1:
    try:
        print(items.columns[1])
        for v in items.values:
            cip = v[0]
            meas = v[1]
            print('{0:.4f}---{1:.2f}%'.format(cip,meas*100))

    except:
        print('NA')
    

Disabled
51.0000---20.00%
Economic Disadvantage
51.0000---8.00%
Single Parent
43.0103---20.00%


## 3P1 Performance Indicator

In [29]:
num_3p1 = completer_set[((completer_set['Female']=='Y') & (completer_set[gender_field]==2))|((completer_set['Male']=='Y') & (completer_set[gender_field]==1))]
num_grp_3 = num_3p1.groupby(cip_field)[id_field].count()
den_3p1 = completer_set[(completer_set['Female']=='Y') | (completer_set['Male']=='Y')]
den_grp_3 = den_3p1.groupby(cip_field)[id_field].count()
percents_3p1 = num_grp_3.divide(den_grp_3,fill_value=0)

In [30]:
# Calculate performance for each special population by CIP, return values below target
rows_3p1 = []
for i in spop_fields:

    lim_num = pd.DataFrame(num_3p1[(num_3p1[i]==1)])
    lim_den = pd.DataFrame(den_3p1[(den_3p1[i]==1)])

    num = pd.DataFrame(lim_num).filter([cip_field,i]).groupby(cip_field).count()
    dem = pd.DataFrame(lim_den).filter([cip_field,i]).groupby(cip_field).count()
    pct = num.divide(dem)
    underperf = pct[(pct.values<=target_3)]
    
    rows_3p1.append(underperf.reset_index())
    
spop_3p1 = pd.concat(rows_3p1).dropna(axis=1,how='all').groupby('CIP Code').max().reset_index()

### MOST RECENT PERFORMANCE DATA BY PROGRAM

In [31]:
pf3p1a = zip(percents_3p1.index,percents_3p1.values)
for i in pf3p1a:
    print('{0:.4f}---{1:.2f}%'.format(i[0],i[1]*100))

11.0201---13.16%
11.0401---28.57%
11.0701---43.48%
11.1003---22.64%
13.1314---0.00%
15.0201---3.33%
15.0303---14.81%
15.0407---0.00%
15.0616---0.00%
15.0801---0.00%
15.0805---5.71%
15.1302---10.00%
43.0103---42.98%
43.0107---28.57%
43.0114---50.00%
45.0702---0.00%
47.0101---6.67%
47.0201---2.38%
48.0508---21.59%
48.0510---18.52%
51.0707---2.22%
51.1501---36.73%
51.3801---16.38%
52.0201---49.36%
52.0205---10.87%
52.0302---34.38%


### PROGRAMS NOT MEETING TARGET

In [32]:
p_unmet_3p1 = pd.DataFrame(percents_3p1[(percents_3p1.values<=target_3)]).reset_index().set_index('CIP Code')
p_unmet_3p1_pf = percents_3p1[(percents_3p1.values<=target_3)]

In [33]:
pf3p1b = zip(p_unmet_3p1_pf.index,p_unmet_3p1_pf.values)
for i in pf3p1b:
    print('{0:.4f}---{1:.2f}%'.format(i[0],i[1]*100))

11.0201---13.16%
13.1314---0.00%
15.0201---3.33%
15.0303---14.81%
15.0407---0.00%
15.0616---0.00%
15.0801---0.00%
15.0805---5.71%
15.1302---10.00%
45.0702---0.00%
47.0101---6.67%
47.0201---2.38%
51.0707---2.22%
51.3801---16.38%
52.0205---10.87%


### SPECIAL POPULATIONS NOT MEETING TARGET WITHIN PROGRAMS

In [34]:
# Special Pops falling below 1P1 target, by CIP
unmet_spop_3p1 = spop_3p1.set_index('CIP Code')


In [35]:
for items in rows_3p1:
    try:
        print(items.columns[1])
        for v in items.values:
            cip = v[0]
            meas = v[1]
            print('{0:.4f}---{1:.2f}%'.format(cip,meas*100))

    except:
        print('NA')
    

Disabled
48.0508---10.00%
DISPLACED_HM
Economic Disadvantage
11.0201---14.29%
15.0201---4.00%
15.0303---12.50%
47.0201---3.12%
51.3801---15.91%
52.0205---15.00%
Homeless
Limited English
Migrant
Single Parent
51.3801---10.53%
52.0302---11.11%
Youth Aged Out
Youth Armed Forces


## RESULTS

In [38]:
# load measures into dataframe
df1a = pd.DataFrame(percents_1p1).reset_index()
df1a['Target 1'] = target_1
df1a.rename(columns={id_field:'Measure 1P1'},inplace=True)
df1b = df1a.merge(p_unmet_1p1,on='CIP Code',how='left')
df1b.rename(columns={id_field:'Unmet 1P1'},inplace=True)
df1c =spop_1p1
df1c.rename(columns={'Disabled':'Unmet 1P1 Disabled',
                     'DISPLACED_HM': 'Unmet 1P1 Displaced',
                     'Economic Disadvantage': 'Unmet 1P1 Econ Dis',
                     'Homeless': 'Unmet 1P1 Homeless',
                     'Limited English': 'Unmet 1P1 Lim Eng',
                     'Migrant': 'Unmet 1P1 Migrant',
                     'Single Parent': 'Unmet 1P1 Sing Par',
                     'Youth Aged Out': 'Unmet 1P1 Aged Out',
                     'Youth Armed Forces': 'Unmet 1P1 Armed Forces'
                     
                    },inplace=True)
df1 = df1b.merge(df1c,on='CIP Code',how='left')



df2a = pd.DataFrame(percents_2p1).reset_index()
df2a['Target 2'] = target_2
df2a.rename(columns={'Student ID':'Measure 2P1'},inplace=True)
df2b = df2a.merge(p_unmet_2p1,on='CIP Code',how='left')
df2b.rename(columns={'Student ID':'Unmet 2P1'},inplace=True)
df2c =spop_2p1
df2c.rename(columns={'Disabled':'Unmet 2P1 Disabled',
                     'DISPLACED_HM': 'Unmet 2P1 Displaced',
                     'Economic Disadvantage': 'Unmet 2P1 Econ Dis',
                     'Homeless': 'Unmet 2P1 Homeless',
                     'Limited English': 'Unmet 2P1 Lim Eng',
                     'Migrant': 'Unmet 2P1 Migrant',
                     'Single Parent': 'Unmet 2P1 Sing Par',
                     'Youth Aged Out': 'Unmet 2P1 Aged Out',
                     'Youth Armed Forces': 'Unmet 2P1 Armed Forces'
                     
                    },inplace=True)
df2 = df2b.merge(df2c,on=cip_field,how='left')


df3a = pd.DataFrame(percents_3p1).reset_index()
df3a['Target 3'] = target_3
df3a.rename(columns={'Student ID':'Measure 3P1'},inplace=True)
df3b = df3a.merge(p_unmet_3p1,on='CIP Code',how='left')
df3b.rename(columns={'Student ID':'Unmet 3P1'},inplace=True)
df3c =spop_3p1
df3c.rename(columns={'Disabled':'Unmet 3P1 Disabled',
                     'DISPLACED_HM': 'Unmet 3P1 Displaced',
                     'Economic Disadvantage': 'Unmet 3P1 Econ Dis',
                     'Homeless': 'Unmet 3P1 Homeless',
                     'Limited English': 'Unmet 3P1 Lim Eng',
                     'Migrant': 'Unmet 3P1 Migrant',
                     'Single Parent': 'Unmet 3P1 Sing Par',
                     'Youth Aged Out': 'Unmet 3P1 Aged Out',
                     'Youth Armed Forces': 'Unmet 3P1 Armed Forces'
                     
                    },inplace=True)
df3 = df3b.merge(df3c,on=cip_field,how='left')

m1 = df1.merge(df2,on=cip_field,how='left')
measures = m1.merge(df3,on=cip_field,how='left')
measures['CIP Code'] = measures['CIP Code'].map('{:.4f}'.format)
final_measures = measures.merge(cip_list,on='CIP Code',how='right')


In [39]:
print('Overall Performance for 1P1 is {0:.2f}%'.format(len(num_1p1)/len(den_1p1)*100))
print('Overall Performance for 2P1 is {0:.2f}%'.format(len(num_2p1)/len(den_2p1)*100))
print('Overall Performance for for 3P1 is {0:.2f}%'.format(len(num_3p1)/len(den_3p1)*100))

Overall Performance for 1P1 is 52.44%
Overall Performance for 2P1 is 69.35%
Overall Performance for for 3P1 is 27.19%


In [40]:
final_measures.dropna(subset=['CIP Code'])

Unnamed: 0,CIP Code,Measure 1P1,Target 1,Unmet 1P1,Unmet 1P1 Disabled,Unmet 1P1 Displaced,Unmet 1P1 Econ Dis,Unmet 1P1 Lim Eng,Unmet 1P1 Sing Par,Measure 2P1,...,Unmet 2P1 Disabled,Unmet 2P1 Econ Dis,Unmet 2P1 Sing Par,Measure 3P1,Target 3,Unmet 3P1,Unmet 3P1 Disabled,Unmet 3P1 Econ Dis,Unmet 3P1 Sing Par,CIP
0,15.0201,0.888889,0.4975,,,,,,,0.9,...,,,,0.033333,0.1675,0.033333,,0.04,,15.0201
1,47.0201,0.0,0.4975,0.0,,,,,,0.7,...,,,,0.02381,0.1675,0.02381,,0.03125,,47.0201
2,50.0102,0.230769,0.4975,0.230769,0.4,,0.2,,,0.684211,...,,,,,,,,,,50.0102
3,50.0406,0.5,0.4975,,,,,,,0.769231,...,,,,,,,,,,50.0406
4,50.0409,0.6,0.4975,,,,,,,0.535714,...,,,,,,,,,,50.0409
5,50.041,0.210526,0.4975,0.210526,0.333333,,0.2,,,0.612903,...,,,,,,,,,,50.041
6,51.0707,0.555556,0.4975,,,,,,,0.5,...,,,,0.022222,0.1675,0.022222,,,,51.0707
7,52.0201,0.605505,0.4975,,0.333333,,,,0.428571,0.731544,...,,,,0.493631,0.1675,,,,,52.0201
8,52.0205,0.692308,0.4975,,,,,,,0.866667,...,,,,0.108696,0.1675,0.108696,,0.15,,52.0205
9,52.0401,0.4,0.4975,0.4,,,0.285714,,,0.769231,...,,,,,,,,,,52.0401


In [41]:
measures.to_excel('measures.xlsx',index=None)
print('Exported to measures.xlsx')


Exported to measures.xlsx


## Chart 7b: Access and Equity

These lines will output the answers for Chart7b of the CLNA. Any CIPs that do not appear on the list have zero enrollment or completion for that special population. If you wish to show all of them, uncomment the print statements under each else in the lines below.

In [42]:
concentrator_set = new_data[(new_data[credit_field]>=12)]

In [43]:
spop_enrl = concentrator_set
spop_compl = completer_set[(completer_set[status_field].isin([4,6]))]

num_grp_2 = num_2p1.groupby(cip_field)[id_field].count()
den_grp_2 = den_2p1.groupby(cip_field)[id_field].count()

In [44]:
nontrad_enrl = concentrator_set[((concentrator_set['Female']=='Y') & (concentrator_set[gender_field]==2))|((concentrator_set['Male']=='Y') & (concentrator_set[gender_field]==1))]
nontrad_comp = spop_compl[((spop_compl['Female']=='Y') & (spop_compl[gender_field]==2))|((spop_compl['Male']=='Y') & (spop_compl[gender_field]==1))]

nt_enrl = len(nontrad_enrl)
nt_comp = len(nontrad_comp)

nt_num = pd.DataFrame(nontrad_enrl).groupby(cip_field)['Student ID'].count()
nt_den = pd.DataFrame(concentrator_set).groupby(cip_field)['Student ID'].count()
nt_pct = nt_num.divide(nt_den)

ntc_num = pd.DataFrame(nontrad_comp).groupby(cip_field)['Student ID'].count()
ntc_den = pd.DataFrame(spop_compl).groupby(cip_field)['Student ID'].count()
ntc_pct = ntc_num.divide(ntc_den)


### Non-Traditional Enrollment

In [45]:
for i in range(1,len(nt_pct)):
    cip = nt_pct.index[i]
    meas = nt_pct.values[i]
    if meas >0:
        
        print('{0:.4f}---{1:.2f}%'.format(cip,meas*100))
    else:
        #print('{0:.4f}---{1:.2f}%'.format(cip,0))
        next

11.0401---28.57%
11.0701---43.48%
11.1003---22.64%
15.0201---3.33%
15.0303---14.81%
15.0805---5.71%
15.1302---10.00%
43.0103---42.98%
43.0107---28.57%
43.0114---50.00%
47.0101---6.67%
47.0201---2.38%
48.0508---21.59%
48.0510---18.52%
51.0707---2.22%
51.1501---36.73%
51.3801---16.38%
52.0201---49.36%
52.0205---10.87%
52.0302---34.38%


### Non-traditional Completion Rates

In [46]:
for i in range(1,len(ntc_pct)):
    cip = ntc_pct.index[i]
    meas = ntc_pct.values[i]
    if meas >0:
        
        print('{0:.4f}---{1:.2f}%'.format(cip,meas*100))
    else:
        #print('{0:.4f}---{1:.2f}%'.format(cip,0))
        next

11.0401---40.00%
11.0701---50.00%
11.1003---28.12%
15.0201---11.11%
15.0303---33.33%
15.1302---16.67%
43.0103---46.67%
43.0107---37.50%
43.0114---75.00%
47.0101---5.56%
48.0508---23.91%
48.0510---20.00%
51.1501---11.11%
51.3801---12.50%
52.0201---57.80%
52.0205---7.69%
52.0302---54.55%


## Enrollment and Completion by Special Populations

In [47]:
# Calculate performance for each special population by CIP, return values below target
enr_rate = []
comp_rate = []
for i in spop_fields:

    sp_num = pd.DataFrame(spop_enrl[(spop_enrl[i]==1)])
    sp_den = spop_enrl
    
    spc_num = pd.DataFrame(spop_compl[(spop_compl[i]==1)])
    spc_den = spop_compl
  
    num = pd.DataFrame(sp_num).filter([cip_field,i]).groupby(cip_field).count()
    den = pd.DataFrame(sp_den).filter([cip_field,i]).groupby(cip_field).count()
    pct = num.divide(den)
    
    cnum = pd.DataFrame(spc_num).filter([cip_field,i]).groupby(cip_field).count()
    cden = pd.DataFrame(spc_den).filter([cip_field,i]).groupby(cip_field).count()
    cpct = cnum.divide(cden)
    
    
    enr_rate.append(pct.reset_index())
    
    comp_rate.append(cpct.reset_index())
    


### Enrollment Rates by Special Populations

In [48]:
for items in enr_rate:
    try:
        print(items.columns[1])
        for v in items.values:
            cip = v[0]
            meas = v[1]
            if meas>0:
                
                print('{0:.4f}---{1:.2f}%'.format(cip,meas*100))
            else:
                next

    except:
        
        print('NA')
    

Disabled
11.0201---5.26%
11.0401---28.57%
11.0701---4.35%
11.1003---8.49%
15.0201---6.67%
15.0303---7.41%
15.0407---11.76%
15.0801---25.00%
15.0805---14.29%
15.1302---20.00%
31.0101---33.33%
43.0103---11.57%
43.0107---14.29%
43.0204---3.12%
44.0000---24.22%
45.0702---50.00%
47.0101---4.00%
47.0201---9.52%
48.0508---11.36%
48.0510---14.81%
50.0102---41.46%
50.0406---18.52%
50.0409---27.78%
50.0410---46.43%
51.0000---7.97%
51.0707---4.44%
51.0908---8.00%
51.1501---20.41%
51.3801---7.76%
52.0201---7.96%
52.0205---4.35%
52.0302---1.56%
52.0401---16.00%
DISPLACED_HM
11.0201---2.63%
11.1003---5.66%
15.1302---10.00%
31.0101---6.67%
43.0103---3.31%
43.0107---4.76%
44.0000---5.47%
47.0101---1.33%
47.0201---2.38%
48.0508---4.55%
48.0510---3.70%
50.0406---7.41%
50.0409---1.85%
51.0000---10.51%
51.0707---28.89%
51.0907---11.43%
51.0908---12.00%
51.1501---14.29%
51.3801---14.66%
52.0201---7.01%
52.0205---2.17%
52.0302---10.94%
52.0401---28.00%
Economic Disadvantage
11.0201---73.68%
11.0401---78.57%

### Completion rates by special populations

In [49]:
for items in comp_rate:
    try:
        print(items.columns[1])
        for v in items.values:
            cip = v[0]
            meas = v[1]
            
            if meas>=0:
                print('{0:.4f}---{1:.2f}%'.format(cip,meas*100))
                
            else:
                #print('{0:.4f}---{1:.1f}%'.format(cip,0))
                next

    except:
        print('NA')
    

Disabled
11.0401---20.00%
11.1003---6.25%
15.0407---25.00%
15.0801---25.00%
15.0805---16.67%
15.1302---16.67%
31.0101---25.00%
43.0103---6.67%
43.0107---12.50%
43.0204---10.00%
44.0000---17.65%
45.0702---100.00%
47.0101---5.56%
47.0201---28.57%
48.0508---10.87%
48.0510---20.00%
50.0102---38.46%
50.0409---53.33%
50.0410---47.37%
51.0000---33.33%
51.0908---16.67%
51.1501---27.78%
51.3801---8.33%
52.0201---8.26%
52.0205---7.69%
52.0401---20.00%
DISPLACED_HM
11.0201---16.67%
11.1003---12.50%
31.0101---12.50%
43.0103---3.33%
43.0107---6.25%
44.0000---2.94%
50.0406---10.00%
51.0707---44.44%
51.0907---6.25%
51.1501---16.67%
51.3801---12.50%
52.0201---9.17%
52.0302---18.18%
52.0401---30.00%
Economic Disadvantage
11.0201---66.67%
11.0401---100.00%
11.0701---50.00%
11.1003---50.00%
15.0201---88.89%
15.0303---33.33%
15.0407---50.00%
15.0801---75.00%
15.0805---66.67%
15.1302---66.67%
31.0101---75.00%
43.0103---53.33%
43.0107---87.50%
43.0114---75.00%
43.0204---30.00%
44.0000---67.65%
45.0702---100