## Perkins V - Calculating Performance Indicators by CIP

In [3]:
import pandas as pd
import numpy as np


### This code will calculate the Perkins 2P1 and 3P1 performance indicators.

#### If your file is not named CTEA.xlsx the code will prompt you to enter the filename. This assumes values in CTEA file are numeric and not descriptions (e.g. non-completion status codes are 4 and 6). This was designed only for the CTEA-1 but can be modified.

In [4]:
file = 'CTEA.xlsx'

try:
    data = pd.read_excel(file)
    print('File {0} loaded successfully'.format(file))
except:
    file = input('CTEA.xlsx not found. Please enter the full filename: ')
    try:
        data = pd.read_excel(file)
        print('File {0} loaded successfully'.format(file))
    except:
        print('No file found')
        exit
    

File CTEA.xlsx loaded successfully


In [5]:
# fields to use for calculations from merged CTEA 1A/B - can be renamed if your fields are different
cip_field = 'CTEA 1A / CIP Code / DYN||8774_DYN'
status_field = 'CTEA 1A / YRENDSTAT_ID / DYN||8765_DYN'
credit_field = 'CTEA 1A / Credits Earned / DYN||8763_DYN'
emp_field = 'CTEA 1B / EMPSTAT_ID / DYN||8785_DYN'
educ_field = 'CTEA 1B / EDUCSTAT_ID / DYN||8784_DYN'
id_field = 'CTEA 1A / Student ID / DYN||8761_DYN'
gender_field = 'CTEA 1A / Gender / DYN||8770_DYN'

In [12]:
# special population fields
disab_field = 'CTEA 1A / Disabled / DYN||8766_DYN'
displ_field = 'CTEA 1A / DISPLACED_HM / DYN||8776_DYN'
econdis_field = 'CTEA 1A / Economic Disadvantage / DYN||8777_DYN'
homels_field = 'CTEA 1A / Homeless / DYN||8769_DYN'
lim_eng_field = 'CTEA 1A / Limited English / DYN||8780_DYN'
migr_field = 'CTEA 1A / Migrant / DYN||8772_DYN'
sparnt_field = 'CTEA 1A / Single Parent / DYN||8775_DYN'
youth_ao_field = 'CTEA 1A / Youth Aged Out / DYN||8779_DYN'
youth_af_field = 'CTEA 1A / Youth Armed Forces / DYN||8762_DYN'


In [18]:
def special_pop(df):
    pop_df = df[((df[disab_field]==1) |
                         (df[displ_field]==1) | 
                         (df[econdis_field]==1) |
                         (df[homels_field]==1) | 
                         (df[lim_eng_field]==1) |
                         (df[migr_field]==1) | 
                         (df[sparnt_field]==1) |
                         (df[youth_ao_field]==1) | 
                         (df[youth_af_field]==1))]
    return(pop_df)


### Get Non-traditional Crosswalk

In [6]:
xwalk_url = 'https://s3.amazonaws.com/PCRN/docs/REVISED_FINAL-2020-Nontraditional-Crosswalk-6-9-2021.xlsx'
try:
    print('Downloading Non-traditional crosswalk from ',xwalk_url)
    nontrad_xwalk = pd.read_excel(xwalk_url)
    print('Loaded successfully')
except:
    print('Unable to connect to ',xwalk_url)

Downloading Non-traditional crosswalk from  https://s3.amazonaws.com/PCRN/docs/REVISED_FINAL-2020-Nontraditional-Crosswalk-6-9-2021.xlsx
Loaded successfully


### Join CTEA to crosswalk and separate completers

In [7]:
# merge crosswalk with loaded CTEA file, joined by CIP
new_data = nontrad_xwalk[['CIP 6 2020','Female','Male']].merge(data,left_on='CIP 6 2020',right_on=cip_field,how='right')

In [8]:
# splits completers based on status codes 4 and 6 and total credits >=12
completer_set = new_data[(new_data[status_field].isin([4,6])) | (new_data[credit_field]>=12)] #make numerator data

In [7]:
# uncomment to make dataframe of noncompleters
#non_completers = pd.merge(new_data,completer_set,how='outer',on=id_field,indicator=True)
#noncomp_df = non_completers.loc[non_completers._merge == 'left_only']

## 1P1 Performance Indicator

In [9]:
num_1p1 = completer_set[(completer_set[status_field].isin([4,6])) & 
                        ((completer_set[emp_field].isin([1,2,3,7,8,9]))|
                          (completer_set[educ_field]==1))]
den_1p1 = completer_set[(completer_set[status_field].isin([4,6]))]

In [19]:
num_1p1_spop = special_pop(num_1p1)

In [23]:
num_grp_1 = num_1p1.groupby(cip_field)[id_field].count()
num_grp_1_spop = num_1p1_spop.groupby(cip_field)[id_field].count()
den_grp_1 = den_1p1.groupby(cip_field)[id_field].count()

In [27]:
percents_1p1 = num_grp_1.divide(den_grp_1,fill_value=0)

In [31]:
spop_percents_1p1 = num_grp_1_spop.divide(den_grp_1,fill_value=0)

In [100]:
# view percents by CIP
percents_1p1

CTEA 1A / CIP Code / DYN||8774_DYN
11.0201    0.333333
11.0401    0.800000
11.0701    0.666667
11.1003    0.750000
15.0201    0.888889
15.0303    0.666667
15.0407    0.000000
15.0801    1.000000
15.0805    0.333333
15.1302    0.500000
31.0101    0.625000
43.0103    0.366667
43.0107    0.312500
43.0114    0.500000
43.0204    0.700000
44.0000    0.705882
45.0702    1.000000
47.0101    0.555556
47.0201    0.000000
48.0508    0.369565
48.0510    0.500000
50.0102    0.230769
50.0406    0.500000
50.0409    0.600000
50.0410    0.210526
51.0000    0.333333
51.0707    0.555556
51.0907    0.625000
51.0908    0.166667
51.1501    0.722222
51.3801    0.437500
52.0201    0.605505
52.0205    0.692308
52.0302    0.454545
52.0401    0.400000
52.0801    0.800000
Name: CTEA 1A / Student ID / DYN||8761_DYN, dtype: float64

In [34]:
spop_percents_1p1

CTEA 1A / CIP Code / DYN||8774_DYN
11.0201    0.333333
11.0401    0.800000
11.0701    0.500000
11.1003    0.562500
15.0201    0.888889
15.0303    0.333333
15.0407    0.000000
15.0801    1.000000
15.0805    0.166667
15.1302    0.500000
31.0101    0.375000
43.0103    0.200000
43.0107    0.312500
43.0114    0.500000
43.0204    0.400000
44.0000    0.500000
45.0702    1.000000
47.0101    0.277778
47.0201    0.000000
48.0508    0.239130
48.0510    0.400000
50.0102    0.230769
50.0406    0.400000
50.0409    0.600000
50.0410    0.210526
51.0000    0.000000
51.0707    0.444444
51.0907    0.375000
51.0908    0.000000
51.1501    0.444444
51.3801    0.416667
52.0201    0.513761
52.0205    0.384615
52.0302    0.409091
52.0401    0.400000
52.0801    0.800000
Name: CTEA 1A / Student ID / DYN||8761_DYN, dtype: float64

## 2P1 Performance Indicator

In [36]:

num_2p1 = completer_set[(completer_set[status_field].isin([4,6]))]
num_2p1_spop = special_pop(num_2p1)

den_2p1 = completer_set[(completer_set[status_field].isin([4,5,6]))]

In [37]:
num_grp_2 = num_2p1.groupby(cip_field)[id_field].count()
num_grp_2_spop = num_2p1_spop.groupby(cip_field)[id_field].count()
den_grp_2 = den_2p1.groupby(cip_field)[id_field].count()

In [40]:
percents_2p1 = num_grp_2.divide(den_grp_2,fill_value=0)
spop_percents_2p1 = num_grp_2_spop.divide(den_grp_2,fill_value=0)

In [41]:
# view percents by CIP
percents_2p1

CTEA 1A / CIP Code / DYN||8774_DYN
11.0201    0.666667
11.0401    0.454545
11.0701    0.545455
11.1003    0.727273
15.0201    0.900000
15.0303    0.461538
15.0407    0.666667
15.0616    0.000000
15.0801    0.800000
15.0805    0.461538
15.1302    1.000000
31.0101    0.888889
43.0103    0.566038
43.0107    0.800000
43.0114    1.000000
43.0204    0.416667
44.0000    0.666667
45.0702    1.000000
47.0101    0.782609
47.0201    0.700000
48.0508    0.901961
48.0510    0.909091
50.0102    0.684211
50.0406    0.769231
50.0409    0.535714
50.0410    0.612903
51.0000    0.103448
51.0707    0.500000
51.0907    1.000000
51.0908    0.800000
51.1501    0.750000
51.3801    0.872727
52.0201    0.731544
52.0205    0.866667
52.0302    0.628571
52.0401    0.769231
52.0703    0.000000
52.0801    0.909091
Name: CTEA 1A / Student ID / DYN||8761_DYN, dtype: float64

In [38]:
spop_percents_1p1

CTEA 1A / CIP Code / DYN||8774_DYN
11.0201    0.333333
11.0401    0.800000
11.0701    0.500000
11.1003    0.562500
15.0201    0.888889
15.0303    0.333333
15.0407    0.000000
15.0801    1.000000
15.0805    0.166667
15.1302    0.500000
31.0101    0.375000
43.0103    0.200000
43.0107    0.312500
43.0114    0.500000
43.0204    0.400000
44.0000    0.500000
45.0702    1.000000
47.0101    0.277778
47.0201    0.000000
48.0508    0.239130
48.0510    0.400000
50.0102    0.230769
50.0406    0.400000
50.0409    0.600000
50.0410    0.210526
51.0000    0.000000
51.0707    0.444444
51.0907    0.375000
51.0908    0.000000
51.1501    0.444444
51.3801    0.416667
52.0201    0.513761
52.0205    0.384615
52.0302    0.409091
52.0401    0.400000
52.0801    0.800000
Name: CTEA 1A / Student ID / DYN||8761_DYN, dtype: float64

## 3P1 Performance Indicator

In [61]:
num_3p1 = completer_set[((completer_set['Female']=='Y') & (completer_set[gender_field]==2))|((completer_set['Male']=='Y') & (completer_set[gender_field]==1))]
num_grp_3 = num_3p1.groupby(cip_field)[id_field].count()
num_3p1_spop = special_pop(num_3p1)
num_grp_3_spop = num_3p1_spop.groupby(cip_field)[id_field].count()
den_3p1 = completer_set[(completer_set['Female']=='Y') | (completer_set['Male']=='Y')]
den_grp_3 = den_3p1.groupby(cip_field)[id_field].count()



In [65]:
percents_3p1 = num_grp_3.divide(den_grp_3,fill_value=0)
spop_percents_3p1 = num_grp_3_spop.divide(den_grp_3,fill_value=0)

In [63]:
# view percents by CIP
percents_3p1

CTEA 1A / CIP Code / DYN||8774_DYN
11.0201    0.131579
11.0401    0.285714
11.0701    0.434783
11.1003    0.226415
13.1314    0.000000
15.0201    0.033333
15.0303    0.148148
15.0407    0.000000
15.0616    0.000000
15.0801    0.000000
15.0805    0.057143
15.1302    0.100000
43.0103    0.429752
43.0107    0.285714
43.0114    0.500000
45.0702    0.000000
47.0101    0.066667
47.0201    0.023810
48.0508    0.215909
48.0510    0.185185
51.0707    0.022222
51.1501    0.367347
51.3801    0.163793
52.0201    0.493631
52.0205    0.108696
52.0302    0.343750
Name: CTEA 1A / Student ID / DYN||8761_DYN, dtype: float64

In [66]:
# view percents by CIP
spop_percents_3p1

CTEA 1A / CIP Code / DYN||8774_DYN
11.0201    0.105263
11.0401    0.250000
11.0701    0.434783
11.1003    0.216981
13.1314    0.000000
15.0201    0.033333
15.0303    0.074074
15.0407    0.000000
15.0616    0.000000
15.0801    0.000000
15.0805    0.057143
15.1302    0.100000
43.0103    0.380165
43.0107    0.285714
43.0114    0.375000
45.0702    0.000000
47.0101    0.000000
47.0201    0.023810
48.0508    0.193182
48.0510    0.148148
51.0707    0.022222
51.1501    0.326531
51.3801    0.129310
52.0201    0.394904
52.0205    0.065217
52.0302    0.296875
Name: CTEA 1A / Student ID / DYN||8761_DYN, dtype: float64

In [97]:
# load measures into dataframe
df1a = pd.DataFrame(percents_1p1).reset_index()
df1a.rename(columns={'CTEA 1A / Student ID / DYN||8761_DYN':'Measure 1P1'},inplace=True)
df1b = pd.DataFrame(spop_percents_1p1).reset_index()
df1b.rename(columns={'CTEA 1A / Student ID / DYN||8761_DYN':'Special Pop 1P1'},inplace=True)
df1 = df1a.merge(df1b,on='CTEA 1A / CIP Code / DYN||8774_DYN')

df2a = pd.DataFrame(percents_2p1).reset_index()
df2a.rename(columns={'CTEA 1A / Student ID / DYN||8761_DYN':'Measure 2P1'},inplace=True)
df2b = pd.DataFrame(spop_percents_2p1).reset_index()
df2b.rename(columns={'CTEA 1A / Student ID / DYN||8761_DYN':'Special Pop 2P1'},inplace=True)
df2 = df2a.merge(df2b,on='CTEA 1A / CIP Code / DYN||8774_DYN')


df3a = pd.DataFrame(percents_3p1).reset_index()
df3a.rename(columns={'CTEA 1A / Student ID / DYN||8761_DYN':'Measure 3P1'},inplace=True)
df3b = pd.DataFrame(spop_percents_3p1).reset_index()
df3b.rename(columns={'CTEA 1A / Student ID / DYN||8761_DYN':'Special Pop 3P1'},inplace=True)
df3 = df3a.merge(df3b,on='CTEA 1A / CIP Code / DYN||8774_DYN')

m1 = df1.merge(df2,on='CTEA 1A / CIP Code / DYN||8774_DYN',how='left')
measures = m1.merge(df3,on='CTEA 1A / CIP Code / DYN||8774_DYN',how='left')
measures.rename(columns={'CTEA 1A / CIP Code / DYN||8774_DYN':'CIP Code'},inplace=True)
measures['CIP Code'] = measures['CIP Code'].map('{:.4f}'.format)

In [98]:
measures

Unnamed: 0,CIP Code,Measure 1P1,Special Pop 1P1,Measure 2P1,Special Pop 2P1,Measure 3P1,Special Pop 3P1
0,11.0201,0.333333,0.333333,0.666667,0.444444,0.131579,0.105263
1,11.0401,0.8,0.8,0.454545,0.454545,0.285714,0.25
2,11.0701,0.666667,0.5,0.545455,0.363636,0.434783,0.434783
3,11.1003,0.75,0.5625,0.727273,0.5,0.226415,0.216981
4,15.0201,0.888889,0.888889,0.9,0.9,0.033333,0.033333
5,15.0303,0.666667,0.333333,0.461538,0.153846,0.148148,0.074074
6,15.0407,0.0,0.0,0.666667,0.666667,0.0,0.0
7,15.0801,1.0,1.0,0.8,0.8,0.0,0.0
8,15.0805,0.333333,0.166667,0.461538,0.384615,0.057143,0.057143
9,15.1302,0.5,0.5,1.0,0.833333,0.1,0.1


## RESULTS

In [82]:
print('Overall Performance for 1P1 is {0:.2f}%'.format(len(num_1p1)/len(den_1p1)*100))
print('Overall Performance for 2P1 is {0:.2f}%'.format(len(num_2p1)/len(den_2p1)*100))
print('Overall Performance for for 3P1 is {0:.2f}%'.format(len(num_3p1)/len(den_3p1)*100))

Overall Performance for 1P1 is 52.44%
Overall Performance for 2P1 is 69.35%
Overall Performance for for 3P1 is 27.19%


In [83]:
print('Overall Performance for 1P1 Special Populations is {0:.2f}%'.format(len(num_1p1_spop)/len(den_1p1)*100))
print('Overall Performance for 2P1 Special Populations is {0:.2f}%'.format(len(num_2p1_spop)/len(den_2p1)*100))
print('Overall Performance for 3P1 Special Populations is {0:.2f}%'.format(len(num_3p1_spop)/len(den_3p1)*100))

Overall Performance for 1P1 Special Populations is 40.84%
Overall Performance for 2P1 Special Populations is 53.03%
Overall Performance for 3P1 Special Populations is 22.59%


In [99]:
    
print(measures)
choice = input('Export measures to Excel? Y/N:')
if ((choice =='Y') | (choice =='y')):
    measures.to_excel('measures.xlsx',index=None)
    print('Exported to measures.xlsx')
else:
    
    pass

   CIP Code  Measure 1P1  Special Pop 1P1  Measure 2P1  Special Pop 2P1  \
0   11.0201     0.333333         0.333333     0.666667         0.444444   
1   11.0401     0.800000         0.800000     0.454545         0.454545   
2   11.0701     0.666667         0.500000     0.545455         0.363636   
3   11.1003     0.750000         0.562500     0.727273         0.500000   
4   15.0201     0.888889         0.888889     0.900000         0.900000   
5   15.0303     0.666667         0.333333     0.461538         0.153846   
6   15.0407     0.000000         0.000000     0.666667         0.666667   
7   15.0801     1.000000         1.000000     0.800000         0.800000   
8   15.0805     0.333333         0.166667     0.461538         0.384615   
9   15.1302     0.500000         0.500000     1.000000         0.833333   
10  31.0101     0.625000         0.375000     0.888889         0.666667   
11  43.0103     0.366667         0.200000     0.566038         0.301887   
12  43.0107     0.312500 