## Perkins V - Calculating Performance Indicators by CIP

In [1]:
import pandas as pd
import numpy as np

### This code will calculate the Perkins 2P1 and 3P1 performance indicators.

#### If your file is not named CTEA.xlsx the code will prompt you to enter the filename. This assumes values in CTEA file are numeric and not descriptions (e.g. non-completion status codes are 4 and 6). This was designed only for the CTEA-1 but can be modified.

In [2]:
file = 'CTEA.xlsx'

try:
    data = pd.read_excel(file)
    print('File {0} loaded successfully'.format(file))
except:
    file = input('CTEA.xlsx not found. Please enter the full filename: ')
    try:
        data = pd.read_excel(file)
        print('File {0} loaded successfully'.format(file))
    except:
        print('No file found')
        exit
    

File CTEA.xlsx loaded successfully


In [3]:
# fields to use for calculations from merged CTEA 1A/B - can be renamed if your fields are different
cip_field = 'CTEA 1A / CIP Code / DYN||8774_DYN'
status_field = 'CTEA 1A / YRENDSTAT_ID / DYN||8765_DYN'
credit_field = 'CTEA 1A / Credits Earned / DYN||8763_DYN'
emp_field = 'CTEA 1B / EMPSTAT_ID / DYN||8785_DYN'
educ_field = 'CTEA 1B / EDUCSTAT_ID / DYN||8784_DYN'
id_field = 'CTEA 1A / Student ID / DYN||8761_DYN'
gender_field = 'CTEA 1A / Gender / DYN||8770_DYN'

### Get Non-traditional Crosswalk

In [4]:
xwalk_url = 'https://s3.amazonaws.com/PCRN/docs/REVISED_FINAL-2020-Nontraditional-Crosswalk-6-9-2021.xlsx'
try:
    print('Downloading Non-traditional crosswalk from ',xwalk_url)
    nontrad_xwalk = pd.read_excel(xwalk_url)
    print('Loaded successfully')
except:
    print('Unable to connect to ',xwalk_url)

Downloading Non-traditional crosswalk from  https://s3.amazonaws.com/PCRN/docs/REVISED_FINAL-2020-Nontraditional-Crosswalk-6-9-2021.xlsx
Loaded successfully


### Join CTEA to crosswalk and separate completers

In [5]:
# merge crosswalk with loaded CTEA file, joined by CIP
new_data = nontrad_xwalk[['CIP 6 2020','Female','Male']].merge(data,left_on='CIP 6 2020',right_on=cip_field,how='right')

In [6]:
# splits completers based on status codes 4 and 6 and total credits >=12
completer_set = new_data[(new_data[status_field].isin([4,6])) | (new_data[credit_field]>=12)] #make numerator data

In [7]:
# uncomment to make dataframe of noncompleters
#non_completers = pd.merge(new_data,completer_set,how='outer',on=id_field,indicator=True)
#noncomp_df = non_completers.loc[non_completers._merge == 'left_only']

## 1P1 Performance Indicator

In [8]:
num_1p1 = completer_set[(completer_set[status_field].isin([4,6])) & 
                        ((completer_set[emp_field].isin([1,2,3,7,8,9]))|
                          (completer_set[educ_field]==1))]
den_1p1 = completer_set[(completer_set[status_field].isin([4,6]))]

In [9]:
num_grp_1 = num_1p1.groupby(cip_field)[id_field].count()
den_grp_1 = den_1p1.groupby(cip_field)[id_field].count()

In [10]:
percents_1p1 = num_grp_1.divide(den_grp_1,fill_value=0)

In [11]:
# view percents by CIP
percents_1p1

CTEA 1A / CIP Code / DYN||8774_DYN
11.0201    0.333333
11.0401    0.800000
11.0701    0.666667
11.1003    0.750000
15.0201    0.888889
15.0303    0.666667
15.0407    0.000000
15.0801    1.000000
15.0805    0.333333
15.1302    0.500000
31.0101    0.625000
43.0103    0.366667
43.0107    0.312500
43.0114    0.500000
43.0204    0.700000
44.0000    0.705882
45.0702    1.000000
47.0101    0.555556
47.0201    0.000000
48.0508    0.369565
48.0510    0.500000
50.0102    0.230769
50.0406    0.500000
50.0409    0.600000
50.0410    0.210526
51.0000    0.333333
51.0707    0.555556
51.0907    0.625000
51.0908    0.166667
51.1501    0.722222
51.3801    0.437500
52.0201    0.605505
52.0205    0.692308
52.0302    0.454545
52.0401    0.400000
52.0801    0.800000
Name: CTEA 1A / Student ID / DYN||8761_DYN, dtype: float64

## 2P1 Performance Indicator

In [12]:
num_2p1 = completer_set[(completer_set[status_field].isin([4,6]))]
den_2p1 = completer_set[(completer_set[status_field].isin([4,5,6]))]

In [13]:
num_grp_2 = num_2p1.groupby(cip_field)[id_field].count()
den_grp_2 = den_2p1.groupby(cip_field)[id_field].count()

In [14]:
percents_2p1 = num_grp_2.divide(den_grp_2,fill_value=0)

In [15]:
# view percents by CIP
percents_2p1

CTEA 1A / CIP Code / DYN||8774_DYN
11.0201    0.666667
11.0401    0.454545
11.0701    0.545455
11.1003    0.727273
15.0201    0.900000
15.0303    0.461538
15.0407    0.666667
15.0616    0.000000
15.0801    0.800000
15.0805    0.461538
15.1302    1.000000
31.0101    0.888889
43.0103    0.566038
43.0107    0.800000
43.0114    1.000000
43.0204    0.416667
44.0000    0.666667
45.0702    1.000000
47.0101    0.782609
47.0201    0.700000
48.0508    0.901961
48.0510    0.909091
50.0102    0.684211
50.0406    0.769231
50.0409    0.535714
50.0410    0.612903
51.0000    0.103448
51.0707    0.500000
51.0907    1.000000
51.0908    0.800000
51.1501    0.750000
51.3801    0.872727
52.0201    0.731544
52.0205    0.866667
52.0302    0.628571
52.0401    0.769231
52.0703    0.000000
52.0801    0.909091
Name: CTEA 1A / Student ID / DYN||8761_DYN, dtype: float64

## 3P1 Performance Indicator

In [16]:
nontrad_df = completer_set[((completer_set['Female']=='Y') & (completer_set[gender_field]==2))|((completer_set['Male']=='Y') & (completer_set[gender_field]==1))]
cip_ntrad = nontrad_df.groupby(cip_field)[id_field].count()
cip_heads = completer_set[(completer_set['Female']=='Y') | (completer_set['Male']=='Y')].groupby(cip_field)[id_field].count()
percents_3p1 = cip_ntrad.divide(cip_heads,fill_value=0)

In [17]:
# view percents by CIP
percents_3p1

CTEA 1A / CIP Code / DYN||8774_DYN
11.0201    0.131579
11.0401    0.285714
11.0701    0.434783
11.1003    0.226415
13.1314    0.000000
15.0201    0.033333
15.0303    0.148148
15.0407    0.000000
15.0616    0.000000
15.0801    0.000000
15.0805    0.057143
15.1302    0.100000
43.0103    0.429752
43.0107    0.285714
43.0114    0.500000
45.0702    0.000000
47.0101    0.066667
47.0201    0.023810
48.0508    0.215909
48.0510    0.185185
51.0707    0.022222
51.1501    0.367347
51.3801    0.163793
52.0201    0.493631
52.0205    0.108696
52.0302    0.343750
Name: CTEA 1A / Student ID / DYN||8761_DYN, dtype: float64

In [18]:
# load measures into dataframe
df1 = pd.DataFrame(percents_1p1).reset_index()
df1.rename(columns={'CTEA 1A / Student ID / DYN||8761_DYN':'Measure 1P1'},inplace=True)
df2 = pd.DataFrame(percents_2p1).reset_index()
df2.rename(columns={'CTEA 1A / Student ID / DYN||8761_DYN':'Measure 2P1'},inplace=True)
df3 = pd.DataFrame(percents_3p1).reset_index()
df3.rename(columns={'CTEA 1A / Student ID / DYN||8761_DYN':'Measure 3P1'},inplace=True)
m1 = df1.merge(df2,on='CTEA 1A / CIP Code / DYN||8774_DYN',how='left')
measures = m1.merge(df3,on='CTEA 1A / CIP Code / DYN||8774_DYN',how='left')
#measures.fillna('0.000000',inplace=True)

In [19]:
num_3p1 = len(nontrad_df)

In [20]:
den_3p1 = len(completer_set[(completer_set['Female']=='Y') | (completer_set['Male']=='Y')])

## RESULTS

In [21]:
print('Overall Performance for 1P1 is {0:.2f}%'.format(len(num_1p1)/len(den_1p1)*100))
print('Overall Performance for 2P1 is {0:.2f}%'.format(len(num_2p1)/len(den_2p1)*100))
print('Overall Performance for for 3P1 is {0:.2f}%'.format(num_3p1/den_3p1*100))

Overall Performance for 1P1 is 52.44%
Overall Performance for 2P1 is 69.35%
Overall Performance for for 3P1 is 27.19%


In [None]:
    
print(measures)
choice = input('Export measures to Excel? Y/N:')
if ((choice =='Y') | (choice =='y')):
    measures.to_excel('measures.xlsx',index=None)
    print('Exported to measures.xlsx')
else:
    
    pass

    CTEA 1A / CIP Code / DYN||8774_DYN  Measure 1P1  Measure 2P1  Measure 3P1
0                              11.0201     0.333333     0.666667     0.131579
1                              11.0401     0.800000     0.454545     0.285714
2                              11.0701     0.666667     0.545455     0.434783
3                              11.1003     0.750000     0.727273     0.226415
4                              15.0201     0.888889     0.900000     0.033333
5                              15.0303     0.666667     0.461538     0.148148
6                              15.0407     0.000000     0.666667     0.000000
7                              15.0801     1.000000     0.800000     0.000000
8                              15.0805     0.333333     0.461538     0.057143
9                              15.1302     0.500000     1.000000     0.100000
10                             31.0101     0.625000     0.888889          NaN
11                             43.0103     0.366667     0.566038