# Perkins V - Calculating Performance Indicators by CIP

In [None]:
# Author: Matthew Fikes
# Modified: 4/3/23
import pandas as pd


## This code will calculate the Perkins 1P1, 2P1, and 3P1 performance indicators for Chart 3 of the CLNA.

#### If your file is not named CTEA.xlsx the code will prompt you to enter the filename. This assumes values in CTEA file are numeric and not descriptions (e.g. non-completion status codes are 4 and 6). This was designed only for the CTEA-1 but can be modified.

In [None]:
# Load the CTEA workbook. Try the default filename first; if that fails,
# ask the user for a filename once and try again.
file = 'CTEA.xlsx'

try:
    data = pd.read_excel(file)
    print(f'File {file} loaded successfully')
except Exception:
    # Any load failure (missing file, unreadable workbook, ...) falls back
    # to prompting for a filename, as before.
    file = input(f'{file} not found. Please enter the full filename: ')
    try:
        data = pd.read_excel(file)
        print('File {0} loaded successfully'.format(file))
    except Exception as load_error:
        print('No file found')
        # Actually stop here: the bare name `exit` was never called, so the
        # notebook used to continue and fail later with a NameError on `data`.
        raise SystemExit('Unable to load {0}: {1}'.format(file, load_error)) from load_error
    

In [None]:
def _strip_column_prefix(name):
    """Return the second ' / '-separated piece of a column header.

    Headers that are not strings or do not contain ' / ' are returned
    unchanged instead of raising IndexError.
    """
    if isinstance(name, str) and ' / ' in name:
        return name.split(' / ')[1]
    return name


data.rename(columns=_strip_column_prefix, inplace=True)

### Static Variables
These can be modified to suit your individual file. Targets for 1P1, 2P1, and 3P1 are also included so they can be easily changed.

In [None]:
# Targets are proportions (0.50 means 50%); each is compared with the
# matching indicator's per-CIP rate.
target_1 = 0.50  # 1P1
target_2 = 0.21  # 2P1
target_3 = 0.17  # 3P1

In [None]:
# Column names read from the merged CTEA 1A/B data.
# Change the right-hand side if your file uses different headers.
cip_field = "CIP Code"            # program CIP code (join/grouping key)
status_field = "YRENDSTAT_ID"     # status code (values 4, 5, 6 are tested)
credit_field = "Credits Earned"   # credits, compared against 12
emp_field = "EMPSTAT_ID"          # employment status code (used by 1P1)
educ_field = "EDUCSTAT_ID"        # education status code (used by 1P1)
id_field = "Student ID"           # column counted per CIP
gender_field = "Gender"           # gender code (values 1 and 2 are tested)

In [None]:
# Special population flag columns (a value of 1 marks membership)
disab_field = "Disabled"
displ_field = "DISPLACED_HM"
econdis_field = "Economic Disadvantage"
homels_field = "Homeless"
lim_eng_field = "Limited English"
migr_field = "Migrant"
sparnt_field = "Single Parent"
youth_ao_field = "Youth Aged Out"
youth_af_field = "Youth Armed Forces"

# Every special population column, in reporting order
spop_fields = [
    disab_field,
    displ_field,
    econdis_field,
    homels_field,
    lim_eng_field,
    migr_field,
    sparnt_field,
    youth_ao_field,
    youth_af_field,
]

### Get Non-traditional Crosswalk

In [None]:
# Download the non-traditional crosswalk workbook straight into a DataFrame.
xwalk_url = 'https://s3.amazonaws.com/PCRN/docs/REVISED_FINAL-2020-Nontraditional-Crosswalk-6-9-2021.xlsx'
print('Downloading Non-traditional crosswalk from ',xwalk_url)
try:
    nontrad_xwalk = pd.read_excel(xwalk_url)
except Exception:
    print('Unable to connect to ',xwalk_url)
    # Re-raise: the following cells cannot run without the crosswalk, and
    # swallowing the error would leave `nontrad_xwalk` undefined (or stale
    # from an earlier run).
    raise
print('Loaded successfully')

### Join CTEA to crosswalk and separate completers

In [None]:
# Inner-join the crosswalk's CIP and Female/Male columns onto the CTEA rows,
# matching crosswalk 'CIP 6 2020' to the CTEA CIP column.
xwalk_columns = nontrad_xwalk[['CIP 6 2020', 'Female', 'Male']]
new_data = xwalk_columns.merge(
    data,
    how='inner',
    left_on='CIP 6 2020',
    right_on=cip_field,
)

In [None]:
# One row per distinct CIP present in the merged data, plus the CIP formatted
# to four decimals as text under 'CIP Code'.
# `columns` is a list, not a set: a set has no defined order and newer pandas
# versions reject it.
cip_list = pd.DataFrame(new_data['CIP 6 2020'].unique().tolist(), columns=['CIP'])
cip_list['CIP Code'] = cip_list['CIP'].map('{:.4f}'.format)

In [None]:
# Keep rows whose status code is 4 or 6, OR whose credits are at least 12
# (this set feeds the indicator numerators and denominators below)
status_is_4_or_6 = new_data[status_field].isin([4, 6])
credits_at_least_12 = new_data[credit_field] >= 12
completer_set = new_data[status_is_4_or_6 | credits_at_least_12]

In [None]:
# uncomment to make dataframe of noncompleters
#non_completers = pd.merge(new_data,completer_set,how='outer',on=id_field,indicator=True)
#noncomp_df = non_completers.loc[non_completers._merge == 'left_only']

## 1P1 Performance Indicator

In [None]:
# 1P1 denominator: completer_set rows with status 4 or 6
den_1p1 = completer_set[completer_set[status_field].isin([4, 6])]

# 1P1 numerator: denominator rows whose employment status is one of the listed
# codes or whose education status is 1
employed_1p1 = den_1p1[emp_field].isin([1, 2, 3, 7, 8, 9])
enrolled_1p1 = den_1p1[educ_field] == 1
num_1p1 = den_1p1[employed_1p1 | enrolled_1p1]

In [None]:
# For each special population, compute the 1P1 rate per CIP and keep the CIPs
# whose rate is below target_1.
# NOTE(review): this cell uses a strict `<` while the program-level cells and
# the 2P1/3P1 special-population cells use `<=` -- confirm which is intended.
rows_1p1 = []
for i in spop_fields:

    # rows flagged (== 1) for this special population
    lim_num = num_1p1[num_1p1[i] == 1]
    lim_den = den_1p1[den_1p1[i] == 1]

    # per-CIP counts of the population column, then their ratio
    num = lim_num.filter([cip_field, i]).groupby(cip_field).count()
    dem = lim_den.filter([cip_field, i]).groupby(cip_field).count()
    pct = num.divide(dem)
    underperf = pct[pct[i] < target_1]

    # every population is appended (even when empty) so the print cell still
    # lists its name
    rows_1p1.append(underperf.reset_index())

# Combine only the non-empty frames. When nothing underperforms, concatenating
# empty frames and dropping all-NaN columns removes the CIP column too, and the
# groupby raised KeyError; fall back to an empty table instead.
flagged_1p1 = [frame for frame in rows_1p1 if len(frame) > 0]
if flagged_1p1:
    spop_1p1 = pd.concat(flagged_1p1).dropna(axis=1, how='all').groupby(cip_field).max().reset_index()
else:
    spop_1p1 = pd.DataFrame(columns=[cip_field])

In [None]:
# Per-CIP counts for the 1P1 numerator and denominator
num_grp_1 = num_1p1.groupby(cip_field)[id_field].count()
den_grp_1 = den_1p1.groupby(cip_field)[id_field].count()

# Rate per CIP; a CIP missing from one side is treated as a count of 0
percents_1p1 = num_grp_1.div(den_grp_1, fill_value=0)

In [None]:
# Display the per-CIP 1P1 rates (as proportions)
percents_1p1

### MOST RECENT PERFORMANCE DATA BY PROGRAM

In [None]:
# Print every CIP with its 1P1 rate as a percentage
pf1p1a = zip(percents_1p1.index, percents_1p1.values)
for cip_code, rate in pf1p1a:
    line = '{0:.4f}---{1:.2f}%'.format(cip_code, rate*100)
    print(line)

### PROGRAMS NOT MEETING TARGET

In [None]:
# CIPs whose 1P1 rate is at or below the target: once as a Series for
# printing, once as a DataFrame indexed by CIP for the results merge
at_or_below_1p1 = percents_1p1.values <= target_1
p_unmet_1p1_pf = percents_1p1[at_or_below_1p1]
p_unmet_1p1 = pd.DataFrame(percents_1p1[at_or_below_1p1]).reset_index().set_index('CIP Code')

In [None]:
# Print the CIPs at or below the 1P1 target with their rate as a percentage
pf1p1b = zip(p_unmet_1p1_pf.index, p_unmet_1p1_pf.values)
for cip_code, rate in pf1p1b:
    line = '{0:.4f}---{1:.2f}%'.format(cip_code, rate*100)
    print(line)

### SPECIAL POPULATIONS NOT MEETING TARGET WITHIN PROGRAMS

In [None]:
# Special populations falling below the 1P1 target, indexed by CIP
# (one column per special population that has at least one flagged CIP)
unmet_spop_1p1 = spop_1p1.set_index('CIP Code')


In [None]:
# For each special population, print its column name followed by the CIPs
# that fell below the 1P1 target and their rate as a percentage.
for items in rows_1p1:
    try:
        print(items.columns[1])
        for v in items.values:
            cip = v[0]
            meas = v[1]
            print('{0:.4f}---{1:.2f}%'.format(cip,meas*100))

    except Exception:
        # Best effort: a frame that cannot be printed is reported as NA.
        # `Exception` (not a bare except) lets Ctrl-C / kernel interrupt through.
        print('NA')
    

## 2P1 Performance Indicator

In [None]:
# 2P1 numerator: completer_set rows with status 4 or 6
# 2P1 denominator: completer_set rows with status 4, 5 or 6
status_2p1 = completer_set[status_field]
num_2p1 = completer_set[status_2p1.isin([4, 6])]
den_2p1 = completer_set[status_2p1.isin([4, 5, 6])]

# Per-CIP counts for each side
num_grp_2 = num_2p1.groupby(cip_field)[id_field].count()
den_grp_2 = den_2p1.groupby(cip_field)[id_field].count()

In [None]:
# For each special population, compute the 2P1 rate per CIP and keep the CIPs
# whose rate is at or below target_2.
rows_2p1 = []
for i in spop_fields:

    # rows flagged (== 1) for this special population
    lim_num = num_2p1[num_2p1[i] == 1]
    lim_den = den_2p1[den_2p1[i] == 1]

    # per-CIP counts of the population column, then their ratio
    num = lim_num.filter([cip_field, i]).groupby(cip_field).count()
    den = lim_den.filter([cip_field, i]).groupby(cip_field).count()
    pct = num.divide(den)

    underperf = pct[pct[i] <= target_2]
    # only populations with at least one flagged CIP are kept
    if len(underperf)>0:
        rows_2p1.append(underperf.reset_index())

# An explicit emptiness check replaces the bare try/except. The fallback table
# is built with a list of columns (a set is unordered and rejected by newer
# pandas, which made the old fallback itself fail).
if rows_2p1:
    spop_2p1 = pd.concat(rows_2p1).dropna(axis=1, how='all').groupby(cip_field).max().reset_index()
else:
    spop_2p1 = pd.DataFrame(columns=[cip_field])

In [None]:
# 2P1 rate per CIP; a CIP missing from one side is treated as a count of 0
percents_2p1 = num_grp_2.div(den_grp_2, fill_value=0)
# display the rates
percents_2p1

### MOST RECENT PERFORMANCE DATA BY PROGRAM

In [None]:
# Print every CIP with its 2P1 rate as a percentage
pf2p1a = zip(percents_2p1.index, percents_2p1.values)
for cip_code, rate in pf2p1a:
    line = '{0:.4f}---{1:.2f}%'.format(cip_code, rate*100)
    print(line)

### PROGRAMS NOT MEETING TARGET

In [None]:
# CIPs whose 2P1 rate is at or below the target: once as a Series for
# printing, once as a DataFrame indexed by CIP for the results merge
at_or_below_2p1 = percents_2p1.values <= target_2
p_unmet_2p1_pf = percents_2p1[at_or_below_2p1]
p_unmet_2p1 = pd.DataFrame(percents_2p1[at_or_below_2p1]).reset_index().set_index('CIP Code')

In [None]:
# Print the CIPs at or below the 2P1 target with their rate as a percentage
pf2p1b = zip(p_unmet_2p1_pf.index, p_unmet_2p1_pf.values)
for cip_code, rate in pf2p1b:
    line = '{0:.4f}---{1:.2f}%'.format(cip_code, rate*100)
    print(line)

### SPECIAL POPULATIONS NOT MEETING TARGET WITHIN PROGRAMS

In [None]:
# Special populations at or below the 2P1 target, indexed by CIP
# (one column per special population that has at least one flagged CIP)
unmet_spop_2p1 = spop_2p1.set_index('CIP Code')

In [None]:
# For each special population with flagged CIPs, print its column name followed
# by the CIPs at or below the 2P1 target and their rate as a percentage.
for items in rows_2p1:
    try:
        print(items.columns[1])
        for v in items.values:
            cip = v[0]
            meas = v[1]
            print('{0:.4f}---{1:.2f}%'.format(cip,meas*100))

    except Exception:
        # Best effort: a frame that cannot be printed is reported as NA.
        # `Exception` (not a bare except) lets Ctrl-C / kernel interrupt through.
        print('NA')
    

## 3P1 Performance Indicator

In [None]:
# Crosswalk flags and gender codes used by 3P1.
# NOTE(review): presumably 'Female'/'Male' == 'Y' marks a CIP as
# non-traditional for that gender and gender codes 2/1 mean female/male --
# confirm against the crosswalk and CTEA layouts.
flag_female = completer_set['Female'] == 'Y'
flag_male = completer_set['Male'] == 'Y'
gender_is_2 = completer_set[gender_field] == 2
gender_is_1 = completer_set[gender_field] == 1

# Numerator: flag matches the student's gender code; denominator: either flag set
num_3p1 = completer_set[(flag_female & gender_is_2) | (flag_male & gender_is_1)]
den_3p1 = completer_set[flag_female | flag_male]

# Per-CIP counts and rate; a CIP missing from one side is treated as 0
num_grp_3 = num_3p1.groupby(cip_field)[id_field].count()
den_grp_3 = den_3p1.groupby(cip_field)[id_field].count()
percents_3p1 = num_grp_3.div(den_grp_3, fill_value=0)

In [None]:
# For each special population, compute the 3P1 rate per CIP and keep the CIPs
# whose rate is at or below target_3.
rows_3p1 = []
for i in spop_fields:

    # rows flagged (== 1) for this special population
    lim_num = num_3p1[num_3p1[i] == 1]
    lim_den = den_3p1[den_3p1[i] == 1]

    # per-CIP counts of the population column, then their ratio
    num = lim_num.filter([cip_field, i]).groupby(cip_field).count()
    dem = lim_den.filter([cip_field, i]).groupby(cip_field).count()
    pct = num.divide(dem)
    underperf = pct[pct[i] <= target_3]

    # every population is appended (even when empty) so the print cell still
    # lists its name
    rows_3p1.append(underperf.reset_index())

# Combine only the non-empty frames. When nothing underperforms, concatenating
# empty frames and dropping all-NaN columns removes the CIP column too, and the
# groupby raised KeyError; fall back to an empty table instead.
flagged_3p1 = [frame for frame in rows_3p1 if len(frame) > 0]
if flagged_3p1:
    spop_3p1 = pd.concat(flagged_3p1).dropna(axis=1, how='all').groupby(cip_field).max().reset_index()
else:
    spop_3p1 = pd.DataFrame(columns=[cip_field])

### MOST RECENT PERFORMANCE DATA BY PROGRAM

In [None]:
# Print every CIP with its 3P1 rate as a percentage
pf3p1a = zip(percents_3p1.index, percents_3p1.values)
for cip_code, rate in pf3p1a:
    line = '{0:.4f}---{1:.2f}%'.format(cip_code, rate*100)
    print(line)

### PROGRAMS NOT MEETING TARGET

In [None]:
# CIPs whose 3P1 rate is at or below the target: once as a Series for
# printing, once as a DataFrame indexed by CIP for the results merge
at_or_below_3p1 = percents_3p1.values <= target_3
p_unmet_3p1_pf = percents_3p1[at_or_below_3p1]
p_unmet_3p1 = pd.DataFrame(percents_3p1[at_or_below_3p1]).reset_index().set_index('CIP Code')

In [None]:
# Print the CIPs at or below the 3P1 target with their rate as a percentage
pf3p1b = zip(p_unmet_3p1_pf.index, p_unmet_3p1_pf.values)
for cip_code, rate in pf3p1b:
    line = '{0:.4f}---{1:.2f}%'.format(cip_code, rate*100)
    print(line)

### SPECIAL POPULATIONS NOT MEETING TARGET WITHIN PROGRAMS

In [None]:
# Special populations at or below the 3P1 target, indexed by CIP
# (one column per special population that has at least one flagged CIP)
unmet_spop_3p1 = spop_3p1.set_index('CIP Code')


In [None]:
# For each special population, print its column name followed by the CIPs
# at or below the 3P1 target and their rate as a percentage.
for items in rows_3p1:
    try:
        print(items.columns[1])
        for v in items.values:
            cip = v[0]
            meas = v[1]
            print('{0:.4f}---{1:.2f}%'.format(cip,meas*100))

    except Exception:
        # Best effort: a frame that cannot be printed is reported as NA.
        # `Exception` (not a bare except) lets Ctrl-C / kernel interrupt through.
        print('NA')
    

## RESULTS

In [None]:
# Load the measures for all three indicators into one table, one row per CIP.

# Suffix of each special population's "Unmet <indicator> ..." column, keyed by
# the configured source column so that renaming a field in the settings cell
# is honored here as well.
_spop_suffixes = {
    disab_field: 'Disabled',
    displ_field: 'Displaced',
    econdis_field: 'Econ Dis',
    homels_field: 'Homeless',
    lim_eng_field: 'Lim Eng',
    migr_field: 'Migrant',
    sparnt_field: 'Sing Par',
    youth_ao_field: 'Aged Out',
    youth_af_field: 'Armed Forces',
}


def _indicator_frames(tag, target_label, percents, target, unmet, spop):
    """Build the per-CIP result tables for one indicator.

    Parameters
    ----------
    tag : str
        Indicator label used in column names, e.g. '1P1'.
    target_label : str
        Name of the column that holds the target, e.g. 'Target 1'.
    percents : pandas.Series
        Rate per CIP (index = CIP, Series name = id_field).
    target : float
        Target proportion for the indicator.
    unmet : pandas.DataFrame
        Rates of the CIPs not meeting the target, indexed by CIP.
    spop : pandas.DataFrame
        Special-population rates not meeting the target, with a CIP column.

    Returns
    -------
    tuple of pandas.DataFrame
        (measure, measure + unmet, renamed special populations, combined).
    """
    measure = pd.DataFrame(percents).reset_index()
    measure[target_label] = target
    measure.rename(columns={id_field: 'Measure ' + tag}, inplace=True)

    with_unmet = measure.merge(unmet, on=cip_field, how='left')
    with_unmet.rename(columns={id_field: 'Unmet ' + tag}, inplace=True)

    # rename() returns a new frame, so the caller's special-population table
    # is not modified as a side effect (re-running the cell stays idempotent)
    spop_named = spop.rename(columns={
        source: 'Unmet {0} {1}'.format(tag, suffix)
        for source, suffix in _spop_suffixes.items()
    })
    combined = with_unmet.merge(spop_named, on=cip_field, how='left')
    return measure, with_unmet, spop_named, combined


df1a, df1b, df1c, df1 = _indicator_frames('1P1', 'Target 1', percents_1p1, target_1, p_unmet_1p1, spop_1p1)
df2a, df2b, df2c, df2 = _indicator_frames('2P1', 'Target 2', percents_2p1, target_2, p_unmet_2p1, spop_2p1)
df3a, df3b, df3c, df3 = _indicator_frames('3P1', 'Target 3', percents_3p1, target_3, p_unmet_3p1, spop_3p1)

# Outer joins keep CIPs that have 2P1 or 3P1 results but no 1P1 denominator;
# a left join on the 1P1 table silently dropped their measures.
m1 = df1.merge(df2, on=cip_field, how='outer')
measures = m1.merge(df3, on=cip_field, how='outer')

# Format the CIP as four-decimal text so it matches cip_list['CIP Code']
measures[cip_field] = measures[cip_field].map('{:.4f}'.format)
final_measures = measures.merge(cip_list, left_on=cip_field, right_on='CIP Code', how='right')


In [None]:
def _overall_pct(numerator, denominator):
    """Return len(numerator) / len(denominator) as a percentage.

    Returns NaN instead of raising ZeroDivisionError when the denominator
    has no rows.
    """
    if len(denominator) == 0:
        return float('nan')
    return len(numerator)/len(denominator)*100


# Overall (all CIPs combined) performance for each indicator
print('Overall Performance for 1P1 is {0:.2f}%'.format(_overall_pct(num_1p1, den_1p1)))
print('Overall Performance for 2P1 is {0:.2f}%'.format(_overall_pct(num_2p1, den_2p1)))
# message fixed: the original printed "for for 3P1"
print('Overall Performance for 3P1 is {0:.2f}%'.format(_overall_pct(num_3p1, den_3p1)))

In [None]:
# Display the combined measures, hiding rows with no 'CIP Code' value
# (the result is only shown, not stored; final_measures itself is unchanged)
final_measures.dropna(subset=['CIP Code'])

In [None]:
# Write the measures table to measures.xlsx without the row index.
# NOTE(review): this exports `measures`, not `final_measures` -- confirm that
# is the intended table.
measures.to_excel('measures.xlsx',index=None)
print('Exported to measures.xlsx')


## Chart 7b: Access and Equity

These lines will output the answers for Chart 7b of the CLNA. Any CIPs that do not appear on the list have zero enrollment or completion for that special population. If you wish to show all of them, uncomment the print statements under each `else` in the cells below.

In [None]:
# Rows with at least 12 credits
has_12_credits = new_data[credit_field] >= 12
concentrator_set = new_data[has_12_credits]

In [None]:
# Enrollment base: every concentrator row
spop_enrl = concentrator_set
# Completion base: completer_set rows with status 4 or 6
completed_mask = completer_set[status_field].isin([4, 6])
spop_compl = completer_set[completed_mask]

# Per-CIP 2P1 counts (same computation as in the 2P1 section)
num_grp_2 = num_2p1.groupby(cip_field)[id_field].count()
den_grp_2 = den_2p1.groupby(cip_field)[id_field].count()

In [None]:
def _nontrad_rows(frame):
    """Return the rows of *frame* where the crosswalk flag matches the gender code.

    A row is kept when 'Female' == 'Y' and the gender code is 2, or when
    'Male' == 'Y' and the gender code is 1.
    """
    female_match = (frame['Female'] == 'Y') & (frame[gender_field] == 2)
    male_match = (frame['Male'] == 'Y') & (frame[gender_field] == 1)
    return frame[female_match | male_match]


nontrad_enrl = _nontrad_rows(concentrator_set)
nontrad_comp = _nontrad_rows(spop_compl)

# Overall counts
nt_enrl = len(nontrad_enrl)
nt_comp = len(nontrad_comp)

# Share of matching rows among all concentrators, per CIP
nt_num = nontrad_enrl.groupby(cip_field)['Student ID'].count()
nt_den = concentrator_set.groupby(cip_field)['Student ID'].count()
nt_pct = nt_num.div(nt_den)

# Share of matching rows among all status 4/6 rows, per CIP
ntc_num = nontrad_comp.groupby(cip_field)['Student ID'].count()
ntc_den = spop_compl.groupby(cip_field)['Student ID'].count()
ntc_pct = ntc_num.div(ntc_den)


### Non-Traditional Enrollment

In [None]:
# Print the non-traditional enrollment rate of every CIP with a rate above 0.
# Iterates over all entries: the original range(1, len(...)) started at
# position 1 and therefore never printed the first CIP.
for cip, meas in nt_pct.items():
    if meas > 0:
        print('{0:.4f}---{1:.2f}%'.format(cip,meas*100))
    else:
        # Uncomment to also list CIPs without a positive rate:
        #print('{0:.4f}---{1:.2f}%'.format(cip,0))
        pass

### Non-traditional Completion Rates

In [None]:
# Print the non-traditional completion rate of every CIP with a rate above 0.
# Iterates over all entries: the original range(1, len(...)) started at
# position 1 and therefore never printed the first CIP.
for cip, meas in ntc_pct.items():
    if meas > 0:
        print('{0:.4f}---{1:.2f}%'.format(cip,meas*100))
    else:
        # Uncomment to also list CIPs without a positive rate:
        #print('{0:.4f}---{1:.2f}%'.format(cip,0))
        pass

## Enrollment and Completion by Special Populations

In [None]:
def _spop_share_by_cip(members, everyone, column):
    """Return, per CIP, the count of *column* in *members* divided by its count in *everyone*.

    Both frames are reduced to the CIP column and *column*, grouped by CIP and
    counted; the ratio is returned with the CIP as a regular column.
    """
    member_counts = members.filter([cip_field, column]).groupby(cip_field).count()
    total_counts = everyone.filter([cip_field, column]).groupby(cip_field).count()
    return member_counts.divide(total_counts).reset_index()


# For each special population, compute its share of enrollment
# (concentrators) and of completion (status 4/6 rows) per CIP.
enr_rate = []
comp_rate = []
for pop_field in spop_fields:
    enrolled_members = spop_enrl[spop_enrl[pop_field] == 1]
    completed_members = spop_compl[spop_compl[pop_field] == 1]

    enr_rate.append(_spop_share_by_cip(enrolled_members, spop_enrl, pop_field))
    comp_rate.append(_spop_share_by_cip(completed_members, spop_compl, pop_field))
    


### Enrollment Rates by Special Populations

In [None]:
# For each special population, print its column name followed by every CIP
# with an enrollment rate above 0, as a percentage.
for items in enr_rate:
    try:
        print(items.columns[1])
        for v in items.values:
            cip = v[0]
            meas = v[1]
            # rates that are 0 or NaN are skipped (the old `else: next` did nothing)
            if meas>0:
                print('{0:.4f}---{1:.2f}%'.format(cip,meas*100))

    except Exception:
        # Best effort: a frame that cannot be printed is reported as NA.
        # `Exception` (not a bare except) lets Ctrl-C / kernel interrupt through.
        print('NA')
    

### Completion rates by special populations

In [None]:
# For each special population, print its column name followed by every CIP
# with a completion rate of 0 or more, as a percentage (NaN rates are skipped).
for items in comp_rate:
    try:
        print(items.columns[1])
        for v in items.values:
            cip = v[0]
            meas = v[1]

            if meas>=0:
                print('{0:.4f}---{1:.2f}%'.format(cip,meas*100))
            else:
                # Uncomment to also list CIPs without a rate:
                #print('{0:.4f}---{1:.1f}%'.format(cip,0))
                pass

    except Exception:
        # Best effort: a frame that cannot be printed is reported as NA.
        # `Exception` (not a bare except) lets Ctrl-C / kernel interrupt through.
        print('NA')
    