In [1]:
import pandas as pd
import numpy as np

# Monthly transfer extracts (September 2020 to February 2021) stored under one S3 prefix.
transfer_file_location = "s3://prm-gp2gp-data-sandbox-dev/transfers-duplicates-hypothesis/"
transfer_files = [
    "9-2020-transfers.parquet",
    "10-2020-transfers.parquet",
    "11-2020-transfers.parquet",
    "12-2020-transfers.parquet",
    "1-2021-transfers.parquet",
    "2-2021-transfers.parquet",
]
transfer_input_files = [f"{transfer_file_location}{file_name}" for file_name in transfer_files]

# Read each monthly file and stack them into a single frame.
transfers_raw = pd.concat([pd.read_parquet(input_file) for input_file in transfer_input_files])

# Only needed for the transfers-duplicates-hypothesis datasets: remove the supplier
# columns they carry (supplier names are attached again further down from the ASID lookup).
transfers_raw = transfers_raw.drop(columns=["sending_supplier", "requesting_supplier"])




# PRMT-1742 found that many duplicate-EHR errors are misclassified, so the statuses are
# corrected here. A transfer is treated as integrated when any of its
# request-completed ack codes is NaN or 15.
successful_transfers_bool = transfers_raw['request_completed_ack_codes'].apply(
    lambda ack_codes: any([np.isnan(code) or code == 15 for code in ack_codes])
)
transfers = transfers_raw.copy()
transfers.loc[successful_transfers_bool, "status"] = "INTEGRATED"

# PENDING_WITH_ERROR transfers whose sender error code is in this list become FAILED.
pending_sender_error_codes = [6, 7, 10, 24, 30, 23, 14, 99]
transfers_with_pending_sender_code_bool = transfers['sender_error_code'].isin(pending_sender_error_codes)
transfers_with_pending_with_error_bool = transfers['status'].eq('PENDING_WITH_ERROR')
transfers_which_need_pending_to_failure_change_bool = (
    transfers_with_pending_sender_code_bool & transfers_with_pending_with_error_bool
)
transfers.loc[transfers_which_need_pending_to_failure_change_bool, 'status'] = 'FAILED'

# INTEGRATED transfers whose sla_duration exceeds eight days get their own status.
# (assumes sla_duration is expressed in seconds - TODO confirm)
eight_days_in_seconds = 8 * 24 * 60 * 60
transfers_after_sla_bool = transfers['sla_duration'].gt(eight_days_in_seconds)
transfers_with_integrated_bool = transfers['status'].eq('INTEGRATED')
transfers_integrated_late_bool = transfers_after_sla_bool & transfers_with_integrated_bool
transfers.loc[transfers_integrated_late_bool, 'status'] = 'INTEGRATED LATE'


# Short display names for the supplier names (MName) found in the ASID lookup.
# NOTE(review): the None key is presumably meant to label unmatched ASIDs as "Unknown";
# whether it also catches the NaN produced by the left merges should be confirmed.
supplier_renaming = {
    "EGTON MEDICAL INFORMATION SYSTEMS LTD (EMIS)": "EMIS",
    "IN PRACTICE SYSTEMS LTD": "Vision",
    "MICROTEST LTD": "Microtest",
    "THE PHOENIX PARTNERSHIP": "TPP",
    None: "Unknown",
}

asid_lookup_file = "s3://prm-gp2gp-data-sandbox-dev/asid-lookup/asidLookup-Mar-2021.csv.gz"
asid_lookup = pd.read_csv(asid_lookup_file)
lookup = asid_lookup[["ASID", "MName", "NACS", "OrgName"]]

# Attach supplier / ODS details for the requesting practice, then for the sending practice.
# NOTE(review): OrgName is not renamed after either merge, so the second merge leaves it
# as two suffixed columns (pandas' default "_x" / "_y").
transfers = transfers.merge(lookup, how='left', left_on='requesting_practice_asid', right_on='ASID')
transfers = transfers.rename(
    columns={
        'MName': 'requesting_supplier',
        'ASID': 'requesting_supplier_asid',
        'NACS': 'requesting_ods_code',
    }
)
transfers = transfers.merge(lookup, how='left', left_on='sending_practice_asid', right_on='ASID')
transfers = transfers.rename(
    columns={
        'MName': 'sending_supplier',
        'ASID': 'sending_supplier_asid',
        'NACS': 'sending_ods_code',
    }
)

# Swap the long supplier names for their short forms on both sides of the transfer.
transfers["sending_supplier"] = transfers["sending_supplier"].replace(
    supplier_renaming.keys(), supplier_renaming.values()
)
transfers["requesting_supplier"] = transfers["requesting_supplier"].replace(
    supplier_renaming.keys(), supplier_renaming.values()
)

In [2]:
# Count transfers per requesting practice (rows) and status (columns); a practice with
# no transfers in a given status gets 0 instead of NaN.
practice_status_table = transfers.pivot_table(
    index='requesting_practice_asid',
    columns='status',
    values='conversation_id',
    aggfunc='count',
).fillna(0)
# Row total across every status column.
practice_status_table['TOTAL'] = practice_status_table.sum(axis=1)

In [3]:
def quantile_status_table(quantiles,practice_profile_data,status):
    """Show how concentrated one status is across practices.

    Practices are ranked from the most to the fewest transfers in ``status`` and
    then cut into ``quantiles`` groups, each holding roughly an equal share of
    all transfers with that status. Each group is summarised by its status
    total, its overall transfer total and the number of practices in it.

    Parameters
    ----------
    quantiles : int
        Number of groups to split the status total into (5 gives 20% bands).
    practice_profile_data : pandas.DataFrame
        One row per practice. Must contain the ``status`` column and a
        ``'TOTAL'`` column. The frame passed in is not modified.
    status : str
        Name of the column to rank and split by.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'Percentile Group'`` (the upper bound of each band, e.g.
        20.0, 40.0, ... 100.0 for ``quantiles=5``) with the columns
        ``'Total <status>'``, ``'Total Transfers'``, ``'Total Practices'`` and
        the same three prefixed with ``'% '``, giving each group's share of the
        column total rounded to two decimals.
    """
    ranked = practice_profile_data.sort_values(by=status, ascending=False)

    running_total = ranked[status].cumsum()
    # Multiply before dividing: computing (cumsum / total) * quantiles can land a
    # hair above an exact boundary (0.07 * 100 == 7.000000000000001), which ceil
    # would then push into the next group.
    group_number = np.ceil(running_total * quantiles / ranked[status].sum())
    ranked = ranked.assign(**{'Percentile Group': (100 / quantiles) * group_number})

    # Named aggregation keeps the output columns distinct even when ``status`` is
    # 'INTEGRATED' or 'TOTAL', and does not require an 'INTEGRATED' column to exist.
    # Every row that lands in a group has a non-null ``status`` value (a null would
    # give a null group label and be dropped), so counting it counts practices.
    group_totals = ranked.groupby('Percentile Group').agg(**{
        'Total ' + status: (status, 'sum'),
        'Total Transfers': ('TOTAL', 'sum'),
        'Total Practices': (status, 'count'),
    }).astype(int)

    group_percentages = (100 * group_totals / group_totals.sum()).round(2)
    group_percentages.columns = "% " + group_percentages.columns

    return pd.concat([group_totals, group_percentages], axis=1)
    

In [4]:
# Rank practices by INTEGRATED LATE transfers and split them into 5 groups, each
# holding roughly 20% of all late integrations.
quantile_status_table(
    quantiles=5,
    practice_profile_data=practice_status_table,
    status='INTEGRATED LATE',
)

Unnamed: 0_level_0,Total INTEGRATED LATE,Total Transfers,Total Practices,% Total INTEGRATED LATE,% Total Transfers,% Total Practices
Percentile Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
20.0,15732,45036,34,19.83,3.35,0.51
40.0,15990,61380,150,20.15,4.57,2.24
60.0,15848,87785,289,19.97,6.54,4.31
80.0,15898,133811,609,20.04,9.96,9.08
100.0,15878,1015222,5622,20.01,75.58,83.86


In [5]:
# Rank practices by FAILED transfers and split them into 5 groups, each holding
# roughly 20% of all failures.
quantile_status_table(
    quantiles=5,
    practice_profile_data=practice_status_table,
    status='FAILED',
)

Unnamed: 0_level_0,Total FAILED,Total Transfers,Total Practices,% Total FAILED,% Total Transfers,% Total Practices
Percentile Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
20.0,9196,59766,65,19.93,4.45,0.97
40.0,9261,120182,234,20.07,8.95,3.49
60.0,9225,229614,545,19.99,17.09,8.13
80.0,9234,341793,1115,20.01,25.45,16.63
100.0,9230,591879,4745,20.0,44.06,70.78


In [6]:
# Rank practices by PENDING transfers and split them into 5 groups, each holding
# roughly 20% of all pending transfers.
quantile_status_table(
    quantiles=5,
    practice_profile_data=practice_status_table,
    status='PENDING',
)

Unnamed: 0_level_0,Total PENDING,Total Transfers,Total Practices,% Total PENDING,% Total Transfers,% Total Practices
Percentile Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
20.0,7743,19416,34,19.81,1.45,0.51
40.0,7874,70205,129,20.14,5.23,1.92
60.0,7835,134791,360,20.05,10.03,5.37
80.0,7813,342693,1009,19.99,25.51,15.05
100.0,7822,776129,5172,20.01,57.78,77.15


In [7]:
# Rank practices by PENDING_WITH_ERROR transfers and split them into 5 groups, each
# holding roughly 20% of all such transfers.
quantile_status_table(
    quantiles=5,
    practice_profile_data=practice_status_table,
    status='PENDING_WITH_ERROR',
)

Unnamed: 0_level_0,Total PENDING_WITH_ERROR,Total Transfers,Total Practices,% Total PENDING_WITH_ERROR,% Total Transfers,% Total Practices
Percentile Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
20.0,637,113252,114,19.91,8.43,1.7
40.0,641,126630,284,20.04,9.43,4.24
60.0,641,135760,440,20.04,10.11,6.56
80.0,640,156392,640,20.01,11.64,9.55
100.0,640,811200,5226,20.01,60.39,77.95


In [8]:
# 'Paper Fallback' is every transfer of a practice that is not in the INTEGRATED
# status (TOTAL minus INTEGRATED), so it also includes INTEGRATED LATE transfers.
non_integrated_table = practice_status_table.assign(
    **{'Paper Fallback': lambda table: table['TOTAL'] - table['INTEGRATED']}
)
# Split practices into 10 groups, each holding roughly 10% of all paper fallbacks.
quantile_status_table(
    quantiles=10,
    practice_profile_data=non_integrated_table,
    status='Paper Fallback',
)

Unnamed: 0_level_0,Total Paper Fallback,Total Transfers,Total Practices,% Total Paper Fallback,% Total Transfers,% Total Practices
Percentile Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10.0,16540,29577,15,9.86,2.2,0.22
20.0,16903,50136,70,10.07,3.73,1.04
30.0,16827,56370,110,10.03,4.2,1.64
40.0,16779,56438,159,10.0,4.2,2.37
50.0,16826,79233,227,10.03,5.9,3.39
60.0,16764,99197,328,9.99,7.38,4.89
70.0,16792,145447,477,10.01,10.83,7.12
80.0,16790,183584,688,10.01,13.67,10.26
90.0,16777,247372,1107,10.0,18.42,16.51
100.0,16780,395880,3523,10.0,29.47,52.55
