# [“HYPOTHESIS”] Creation of tables to determine and assess the most common causes of failure to integrate 

- Import Data and adjust for duplicates issue
- Implement the sender error pipeline change
- Add in supplier

Table 1:
- Relabel status to reflect integrated late
- Add in column for absolutely no errors
- Create separate dataframes for each error type and concatenate
- generate the pivot table
- Add in Error Descriptions/Name
- Add in Supplier Pathway %s





- Create Monthly Views


In [1]:
import pandas as pd
import numpy as np

## Import 6 months of data and adjust for duplicates issue

In [2]:
# Location and names of the six monthly transfer extracts (Sep 2020 - Feb 2021).
transfer_file_location = "s3://prm-gp2gp-data-sandbox-dev/transfers-duplicates-hypothesis/"
transfer_files = [
    "9-2020-transfers.parquet",
    "10-2020-transfers.parquet",
    "11-2020-transfers.parquet",
    "12-2020-transfers.parquet",
    "1-2021-transfers.parquet",
    "2-2021-transfers.parquet"
]
transfer_input_files = [transfer_file_location + f for f in transfer_files]

# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# every file keeps its own row labels, so the concatenated frame can contain
# repeated labels, which makes later label-based selection/assignment ambiguous.
transfers_raw = pd.concat(
    (pd.read_parquet(f) for f in transfer_input_files),
    ignore_index=True,
)

# The supplier columns stored in the extracts are discarded here; the notebook
# re-derives them from the ASID lookup in a later cell.
transfers_raw = transfers_raw.drop(columns=['requesting_supplier', 'sending_supplier'])

# ASID lookup used later to map practice ASIDs to supplier names.
asid_lookup_file = "s3://prm-gp2gp-data-sandbox-dev/asid-lookup/asidLookup-Mar-2021.csv.gz"
asid_lookup = pd.read_csv(asid_lookup_file)

In [3]:
def _has_successful_final_ack(ack_codes):
    """Return True when at least one final acknowledgement code is NaN or 15.

    Code 15 is listed as "ABA suppressed" in the response-code lookup shown
    further down; a NaN entry presumably means an acknowledgement without an
    error code (TODO confirm against the data pipeline).
    """
    # The list is built in full before any() runs, so every code is evaluated.
    return any([np.isnan(code) or code == 15 for code in ack_codes])


# Work on a copy so the raw extract stays untouched for re-runs.
transfers = transfers_raw.copy()

# Any transfer with a qualifying final acknowledgement is counted as integrated.
successful_transfers_bool = transfers['request_completed_ack_codes'].apply(
    _has_successful_final_ack
)
transfers.loc[successful_transfers_bool, 'status'] = 'INTEGRATED'

In [4]:
# Sender error codes that turn a PENDING_WITH_ERROR transfer into a FAILED one.
pending_sender_error_codes = [6, 7, 10, 24, 30, 23, 14, 99]

transfers_with_pending_with_error_bool = transfers['status'].eq('PENDING_WITH_ERROR')
transfers_with_pending_sender_code_bool = transfers['sender_error_code'].isin(
    pending_sender_error_codes
)

# Only rows that satisfy both conditions are relabelled.
transfers_which_need_pending_to_failure_change_bool = (
    transfers_with_pending_sender_code_bool
    & transfers_with_pending_with_error_bool
)
transfers.loc[transfers_which_need_pending_to_failure_change_bool, 'status'] = 'FAILED'

In [5]:
# Attach the supplier for both ends of every transfer via the ASID lookup.
# Long manufacturer names from the lookup are shortened to display names;
# the None entry is presumably meant to label ASIDs with no match as "Unknown".
supplier_renaming = {
    "EGTON MEDICAL INFORMATION SYSTEMS LTD (EMIS)":"EMIS",
    "IN PRACTICE SYSTEMS LTD":"Vision",
    "MICROTEST LTD":"Microtest",
    "THE PHOENIX PARTNERSHIP":"TPP",
    None: "Unknown"
}

lookup = asid_lookup[["ASID", "MName"]]

# Left-join the lookup once per side (requesting first, then sending) and
# rename the joined columns so the second join does not collide with the first.
for side in ("requesting", "sending"):
    transfers = transfers.merge(
        lookup,
        how='left',
        left_on=side + '_practice_asid',
        right_on='ASID',
    ).rename(
        columns={'MName': side + '_supplier', 'ASID': side + '_supplier_asid'}
    )

# Swap the long manufacturer names for the short display names.
for supplier_column in ("sending_supplier", "requesting_supplier"):
    transfers[supplier_column] = transfers[supplier_column].replace(
        supplier_renaming.keys(), supplier_renaming.values()
    )

In [6]:
# Reference table of GP2GP response codes (ErrorCode, ErrorName, ResponseText),
# used later to attach a description to each error code.
error_code_lookup_url = "https://raw.githubusercontent.com/nhsconnect/prm-gp2gp-data-sandbox/master/data/gp2gp_response_codes.csv"
error_code_lookup_file = pd.read_csv(error_code_lookup_url)
error_code_lookup_file

Unnamed: 0,ErrorCode,ErrorName,ResponseText
0,6,Not at surgery,Patient not at surgery
1,7,GP2GP disabled,GP2GP Messaging is not enabled on this system
2,9,Unexpected EHR,EHR Extract received without corresponding req...
3,10,Failed to generate,Failed to successfully generate EHR Extract
4,11,Failed to integrate,Failed to successfully integrate EHR Extract
5,12,Duplicate EHR,Duplicate EHR Extract received
6,13,Config issue,The system’s configuration prevents it from pr...
7,14,Req not LM compliant,Message not sent because requesting practice i...
8,15,ABA suppressed,A-B-A EHR Extract Received and Stored As Suppr...
9,17,ABA wrong patient,A-B-A EHR Extract Received and rejected due to...


# Table 1: High Level View of Issues

## Relabel status to reflect integrated late

In [7]:
# Threshold of eight days, expressed in seconds to compare against 'sla_duration'.
eight_days_in_seconds = 8 * 24 * 60 * 60

transfers_with_integrated_bool = transfers['status'].eq('INTEGRATED')
transfers_after_sla_bool = transfers['sla_duration'].gt(eight_days_in_seconds)

# Integrated transfers that took longer than the threshold get their own status.
transfers_integrated_late_bool = transfers_after_sla_bool & transfers_with_integrated_bool
transfers.loc[transfers_integrated_late_bool, 'status'] = 'INTEGRATED LATE'

## Add in column for absolutely no errors


In [8]:
# A transfer has "no error codes" when all three error sources are empty:
#   * no sender error code,
#   * no intermediate error codes,
#   * no finite final acknowledgement codes (non-finite entries such as NaN
#     are not counted as errors).
transfers_without_sender_error_bool = transfers['sender_error_code'].isna()
transfers_without_intermediate_error_bool = transfers['intermediate_error_codes'].apply(len) == 0
transfers_without_final_ack_error_bool = (
    transfers['request_completed_ack_codes']
    .apply(lambda lis: [x for x in lis if np.isfinite(x)])
    .apply(len)
    == 0
)

transfers_without_any_error_bool = (
    transfers_without_sender_error_bool
    & transfers_without_intermediate_error_bool
    & transfers_without_final_ack_error_bool
)

# Create the marker column with object dtype up front. Initialising it as a
# float NaN column and then writing the string 'No Error' into it is an
# incompatible-dtype assignment, which pandas has deprecated (FutureWarning
# since 2.1). The resulting values are unchanged: NaN or 'No Error'.
transfers['No error codes'] = pd.Series(np.nan, index=transfers.index, dtype=object)
transfers.loc[transfers_without_any_error_bool, 'No error codes'] = 'No Error'

## Add in month column

In [9]:
# Calendar month of the request, stored as a monthly Period; it becomes the
# column axis of the pivot tables built below.
request_dates = transfers['date_requested']
transfers['month'] = request_dates.dt.to_period('M')

## Create separate dataframes for each error type and concatenate

In [10]:
# Keep only the columns needed to build the per-error-type tables.
reduced_columns = [
    'requesting_supplier',
    'sending_supplier',
    'sender_error_code',
    'intermediate_error_codes',
    'request_completed_ack_codes',
    'No error codes',
    'status',
    'conversation_id',
    'month',
]
reduced_transfers = transfers[reduced_columns]

### Sender Error Table

In [11]:
# Rows that carry a sender error code, relabelled into the shared
# 'Error Code' / 'Error Type' layout used by all four error tables.
sender_table = (
    reduced_transfers
    .loc[reduced_transfers['sender_error_code'].notna()]
    .drop(columns=['intermediate_error_codes', 'request_completed_ack_codes', 'No error codes'])
    .rename(columns={'sender_error_code': 'Error Code'})
    .assign(**{'Error Type': 'Sender'})
)

# Count conversations per supplier pathway / error / status, one column per month.
sender_table = pd.pivot_table(
    sender_table,
    values='conversation_id',
    index=['sending_supplier', 'requesting_supplier', 'Error Type', 'Error Code', 'status'],
    columns='month',
    aggfunc='count',
).fillna(0)

# Row total across all months, then cast every count to int.
sender_table['Total Volume'] = sender_table.sum(axis=1)
sender_table = sender_table.astype(int)

### Intermediate Error Table

In [12]:
# Rows with at least one intermediate error code; each code in the list gets
# its own row so a transfer with several codes is counted once per code.
has_intermediate_error = reduced_transfers['intermediate_error_codes'].apply(len).gt(0)
intermediate_table = (
    reduced_transfers
    .loc[has_intermediate_error]
    .drop(columns=['sender_error_code', 'request_completed_ack_codes', 'No error codes'])
    .explode('intermediate_error_codes')
    .rename(columns={'intermediate_error_codes': 'Error Code'})
    .assign(**{'Error Type': 'Intermediate'})
)

# Count conversations per supplier pathway / error / status, one column per month.
intermediate_table = pd.pivot_table(
    intermediate_table,
    values='conversation_id',
    index=['sending_supplier', 'requesting_supplier', 'Error Type', 'Error Code', 'status'],
    columns='month',
    aggfunc='count',
).fillna(0)

# Row total across all months, then cast every count to int.
intermediate_table['Total Volume'] = intermediate_table.sum(axis=1)
intermediate_table = intermediate_table.astype(int)

### Final Request Acknowledgements Table

In [13]:
# Final acknowledgement codes with the non-finite entries (e.g. NaN) removed.
finite_ack_codes = reduced_transfers['request_completed_ack_codes'].apply(
    lambda codes: [code for code in codes if np.isfinite(code)]
)

reqack_table = (
    reduced_transfers
    .drop(columns=['sender_error_code', 'intermediate_error_codes', 'No error codes'])
    .assign(request_completed_ack_codes=finite_ack_codes)
)

# Keep transfers that still have a code left, then give each code its own row.
reqack_table = reqack_table.loc[reqack_table['request_completed_ack_codes'].apply(len).gt(0)]
reqack_table = (
    reqack_table
    .explode('request_completed_ack_codes')
    .rename(columns={'request_completed_ack_codes': 'Error Code'})
    .assign(**{'Error Type': 'Final Request Acknowledgment'})
)

# Count conversations per supplier pathway / error / status, one column per month.
reqack_table = pd.pivot_table(
    reqack_table,
    values='conversation_id',
    index=['sending_supplier', 'requesting_supplier', 'Error Type', 'Error Code', 'status'],
    columns='month',
    aggfunc='count',
).fillna(0)

# Row total across all months, then cast every count to int.
reqack_table['Total Volume'] = reqack_table.sum(axis=1)
reqack_table = reqack_table.astype(int)

### No error codes Table

In [14]:
# Transfers flagged as having no error code at all; the 'No Error' marker
# takes the place of the error code so the table lines up with the others.
noerror_table = (
    reduced_transfers
    .loc[reduced_transfers['No error codes'].notna()]
    .drop(columns=['intermediate_error_codes', 'request_completed_ack_codes', 'sender_error_code'])
    .rename(columns={'No error codes': 'Error Code'})
    .assign(**{'Error Type': 'No Error Code'})
)

# Count conversations per supplier pathway / status, one column per month.
noerror_table = pd.pivot_table(
    noerror_table,
    values='conversation_id',
    index=['sending_supplier', 'requesting_supplier', 'Error Type', 'Error Code', 'status'],
    columns='month',
    aggfunc='count',
).fillna(0)

# Row total across all months, then cast every count to int.
noerror_table['Total Volume'] = noerror_table.sum(axis=1)
noerror_table = noerror_table.astype(int)

### Concatenate Tables

In [15]:
# Stack the four per-error-type tables and turn their shared MultiIndex
# (suppliers, error type, error code, status) back into ordinary columns.
# A single reset_index is enough: the earlier second reset and the shift to a
# 1-based index had no effect, because the column-on-column merge below
# returns a fresh default index anyway.
high_level_table = pd.concat(
    [sender_table, intermediate_table, reqack_table, noerror_table]
).reset_index()

# Attach the human-readable description of each error code. Only the join key
# and the description are taken from the lookup, so unused lookup columns
# (the unnamed index column and ErrorName) are not carried into the result.
error_descriptions = error_code_lookup_file[['ErrorCode', 'ResponseText']]
high_level_table = high_level_table.merge(
    error_descriptions,
    left_on='Error Code',
    right_on='ErrorCode',
    how='left',
)

# 'No Error' rows have no entry in the lookup; describe them explicitly.
high_level_table.loc[high_level_table['Error Code'] == 'No Error', 'ResponseText'] = 'No Error'

# The lookup's key column duplicates 'Error Code' and is no longer needed.
high_level_table = high_level_table.drop(columns='ErrorCode')


# Add in Supplier Pathway %

In [16]:
# Total number of transfers on each sending -> requesting supplier pathway.
pathway_keys = ['sending_supplier', 'requesting_supplier']
total_transfers_supplier_pathway = (
    transfers
    .groupby(pathway_keys)['conversation_id']
    .count()
    .rename('Total Supplier Pathway Transfers')
    .reset_index()
)

# Bring the pathway total onto every row of the high level table.
full_high_level_table = high_level_table.copy().merge(
    total_transfers_supplier_pathway, on=pathway_keys
)

# Each row's volume as a percentage of its pathway total, to two decimals.
full_high_level_table['% Supplier Pathway Transfers'] = (
    full_high_level_table['Total Volume']
    .mul(100)
    .div(full_high_level_table['Total Supplier Pathway Transfers'])
    .round(2)
)

# The raw pathway total was only needed for the percentage.
full_high_level_table = full_high_level_table.drop(columns='Total Supplier Pathway Transfers')
high_level_table = full_high_level_table.copy()

In [17]:
# Transfers per supplier pathway for every month (pathway as index, month as columns).
monthly_transfers_supplier_pathway = (
    pd.pivot_table(
        transfers,
        values='conversation_id',
        index=['sending_supplier', 'requesting_supplier'],
        columns='month',
        aggfunc='count',
    )
    .fillna(0)
    .astype(int)
)
monthly_columns = monthly_transfers_supplier_pathway.columns

# For each row of the high level table, look up the monthly totals of its
# pathway. Joining columns against the right-hand index keeps the left frame's
# row labels, so the division below lines up row by row.
total_monthly_volumes_by_row = high_level_table[['sending_supplier', 'requesting_supplier']].merge(
    monthly_transfers_supplier_pathway,
    left_on=['sending_supplier', 'requesting_supplier'],
    right_index=True,
)

# Replace the monthly counts with their share of the pathway's monthly total,
# as a percentage rounded to two decimals; undefined ratios become 0.
monthly_percentages = high_level_table[monthly_columns].div(
    total_monthly_volumes_by_row[monthly_columns]
)
monthly_percentages = monthly_percentages.fillna(0).mul(100).round(2)
high_level_table[monthly_columns] = monthly_percentages

In [18]:
# The month columns are the only ones whose labels are not plain strings.
monthly_columns = [label for label in high_level_table.columns if type(label) is not str]

# Descriptive columns first, then one column per month.
descriptive_columns = [
    'sending_supplier',
    'requesting_supplier',
    'Error Type',
    'ResponseText',
    'status',
    'Total Volume',
    '% Supplier Pathway Transfers',
    'Error Code',
]
column_order = descriptive_columns + monthly_columns

# Reorder the columns and put the highest-volume rows at the top.
high_level_table = high_level_table[column_order].sort_values(
    by='Total Volume', ascending=False
)

In [19]:
# Write the full table to Excel first, then end the cell with a preview.
# Only the last expression of a cell is rendered, so the bare table name that
# used to precede the export never displayed anything.
high_level_table.to_excel('top_level_problems_view.xlsx')
high_level_table.head(20)