# PRMT-2512 - Transfer and Error Code combination table for 17th Jan

## Context
Generate the error code combination table and the transfer-level failure report for 17th January 2022, using the transfer dataset produced with no cut-off (`cutoff-0`).

In [1]:
import pandas as pd 
import numpy as np
import paths, data
from sklearn.preprocessing import MultiLabelBinarizer
from datetime import datetime
from data.practice_metadata import read_asid_metadata

## Check spine messages

Using Splunk web: 331988 events on 17th Jan

In [2]:
# Load the raw spine messages for 17th Jan so the row count can be compared with the Splunk figure above.
spine_messages_17_jan = pd.read_csv(
    "s3://prm-gp2gp-raw-spine-data-dev/v3/2022/01/17/2022-01-17_spine_messages.csv.gz"
)

In [3]:
# Number of spine message rows loaded for the day
len(spine_messages_17_jan)

331988

## Import transfer dataset

In [4]:
# Series indexed by error code whose values are the matching error names
error_code_lookup_file = pd.read_csv(data.gp2gp_response_codes.path)
error_code_lookup = error_code_lookup_file.set_index("ErrorCode").loc[:, "ErrorName"]

In [5]:
asid_lookup = read_asid_metadata("prm-gp2gp-ods-metadata-dev", "v2/2022/1/organisationMetadata.json")
transfer_file_location = "s3://prm-gp2gp-transfer-data-dev/v7/cutoff-0/2022/01/17/2022-01-17-transfers.parquet"

transfers_raw = pd.read_parquet(transfer_file_location)

# Attach the organisation metadata twice: once for the requesting practice and
# once for the sending practice, each with its own column prefix.
transfers = (
    transfers_raw
    .join(asid_lookup.add_prefix("requesting_"), on="requesting_practice_asid", how="left")
    .join(asid_lookup.add_prefix("sending_"), on="sending_practice_asid", how="left")
)

# Make the status readable: underscores become spaces and only the first letter stays upper-case
transfers["status"] = transfers["status"].str.replace("_", " ").str.capitalize()

len(transfers)

10497

In [6]:
# Sanity check: keep only transfers requested on 17th Jan (from midnight up to, but excluding, 18th Jan)
# and confirm the count matches the full dataset
first_half_date_filter_bool_start = transfers["date_requested"].ge(datetime(2022, 1, 17))
first_half_date_filter_bool_end = transfers["date_requested"].lt(datetime(2022, 1, 18))
transfers_within_date_range = transfers.loc[first_half_date_filter_bool_start & first_half_date_filter_bool_end]
len(transfers_within_date_range)

10497

## Generating the transfer level report

In [7]:
# Keep only the transfers whose status is a technical or an unclassified failure
failed_transfers_bool = transfers_within_date_range["status"].isin(["Technical failure", "Unclassified failure"])
failed_transfers = transfers_within_date_range.loc[failed_transfers_bool]
len(failed_transfers)

267

In [8]:
# Remove NaN and duplicate occurrences in error codes, and sort them
def filter_error_codes(error_codes_column):
    """Return, for every row, the sorted list of distinct non-NaN error codes.

    Parameters
    ----------
    error_codes_column : pandas.Series
        Series whose elements are iterables of numeric error codes; missing
        codes are represented as NaN.

    Returns
    -------
    pandas.Series
        Series with the same index, where each element is a list of the
        unique error codes of that row in ascending order, without NaN.
    """
    def unique_sorted_codes(error_codes):
        # NaN must be dropped BEFORE de-duplicating and sorting: NaN never
        # compares equal or ordered to anything, so leaving it in the set can
        # make sorted() return the real codes in the wrong order.
        return sorted({error_code for error_code in error_codes if not np.isnan(error_code)})

    return error_codes_column.apply(unique_sorted_codes)

# Build a new, independent frame that carries the de-duplicated error code columns.
# Working on a fresh frame (rather than on the filtered slice) keeps the notebook from
# complaining about mutating the original dataset in the cells below.
failed_transfers = failed_transfers.assign(
    unique_sender_error_codes=filter_error_codes(failed_transfers["sender_error_codes"]),
    unique_final_error_codes=filter_error_codes(failed_transfers["final_error_codes"]),
    unique_intermediate_error_codes=filter_error_codes(failed_transfers["intermediate_error_codes"]),
)
failed_transfers.head(1)

Unnamed: 0,conversation_id,sla_duration,requesting_practice_asid,sending_practice_asid,requesting_supplier,sending_supplier,sender_error_codes,final_error_codes,intermediate_error_codes,status,...,requesting_practice_name,requesting_ccg_ods_code,requesting_ccg_name,sending_practice_ods_code,sending_practice_name,sending_ccg_ods_code,sending_ccg_name,unique_sender_error_codes,unique_final_error_codes,unique_intermediate_error_codes
34,7984FC23-1BFC-4652-AC08-D50ADC5D693F,52.0,31590848011,789841028044,EMIS,EMIS,[nan],[99.0],[],Technical failure,...,AMBLESIDE HEALTH CENTRE,01K,NHS MORECAMBE BAY CCG,P88020,CHEADLE MEDICAL PRACTICE,01W,NHS STOCKPORT CCG,[],[99.0],[]


In [9]:
# NOTE(review): this rebuilds the same lookup that the "Import transfer dataset"
# section already creates; consider removing one of the two.
error_code_lookup_file = pd.read_csv(data.gp2gp_response_codes.path)
error_code_lookup = error_code_lookup_file.set_index("ErrorCode").loc[:, "ErrorName"]

In [10]:
# Add descriptions from lookup to error code columns
def convert_error_list_to_formatted_string_list_with_error_lookup(error_code_list, lookup=None):
    """Format a list of error codes as "<code> - <name>" entries joined by ", ".

    Parameters
    ----------
    error_code_list : iterable of numbers
        Error codes to format. Both numpy scalars and plain Python numbers
        are accepted.
    lookup : mapping or pandas.Series, optional
        Maps an error code to its name. When omitted, the notebook-level
        ``error_code_lookup`` is used.

    Returns
    -------
    str
        For example "12 - Duplicate EHR, 99 - Unexpected"; an empty string
        for an empty list.

    Raises
    ------
    KeyError
        If a code is not present in the lookup.
    """
    if lookup is None:
        lookup = error_code_lookup
    # int() works for numpy scalars and plain floats alike, whereas .astype(int)
    # only exists on numpy values.
    return ", ".join(
        str(int(error_code)) + " - " + str(lookup[error_code])
        for error_code in error_code_list
    )

# Turn each list of unique codes into one readable "<code> - <name>" string per transfer
failed_transfers["unique_sender_error_codes_with_description"] = failed_transfers["unique_sender_error_codes"].map(
    convert_error_list_to_formatted_string_list_with_error_lookup
)
failed_transfers["unique_final_error_codes_with_description"] = failed_transfers["unique_final_error_codes"].map(
    convert_error_list_to_formatted_string_list_with_error_lookup
)
failed_transfers["unique_intermediate_error_codes_with_description"] = failed_transfers["unique_intermediate_error_codes"].map(
    convert_error_list_to_formatted_string_list_with_error_lookup
)

In [11]:
# Columns that make up the transfer level report, in display order
columns_to_keep = [
    'sending_practice_ods_code',
    'sending_supplier',
    'requesting_practice_ods_code',
    'requesting_supplier',
    'conversation_id',
    'date_requested',
    'status',
    'failure_reason',
    'unique_sender_error_codes_with_description',
    'unique_final_error_codes_with_description',
    'unique_intermediate_error_codes_with_description',
]

# Select the report columns and give them reader-friendly headings in one pass
failed_transfers_with_filtered_and_renamed_columns = (
    failed_transfers
    .loc[:, columns_to_keep]
    .rename(columns={
        "sending_practice_ods_code": "Sending Practice ODS",
        "sending_supplier": "Sending Supplier",
        "requesting_practice_ods_code": "Requesting Practice ODS",
        "requesting_supplier": "Requesting Supplier",
        "conversation_id": "Conversation ID",
        "date_requested": "Date Requested",
        "status": "Status",
        "failure_reason": "Failure Reason",
        "unique_sender_error_codes_with_description": "Unique Sender Errors",
        "unique_final_error_codes_with_description": "Unique Final Errors",
        "unique_intermediate_error_codes_with_description": "Unique Intermediate Errors",
    })
)

failed_transfers_with_filtered_and_renamed_columns.head(2)

Unnamed: 0,Sending Practice ODS,Sending Supplier,Requesting Practice ODS,Requesting Supplier,Conversation ID,Date Requested,Status,Failure Reason,Unique Sender Errors,Unique Final Errors,Unique Intermediate Errors
34,P88020,EMIS,A82005,EMIS,7984FC23-1BFC-4652-AC08-D50ADC5D693F,2022-01-17 15:56:47.366,Technical failure,Final error,,99 - Unexpected,
122,P81002,EMIS,Y03035,EMIS,6A2ECBE9-8F82-450F-A5C1-4D732F504599,2022-01-17 13:46:32.076,Unclassified failure,Ambiguous COPC messages,,12 - Duplicate EHR,


## Unique error codes count

In [12]:
# Count, per error code, how many failed transfers carry it as a sender, final or intermediate error.
# NOTE: this dict is deliberately not called `data`: that name is the imported `data` module, and
# rebinding it would break the cells that read `data.gp2gp_response_codes.path` when they are re-run.
error_code_counts = {
    'unique_sender_error_codes_count': failed_transfers.explode("unique_sender_error_codes").value_counts("unique_sender_error_codes"),
    'unique_final_error_codes_count': failed_transfers.explode("unique_final_error_codes").value_counts("unique_final_error_codes"),
    'unique_intermediate_error_codes_count': failed_transfers.explode("unique_intermediate_error_codes").value_counts("unique_intermediate_error_codes")
    }
df = pd.DataFrame(data=error_code_counts)

print("17 Jan after fix [total transfers:"+str(len(transfers_within_date_range))+", total failed transfers (technical + unclassified): "+str(len(failed_transfers))+"]")
df.sort_values(by=["unique_sender_error_codes_count", "unique_final_error_codes_count", "unique_intermediate_error_codes_count"], ascending=False)

17 Jan after fix [total transfers:10497, total failed transfers (technical + unclassified): 267]


Unnamed: 0,unique_sender_error_codes_count,unique_final_error_codes_count,unique_intermediate_error_codes_count
30.0,92.0,7.0,
20.0,31.0,,
10.0,27.0,,
6.0,6.0,,
12.0,,22.0,
99.0,,13.0,
11.0,,6.0,
31.0,,6.0,
17.0,,3.0,
21.0,,1.0,


In [13]:
# Column totals: how many error code occurrences were counted per error type
df.sum(axis="index")

unique_sender_error_codes_count          156.0
unique_final_error_codes_count            58.0
unique_intermediate_error_codes_count      9.0
dtype: float64

## Generating High level table

In [14]:
# transfers_within_date_range was produced by boolean-filtering `transfers`; take an
# independent copy before adding columns (as already done for failed_transfers) so the
# assignments below do not write to a slice of the original frame.
transfers_within_date_range = transfers_within_date_range.copy()

# Format the errors to be readable strings
transfers_within_date_range["unique_sender_error_codes"] = filter_error_codes(transfers_within_date_range["sender_error_codes"])
transfers_within_date_range["unique_final_error_codes"] = filter_error_codes(transfers_within_date_range["final_error_codes"])
transfers_within_date_range["unique_intermediate_error_codes"] = filter_error_codes(transfers_within_date_range["intermediate_error_codes"])

transfers_within_date_range["unique_sender_error_codes_with_description"] = transfers_within_date_range["unique_sender_error_codes"].apply(convert_error_list_to_formatted_string_list_with_error_lookup)
transfers_within_date_range["unique_final_error_codes_with_description"] = transfers_within_date_range["unique_final_error_codes"].apply(convert_error_list_to_formatted_string_list_with_error_lookup)
transfers_within_date_range["unique_intermediate_error_codes_with_description"] = transfers_within_date_range["unique_intermediate_error_codes"].apply(convert_error_list_to_formatted_string_list_with_error_lookup)
transfers_within_date_range.head(1)

Unnamed: 0,conversation_id,sla_duration,requesting_practice_asid,sending_practice_asid,requesting_supplier,sending_supplier,sender_error_codes,final_error_codes,intermediate_error_codes,status,...,sending_practice_ods_code,sending_practice_name,sending_ccg_ods_code,sending_ccg_name,unique_sender_error_codes,unique_final_error_codes,unique_intermediate_error_codes,unique_sender_error_codes_with_description,unique_final_error_codes_with_description,unique_intermediate_error_codes_with_description
0,1F4CE0D9-49B5-41EA-8B50-A13FF8F7533A,6509.0,343076154040,946977962046,EMIS,EMIS,[nan],[15.0],[],Integrated on time,...,G82015,PENCESTER SURGERY,91Q,NHS KENT AND MEDWAY CCG,[],[15.0],[],,15 - ABA suppressed,


In [15]:
def generate_high_level_table(transfers_sample):
    """Summarise transfers by supplier pathway, status, failure reason and error code combination.

    Parameters
    ----------
    transfers_sample : pandas.DataFrame
        One row per transfer. Must contain ``conversation_id``,
        ``requesting_supplier``, ``sending_supplier``, ``status``,
        ``failure_reason`` and the three
        ``unique_*_error_codes_with_description`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per distinct combination of the grouping columns, sorted by
        ``number of transfers`` (descending), with these measures:

        * ``number of transfers`` - transfers in the combination
        * ``% of transfers`` - share of all rows in ``transfers_sample``
        * ``% of technical failures`` - share of all "Technical failure"
          transfers (only filled for rows with that status)
        * ``% of supplier pathway`` - share of the transfers between the same
          sending and requesting supplier

        Missing values in the grouping columns are shown as empty strings.
    """
    grouping_columns_order = [
        "requesting_supplier",
        "sending_supplier",
        "status",
        "failure_reason",
        "unique_final_error_codes_with_description",
        "unique_sender_error_codes_with_description",
        "unique_intermediate_error_codes_with_description",
    ]
    counting_columns_order = ["number of transfers", "% of transfers", "% of technical failures", "% of supplier pathway"]

    # Fill missing values once, with ONE placeholder, and use the same frame for every
    # grouping below. The supplier pathway counts are looked up using the supplier
    # values of the high level table, so both must use identical placeholders -
    # otherwise a transfer with a missing supplier makes the lookup raise KeyError.
    transfers_filled = transfers_sample.fillna("N/A")

    # Create High level table
    high_level_table = transfers_filled.groupby(grouping_columns_order).agg({"conversation_id": "count"})
    high_level_table = high_level_table.rename({"conversation_id": "number of transfers"}, axis=1).reset_index()

    # Count % of transfers
    total_number_transfers = transfers_sample.shape[0]
    high_level_table["% of transfers"] = (high_level_table["number of transfers"] / total_number_transfers).multiply(100)

    # Count % of technical failures (only rows with that status get a value)
    total_number_technical_failed_transfers = (transfers_sample["status"] == "Technical failure").sum()
    technical_failed_transfers_table_bool = high_level_table["status"] == "Technical failure"
    high_level_table.loc[technical_failed_transfers_table_bool, "% of technical failures"] = (
        high_level_table.loc[technical_failed_transfers_table_bool, "number of transfers"] / total_number_technical_failed_transfers
    ).multiply(100)

    # Count by supplier pathway
    supplier_pathway_counts = transfers_filled.groupby(by=["sending_supplier", "requesting_supplier"]).agg({"conversation_id": "count"})["conversation_id"]
    high_level_table["% of supplier pathway"] = high_level_table.apply(
        lambda row: row["number of transfers"] / supplier_pathway_counts.loc[(row["sending_supplier"], row["requesting_supplier"])],
        axis=1,
    ).multiply(100)

    # Select and re-order table
    high_level_table = high_level_table[grouping_columns_order + counting_columns_order].sort_values(by="number of transfers", ascending=False)

    # Rename the columns
    high_level_table = high_level_table.rename({
        "sending_supplier": "sending supplier",
        "requesting_supplier": "requesting supplier",
        "status": "status",
        "failure_reason": "failure reason",
        "unique_final_error_codes_with_description": "unique final errors",
        "unique_sender_error_codes_with_description": "unique sender errors",
        "unique_intermediate_error_codes_with_description": "unique intermediate errors"
        }, axis=1)

    # Replace all N/A with empty strings primarily for error columns
    high_level_table = high_level_table.replace("N/A", "")

    return high_level_table

In [16]:
# Write both reports into one workbook, one sheet per report
with pd.ExcelWriter("PRMT-2512-Error-code-combination-and-transfer-level-table-17-jan.xlsx") as writer:
    generate_high_level_table(transfers_within_date_range).to_excel(
        writer,
        sheet_name="Error Code Combination Table",
        index=False,
    )
    failed_transfers_with_filtered_and_renamed_columns.to_excel(
        writer,
        sheet_name="Transfer level failures",
        index=False,
    )