# PRMT-2348 Analyse specific CCG data

We’ve done some initial analysis (PRMT-2332) to understand whether CCGs undergoing Lloyd George digitisation are generating more failures than the national average. A couple of CCGs (listed below) are above average for certain types of failure. This story looks in more depth at the reasons for failures in these CCGs.

Create the error code combinations table (PRMT-2269) for the following CCGs, with a separate table for each of May, June and July:

- Fylde and Wyre
- Birmingham and Solihull

In [1]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
import paths
from data.practice_metadata import read_asid_metadata
from data import gp2gp_response_codes

In [2]:
# Organisation metadata, joined below against the practice ASID columns.
asid_lookup = read_asid_metadata(
    "prm-gp2gp-ods-metadata-preprod",
    "v2/2021/7/organisationMetadata.json",
)

# One parquet extract per month under the 2021 prefix (5, 6 and 7).
transfer_file_location = "s3://prm-gp2gp-transfer-data-preprod/v4/2021/"
transfer_files = [
    "5/transfers.parquet",
    "6/transfers.parquet",
    "7/transfers.parquet",
]
transfer_input_files = [
    transfer_file_location + file_name for file_name in transfer_files
]

# Stack the monthly extracts into a single frame.
transfers_raw = pd.concat(
    [pd.read_parquet(input_file) for input_file in transfer_input_files]
)

# Attach the metadata twice: once for the requesting practice and once for
# the sending practice, with prefixed column names to keep them apart.
requesting_metadata = asid_lookup.add_prefix("requesting_")
sending_metadata = asid_lookup.add_prefix("sending_")
transfers = (
    transfers_raw
    .join(requesting_metadata, on="requesting_practice_asid", how="left")
    .join(sending_metadata, on="sending_practice_asid", how="left")
)

# Human-readable status (e.g. "Integrated On Time") and a monthly period
# used later to split the output into one sheet per month.
transfers["status"] = transfers["status"].str.replace("_", " ").str.title()
transfers["month"] = transfers["date_requested"].dt.to_period("M")

# Supplier name mapping
supplier_renaming = {
    "SystmOne": "TPP",
    None: "Unknown",
}

# Apply the same renaming to both supplier columns.
for supplier_column in ("sending_supplier", "requesting_supplier"):
    transfers[supplier_column] = transfers[supplier_column].replace(
        supplier_renaming.keys(), supplier_renaming.values()
    )

In [3]:
# Keep only transfers that involve one of the CCGs under investigation,
# once from the sender's point of view and once from the requester's.
# The names are upper-cased before being matched against the CCG name columns.
ccgs_interested_in = [
    ccg_name.upper()
    for ccg_name in ('NHS Fylde and Wyre CCG', 'NHS Birmingham and Solihull CCG')
]

is_sending_ccg_of_interest = transfers['sending_ccg_name'].isin(ccgs_interested_in)
is_requesting_ccg_of_interest = transfers['requesting_ccg_name'].isin(ccgs_interested_in)

# Copies, so that columns added later do not write into views of `transfers`.
sending_ccg_transfers = transfers[is_sending_ccg_of_interest].copy()
requesting_ccg_transfers = transfers[is_requesting_ccg_of_interest].copy()

In [4]:
# Load the GP2GP response code reference data and expose it as a Series
# mapping each ErrorCode (index) to its ErrorName (value).
error_code_lookup_file = pd.read_csv(gp2gp_response_codes.path)
error_names_by_code = error_code_lookup_file.set_index("ErrorCode")
error_code_lookup = error_names_by_code["ErrorName"]

In [5]:
def convert_error_list_to_tuple(error_code_list, error_code_type):
    """Turn a list of error codes into labelled ``(type, code, name)`` tuples.

    Duplicate and missing (NaN / None) codes are dropped. The remaining codes
    are returned in ascending order so that the same set of codes always
    produces the same sequence, whatever order they arrived in. Downstream
    code groups transfers by the resulting tuple, and iterating a bare ``set``
    does not guarantee a stable order.

    Args:
        error_code_list: Iterable of error codes for one transfer, or None.
        error_code_type: Label for where the codes came from, placed first in
            every tuple (the callers in this file pass "Sender", "COPC" or
            "Final").

    Returns:
        A list of ``(error_code_type, error_code, error_name)`` tuples; empty
        when there are no usable codes.

    Raises:
        KeyError: If a code is not present in ``error_code_lookup``.
    """
    if error_code_list is None:
        return []

    # pd.isna also treats None as missing, which np.isnan would reject.
    unique_error_codes = sorted(
        {error_code for error_code in error_code_list if not pd.isna(error_code)}
    )
    return [
        (error_code_type, error_code, error_code_lookup[error_code])
        for error_code in unique_error_codes
    ]
    
def combine_error_codes(row):
    """Collect all error codes of one transfer row into a single tuple.

    Codes are read from the sender, intermediate and final error code columns
    (in that order) and labelled "Sender", "COPC" and "Final" respectively.

    Args:
        row: A transfer record exposing ``sender_error_codes``,
            ``intermediate_error_codes`` and ``final_error_codes``.

    Returns:
        A tuple of ``(type, code, name)`` tuples, or a one-element placeholder
        tuple when the row carries no error codes at all. A tuple is returned
        (rather than a list) so the value is hashable.
    """
    labelled_columns = (
        ("sender_error_codes", "Sender"),
        ("intermediate_error_codes", "COPC"),
        ("final_error_codes", "Final"),
    )

    combined = []
    for column_name, error_code_type in labelled_columns:
        combined += convert_error_list_to_tuple(row[column_name], error_code_type)

    if combined:
        return tuple(combined)
    # Placeholder so that error-free transfers still get a value.
    return (("No Error Code", "No Error", "N/A"),)
    
# Add the combined error code tuple to both the sender and requester views.
for ccg_transfers in (sending_ccg_transfers, requesting_ccg_transfers):
    ccg_transfers["all_error_codes"] = ccg_transfers.apply(combine_error_codes, axis=1)

In [6]:
def binarized_error_codes(table_sample):
    """Append one indicator column per distinct non-string error code.

    Each value in ``all_error_codes`` is a sequence of tuples whose second
    element is the error code. The codes are binarized with
    ``MultiLabelBinarizer`` so the resulting columns can be used to filter
    rows by individual error code.

    The input frame is left untouched: the intermediate list of codes is kept
    in a local Series instead of being written to ``table_sample`` as a
    temporary column.

    Args:
        table_sample: DataFrame with an ``all_error_codes`` column.

    Returns:
        A new DataFrame containing the columns of ``table_sample`` followed by
        one column per error code found.
    """
    # Consolidate on the error code value alone (ignoring the error code
    # type). String codes, such as the "No Error" placeholder, are skipped.
    error_code_lists = table_sample["all_error_codes"].apply(
        lambda error_tuples: [
            error_tuple[1]
            for error_tuple in error_tuples
            if not isinstance(error_tuple[1], str)
        ]
    )

    # Split out error codes so we can use them to filter.
    mlb = MultiLabelBinarizer()
    binarized = mlb.fit_transform(error_code_lists)
    binarized_error_occurences = pd.DataFrame(
        data=binarized, columns=mlb.classes_, index=table_sample.index
    )

    return pd.concat([table_sample, binarized_error_occurences], axis=1)
    

In [7]:
def generate_high_level_table(transfers_sample, practice_type, ccg_name_field):
    """Build the error code combinations table for a sample of transfers.

    Transfers are grouped by CCG, practice, supplier pathway, status, failure
    reason and error code combination. Each group gets a transfer count plus
    several percentage columns, and one indicator column per error code is
    appended at the end.

    Args:
        transfers_sample: DataFrame of transfers. Must contain the columns
            named by ``practice_type`` and ``ccg_name_field`` as well as
            ``requesting_supplier``, ``sending_supplier``, ``status``,
            ``failure_reason`` and ``all_error_codes``.
        practice_type: Name of the practice column to group by.
        ccg_name_field: Name of the CCG column to group by.

    Returns:
        DataFrame sorted by "Number of Transfers" (descending) with the
        grouping columns, the counting columns and the per-error-code
        indicator columns added by ``binarized_error_codes``.
    """
    grouping_columns = [
        ccg_name_field,
        practice_type,
        "requesting_supplier",
        "sending_supplier",
        "status",
        "failure_reason",
        "all_error_codes",
    ]
    counting_columns = [
        "Number of Transfers",
        "% of Transfers",
        "% Supplier Pathway Transfers",
        "% Paper Fallback",
        "% of error codes",
    ]

    # Create high level table. Missing values are filled first so that rows
    # with a missing grouping key are not dropped by the groupby.
    high_level_table = (
        transfers_sample
        .fillna("N/A")
        .groupby(grouping_columns)
        .size()
        .to_frame("Number of Transfers")
        .reset_index()
    )

    # Count % of transfers
    total_number_transfers = transfers_sample.shape[0]
    high_level_table["% of Transfers"] = (
        high_level_table["Number of Transfers"] / total_number_transfers
    ).multiply(100)

    # Count by supplier pathway. The totals are derived from the table itself
    # so they always use the same fill value as the grouping keys; counting
    # them from a separately filled copy raised KeyError for missing suppliers.
    supplier_pathway_counts = (
        high_level_table
        .groupby(["sending_supplier", "requesting_supplier"])["Number of Transfers"]
        .transform("sum")
    )
    high_level_table["% Supplier Pathway Transfers"] = (
        high_level_table["Number of Transfers"] / supplier_pathway_counts
    ).multiply(100)

    # Add in Paper Fallback column: only populated for rows that were not
    # integrated on time, as a share of transfers with a failure reason.
    total_fallback = transfers_sample["failure_reason"].dropna().shape[0]
    fallback_bool = high_level_table["status"] != "Integrated On Time"
    high_level_table.loc[fallback_bool, "% Paper Fallback"] = (
        high_level_table["Number of Transfers"] / total_fallback
    ).multiply(100)

    # % of error codes column: only populated for rows that carry at least
    # one error code, as a share of all transfers with an error code.
    # The comparison value must stay a tuple so pandas compares it against
    # each cell as a whole.
    no_error_tuple = (("No Error Code", "No Error", "N/A"),)
    error_code_bool = transfers_sample["all_error_codes"] != no_error_tuple
    total_number_of_error_code_combinations = error_code_bool.sum()
    table_error_code_bool = high_level_table["all_error_codes"] != no_error_tuple
    high_level_table.loc[table_error_code_bool, "% of error codes"] = (
        high_level_table.loc[table_error_code_bool, "Number of Transfers"]
        / total_number_of_error_code_combinations
    ).multiply(100)

    # Select and re-order table
    high_level_table = (
        high_level_table[grouping_columns + counting_columns]
        .sort_values(by="Number of Transfers", ascending=False)
    )

    return binarized_error_codes(high_level_table)

In [8]:
# Requester view: one "All" sheet, then one sheet per month and CCG.
requester_view_file = "Error Code Combinations Tables CCG Requester View PRMT-2348.xlsx"
with pd.ExcelWriter(requester_view_file) as writer:
    all_requester_table = generate_high_level_table(
        requesting_ccg_transfers.copy(),
        "requesting_practice_name",
        "requesting_ccg_name",
    )
    all_requester_table.to_excel(writer, sheet_name="All", index=False)

    for month in requesting_ccg_transfers['month'].unique():
        for ccg in ccgs_interested_in:
            in_month = requesting_ccg_transfers['month'] == month
            in_ccg = requesting_ccg_transfers['requesting_ccg_name'] == ccg
            monthly_ccg_transfers = requesting_ccg_transfers[in_month & in_ccg].copy()

            monthly_ccg_table = generate_high_level_table(
                monthly_ccg_transfers,
                "requesting_practice_name",
                "requesting_ccg_name",
            )
            # Sheet name: first five characters of the CCG name plus the month.
            monthly_ccg_table.to_excel(writer, sheet_name=f"{ccg[0:5]} {month}", index=False)


In [9]:
# Sender view: one "All" sheet, then one sheet per month and CCG.
sender_view_file = "Error Code Combinations Tables CCG Sender View PRMT-2348.xlsx"
with pd.ExcelWriter(sender_view_file) as writer:
    all_sender_table = generate_high_level_table(
        sending_ccg_transfers.copy(),
        "sending_practice_name",
        "sending_ccg_name",
    )
    all_sender_table.to_excel(writer, sheet_name="All", index=False)

    for month in sending_ccg_transfers['month'].unique():
        for ccg in ccgs_interested_in:
            in_month = sending_ccg_transfers['month'] == month
            in_ccg = sending_ccg_transfers['sending_ccg_name'] == ccg
            monthly_ccg_transfers = sending_ccg_transfers[in_month & in_ccg].copy()

            monthly_ccg_table = generate_high_level_table(
                monthly_ccg_transfers,
                "sending_practice_name",
                "sending_ccg_name",
            )
            # Sheet name: first five characters of the CCG name plus the month.
            monthly_ccg_table.to_excel(writer, sheet_name=f"{ccg[0:5]} {month}", index=False)
    