# PRMT-2348 Analyse specific CCG data

We’ve done some initial analysis (PRMT-2332) to understand whether CCGs undergoing Lloyd George digitisation are generating more failures than the national average. A couple of CCGs (listed below) are above average for certain types of failure. This story looks in more depth at the reasons for failures in those CCGs.

Create the error code combinations table (PRMT-2269) for each of the following CCGs, with a separate table for May, June, and July:

- Fylde and Wyre
- Birmingham and Solihull

In [1]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer

In [2]:
# Monthly GP2GP transfer extracts for months 5, 6 and 7 of 2021 (preprod bucket).
transfer_file_location = "s3://prm-gp2gp-transfer-data-preprod/v4/2021/"
transfer_files = ["5/transfers.parquet", "6/transfers.parquet", "7/transfers.parquet"]
transfer_input_files = [transfer_file_location + relative_path for relative_path in transfer_files]

# Read every monthly parquet file and stack them into a single frame.
transfers_raw = pd.concat(list(map(pd.read_parquet, transfer_input_files)))

# Work on a copy so the raw extract stays untouched for later cells.
transfers = transfers_raw.copy(deep=True)

In [3]:
# Supplier name mapping: long supplier names (and missing values) -> short display names
supplier_renaming = {
    "EGTON MEDICAL INFORMATION SYSTEMS LTD (EMIS)":"EMIS",
    "IN PRACTICE SYSTEMS LTD":"Vision",
    "MICROTEST LTD":"Microtest",
    "THE PHOENIX PARTNERSHIP":"TPP",
    "SystmOne": "TPP",
    None: "Unknown"
}

# Generate an ASID lookup that contains the most recent entry for every ASID encountered.
# Files are concatenated oldest-first, so groupby(...).last() keeps the newest row per ASID.
asid_lookup_file_location = "s3://prm-gp2gp-asid-lookup-preprod/"
asid_lookup_files = [
    "2021/7/asidLookup.csv.gz",
    "2021/8/asidLookup.csv.gz",
    "2021/9/asidLookup.csv.gz"
]
asid_lookup_input_files = [asid_lookup_file_location + f for f in asid_lookup_files]
asid_lookup = pd.concat((
    pd.read_csv(f)
    for f in asid_lookup_input_files
)).drop_duplicates().groupby("ASID").last().reset_index()
lookup = asid_lookup[["ASID", "NACS","OrgName"]]

# Start from transfers_raw (merge returns a new frame) rather than from the already-enriched
# `transfers`, so re-running this cell does not merge the lookup a second time and
# create duplicate ODS code / practice name columns.
transfers = transfers_raw.merge(lookup, left_on='requesting_practice_asid',right_on='ASID',how='left')
transfers = transfers.rename({'ASID': 'requesting_supplier_asid', 'NACS': 'requesting_ods_code','OrgName':'requesting_practice_name'}, axis=1)
transfers = transfers.merge(lookup, left_on='sending_practice_asid',right_on='ASID',how='left')
transfers = transfers.rename({'ASID': 'sending_supplier_asid', 'NACS': 'sending_ods_code','OrgName':'sending_practice_name'}, axis=1)

# Replace the long supplier names with their short display names
transfers["sending_supplier"] = transfers["sending_supplier"].replace(supplier_renaming.keys(), supplier_renaming.values())
transfers["requesting_supplier"] = transfers["requesting_supplier"].replace(supplier_renaming.keys(), supplier_renaming.values())

# Make the status more human readable: underscores become spaces and each word is capitalised
transfers["status"] = transfers["status"].str.replace("_", " ").str.title()

In [4]:
# CCG practices copied from practiceMetrics.json - July 2021
# Maps each CCG name under investigation to the list of ODS codes of its practices.
# filter_by_ods_code iterates over this dict to label transfers with their CCG.
ods_codes_to_investigate=dict()
# Practices belonging to the Fylde and Wyre CCG
ods_codes_to_investigate['Fylde and Wyre']=["P81037", "P81129", "P81742", "P81086", "P81157", "P81668", "P81006", "P81059", "P81077", "P81128", "P81149", "P81150", "P81031", "P81191", "P81089", "P81737", "P81087", "P81079", "P81133"]
# Practices belonging to the Birmingham and Solihull CCG
ods_codes_to_investigate['Birmingham and Solihull']=["M85736", "M85139", "M85123", "M85149", "Y01068", "M85158", "M89030", "M89013", "M85026", "M85749", "M85779", "M85118", "M89016", "M85766", "M85117", "M85025", "M85159", "M85732", "M85781", "M89008", "M85041", "M85671", "M85113", "M85701", "M88006", "M85642", "M85679", "M85013", "M85048", "M85711", "M85086", "M85029", "M85087", "M85792", "M85680", "M85699", "M89021", "M85097", "M89012", "M89010", "M85717", "M85716", "M85686", "M85065", "M85706", "M85136", "M85128", "M85079", "M85693", "M85116", "M85154", "M85107", "M85051", "M81062", "M85694", "M85170", "M89003", "M89024", "M85670", "M89027", "M85006", "M85735", "Y00159", "Y03597", "M89608", "M85058", "M85043", "M85713", "M85016", "M89019", "Y02567", "M85076", "Y02893", "M85143", "M85088", "M85142", "M85014", "M85037", "M89002", "M85600", "M85155", "M85027", "M85730", "M85028", "M89009", "M85179", "M85063", "M85023", "M85746", "M85733", "M85021", "M85722", "M85759", "M89015", "M85077", "Y02794", "M85078", "M85034", "M85739", "M89001", "M85803", "M85624", "Y02571", "M91642", "M85070", "M89007", "M85172", "M85156", "M85171", "M85110", "M85141", "M85055", "M85042", "M85001", "M85062", "M88020", "M85108", "M89017", "M85756", "M85774", "M85030", "M89005", "M85783", "M85046", "M85115", "M85177", "M85794", "M85174", "M89026", "M85081", "M85031", "M85175", "Y02620", "Y05826", "M85146", "M85024", "M85033", "M85060", "M85005", "M85008", "M85770", "M85053", "M85011", "M85084", "M85669", "M85167", "M85782", "M85753", "M85105", "M85074", "M85066", "M85153", "M85056", "M85134", "M85007", "M85035", "M85047", "M85071", "M85061", "M85094", "M85018"]

In [5]:
def filter_by_ods_code(ods_code_field, transfers_sample):
    """Label transfers with the CCG of their practice and keep only the labelled ones.

    Args:
        ods_code_field: Name of the column in ``transfers_sample`` holding the practice
            ODS code to match on (e.g. "sending_ods_code" or "requesting_ods_code").
        transfers_sample: DataFrame of transfers. A "CCG" column is added to it in place,
            so pass a copy if the caller's frame must stay unchanged.

    Returns:
        A copy of the rows whose ODS code belongs to any CCG in the module-level
        ``ods_codes_to_investigate`` dict, with "CCG" set to that CCG's name.
    """
    transfers_sample['CCG'] = None
    for investigation_group, practices_of_interest in ods_codes_to_investigate.items():
        is_of_interest_bool = transfers_sample[ods_code_field].isin(practices_of_interest)
        transfers_sample.loc[is_of_interest_bool, 'CCG'] = investigation_group

    # Filter only after every CCG has been labelled; returning from inside the loop
    # would label the first CCG only and silently drop all the others.
    ccg_practice_bool = pd.notna(transfers_sample["CCG"])
    transfers_of_interest = transfers_sample[ccg_practice_bool].copy()
    return transfers_of_interest

# Build one view keyed on the sending practice's ODS code and one keyed on the requesting
# practice's. Each call gets its own copy because filter_by_ods_code writes a 'CCG' column
# into the frame it is given.
ccgs_sender_view = filter_by_ods_code(
    ods_code_field="sending_ods_code",
    transfers_sample=transfers.copy(),
)
ccgs_requester_view = filter_by_ods_code(
    ods_code_field="requesting_ods_code",
    transfers_sample=transfers.copy(),
)

In [6]:
import paths, data
# Load the GP2GP response codes and build a Series of error names indexed by error code.
error_code_lookup_file = pd.read_csv(data.gp2gp_response_codes.path)
error_code_lookup = (
    error_code_lookup_file
    .set_index("ErrorCode")
    ["ErrorName"]
)

In [7]:
def convert_error_list_to_tuple(error_code_list, error_code_type):
    """Turn a collection of error codes into (type, code, name) tuples.

    Duplicate codes are collapsed and NaN entries are skipped. The name of each code
    is looked up in the module-level ``error_code_lookup``.

    Args:
        error_code_list: Iterable of numeric error codes (may contain NaN).
        error_code_type: Label placed first in every tuple, e.g. "Sender".

    Returns:
        A list of ``(error_code_type, error_code, error_name)`` tuples.
    """
    labelled_errors = []
    for error_code in set(error_code_list):
        if np.isnan(error_code):
            continue
        labelled_errors.append((error_code_type, error_code, error_code_lookup[error_code]))
    return labelled_errors
    
def combine_error_codes(row):
    """Collect the sender, intermediate (COPC) and final error codes of one transfer.

    Args:
        row: A transfer record exposing "sender_error_codes",
            "intermediate_error_codes" and "final_error_codes".

    Returns:
        A tuple of ``(type, code, name)`` tuples in sender, COPC, final order, or a
        single placeholder tuple when the transfer carries no error code at all.
    """
    labelled_columns = (
        ("sender_error_codes", "Sender"),
        ("intermediate_error_codes", "COPC"),
        ("final_error_codes", "Final"),
    )
    full_error_code_list = []
    for column_name, error_code_type in labelled_columns:
        full_error_code_list += convert_error_list_to_tuple(row[column_name], error_code_type)

    if full_error_code_list:
        return tuple(full_error_code_list)
    # Placeholder keeps the column hashable and groupable for transfers without errors.
    return (("No Error Code", "No Error", "N/A"),)
    
# Attach the combined (type, code, name) error tuples to every transfer in both views.
ccgs_sender_view["all_error_codes"] = ccgs_sender_view.apply(
    combine_error_codes, axis="columns"
)
ccgs_requester_view["all_error_codes"] = ccgs_requester_view.apply(
    combine_error_codes, axis="columns"
)

In [8]:
def binarized_error_codes(table_sample):
    """Append one indicator column per error code found in "all_error_codes".

    Args:
        table_sample: DataFrame with an "all_error_codes" column of (type, code, name)
            tuples. An "error_code_list" helper column is written onto it in place.

    Returns:
        ``table_sample`` with the binarized error code columns appended and the
        "error_code_list" helper column removed from the returned frame.
    """
    def _non_string_codes(error_tuple_list):
        # Keep only the code itself (second element); string entries are skipped.
        return [entry[1] for entry in error_tuple_list if type(entry[1]) != str]

    # Keeping this in case we want to consolidate error codes based on the int value
    # rather than combined with the error code type.
    table_sample["error_code_list"] = table_sample["all_error_codes"].apply(_non_string_codes)

    # Split out error codes so we can use them to filter
    binarizer = MultiLabelBinarizer()
    indicator_matrix = binarizer.fit_transform(table_sample["error_code_list"])
    indicator_frame = pd.DataFrame(
        data=indicator_matrix,
        columns=binarizer.classes_,
        index=table_sample.index,
    )

    combined = pd.concat([table_sample, indicator_frame], axis=1)
    return combined.drop('error_code_list', axis=1)
    

In [9]:
def generate_high_level_table(transfers_sample, practice_type):
    """Build the error code combinations table for a sample of transfers.

    Transfers are grouped by CCG, practice, supplier pathway, status, failure reason and
    error code combination, and each group is counted and expressed as several percentages.

    Args:
        transfers_sample: DataFrame of transfers with at least the columns "CCG",
            ``practice_type``, "requesting_supplier", "sending_supplier", "status",
            "failure_reason", "all_error_codes" and "conversation_id".
        practice_type: Name of the practice column to group by
            (e.g. "sending_practice_name" or "requesting_practice_name").

    Returns:
        DataFrame sorted by "Number of Transfers" (descending) with the grouping columns,
        then:
          * "Number of Transfers" - rows in the group;
          * "% of Transfers" - share of all rows in ``transfers_sample``;
          * "% Supplier Pathway Transfers" - share of the rows with the same
            sending/requesting supplier pair;
          * "% Paper Fallback" - only for groups whose status is not "Integrated On Time":
            count relative to the number of transfers that have a failure reason;
          * "% of error codes" - only for groups with at least one error code: count
            relative to the number of transfers that have any error code;
        followed by the columns added by ``binarized_error_codes``.
    """
    grouping_columns_order = ["CCG", practice_type, "requesting_supplier", "sending_supplier", "status", "failure_reason", "all_error_codes"]
    counting_columns_order = ["Number of Transfers", "% of Transfers", "% Supplier Pathway Transfers", "% Paper Fallback", "% of error codes"]
    no_error_tuple = tuple([("No Error Code", "No Error", "N/A")])

    # Fill missing values once and reuse the same frame for the table and the pathway
    # counts. Using different fill values for the two ("N/A" vs "Unknown") made the
    # pathway lookup below raise KeyError for any transfer with a missing supplier.
    transfers_filled = transfers_sample.fillna("N/A")

    # Create High level table
    high_level_table = transfers_filled.groupby(grouping_columns_order).agg({"conversation_id": "count"})
    high_level_table = high_level_table.rename({"conversation_id": "Number of Transfers"}, axis=1).reset_index()

    # Count % of transfers
    total_number_transfers = transfers_sample.shape[0]
    high_level_table["% of Transfers"] = (high_level_table["Number of Transfers"] / total_number_transfers).multiply(100)

    # Count by supplier pathway
    supplier_pathway_counts = transfers_filled.groupby(by=["sending_supplier", "requesting_supplier"]).agg({"conversation_id": "count"})["conversation_id"]
    high_level_table["% Supplier Pathway Transfers"] = high_level_table.apply(
        lambda row: row["Number of Transfers"] / supplier_pathway_counts.loc[(row["sending_supplier"], row["requesting_supplier"])],
        axis=1,
    ).multiply(100)

    # Add in Paper Fallback columns (failure_reason is counted on the unfilled sample)
    total_fallback = transfers_sample["failure_reason"].dropna().shape[0]
    fallback_bool = high_level_table["status"] != "Integrated On Time"
    high_level_table.loc[fallback_bool, "% Paper Fallback"] = (high_level_table["Number of Transfers"] / total_fallback).multiply(100)

    # % of error codes column
    error_code_bool = transfers_sample["all_error_codes"] != no_error_tuple
    total_number_of_error_code_combinations = error_code_bool.sum()
    table_error_code_bool = high_level_table["all_error_codes"] != no_error_tuple
    high_level_table.loc[table_error_code_bool, "% of error codes"] = (high_level_table.loc[table_error_code_bool, "Number of Transfers"] / total_number_of_error_code_combinations).multiply(100)

    # Select and re-order table
    high_level_table = high_level_table[grouping_columns_order + counting_columns_order].sort_values(by="Number of Transfers", ascending=False)

    high_level_table = binarized_error_codes(high_level_table)
    return high_level_table

In [18]:
# Tag every transfer with the calendar month (monthly Period) in which it was requested.
ccgs_sender_view["month"] = ccgs_sender_view["date_requested"].dt.to_period(freq="M")
ccgs_requester_view["month"] = ccgs_requester_view["date_requested"].dt.to_period(freq="M")

In [19]:
# Sender view workbook: an "All" sheet covering every transfer, then one sheet per month.
with pd.ExcelWriter("Error Code Combinations Tables CCG Sender View PRMT-2348.xlsx") as writer:
    generate_high_level_table(ccgs_sender_view.copy(), "sending_practice_name").to_excel(writer, sheet_name="All", index=False)
    # Explicit loop instead of a list comprehension: the sheets are written purely for
    # their side effect, so there is no list worth building.
    for month in ccgs_sender_view['month'].unique():
        monthly_transfers = ccgs_sender_view[ccgs_sender_view['month'] == month].copy()
        generate_high_level_table(monthly_transfers, "sending_practice_name").to_excel(writer, sheet_name=str(month), index=False)

In [20]:
# Requester view workbook: an "All" sheet covering every transfer, then one sheet per month.
with pd.ExcelWriter("Error Code Combinations Tables CCG Requester View PRMT-2348.xlsx") as writer:
    generate_high_level_table(ccgs_requester_view.copy(), "requesting_practice_name").to_excel(writer, sheet_name="All", index=False)
    # Explicit loop instead of a list comprehension: the sheets are written purely for
    # their side effect, so there is no list worth building.
    for month in ccgs_requester_view['month'].unique():
        monthly_transfers = ccgs_requester_view[ccgs_requester_view['month'] == month].copy()
        generate_high_level_table(monthly_transfers, "requesting_practice_name").to_excel(writer, sheet_name=str(month), index=False)