# Summary of transfer outcomes for practice M85092

**Context**

We would like to see a summary of transfer outcomes where the sending practice is M85092 for April data (if available), otherwise March data. 

NB: The April data contained only 8 relevant transfers, so we also used March data (which contained 2600 transfers).

**Scope**

- Breakdown of transfers out, per month
    - Show outcome of each transfer
    - Show which practices they were sent to

In [9]:
import pandas as pd

In [10]:
# Load the monthly transfer extracts (September 2020 to April 2021) from S3
# and stack them into a single dataframe.
transfer_file_location = "s3://prm-gp2gp-data-sandbox-dev/transfers-sample-5/"
transfer_months = [
    (2020, 9),
    (2020, 10),
    (2020, 11),
    (2020, 12),
    (2021, 1),
    (2021, 2),
    (2021, 3),
    (2021, 4),
]
transfer_files = [f"{year}-{month}-transfers.parquet" for year, month in transfer_months]

# NOTE: an alternative April extract exists at
# s3://prm-gp2gp-data-sandbox-dev/duplicate-fix-14-day-cut-off/2021/04/transfers.parquet
# but it is not loaded here.
transfer_input_files = [transfer_file_location + file_name for file_name in transfer_files]
transfers_raw = pd.concat([pd.read_parquet(path) for path in transfer_input_files])

# In the data from the PRMT-1742-duplicates-analysis branch these two columns
# have been added, but contain only empty values, so they are dropped here.
transfers_raw = transfers_raw.drop(columns=["sending_supplier", "requesting_supplier"])

# Work on a copy so that the raw data stays available unchanged.
transfers = transfers_raw.copy()

# Transfers that are PENDING_WITH_ERROR and carry one of the sender error
# codes below are re-labelled as FAILED.
# This is explained in PRMT-1974. Eventually this will be fixed upstream in the pipeline.
pending_sender_error_codes = [6, 7, 10, 24, 30, 23, 14, 99]
transfers_with_pending_sender_code_bool = transfers["sender_error_code"].isin(
    pending_sender_error_codes
)
transfers_with_pending_with_error_bool = transfers["status"].eq("PENDING_WITH_ERROR")
transfers_which_need_pending_to_failure_change_bool = (
    transfers_with_pending_sender_code_bool & transfers_with_pending_with_error_bool
)
transfers.loc[transfers_which_need_pending_to_failure_change_bool, "status"] = "FAILED"

# Introduce an "INTEGRATED LATE" status: transfers that are INTEGRATED but whose
# sla_duration exceeds eight days (expressed in seconds).
eight_days_in_seconds = 8 * 24 * 60 * 60
transfers_with_integrated_bool = transfers["status"].eq("INTEGRATED")
transfers_after_sla_bool = transfers["sla_duration"].gt(eight_days_in_seconds)
transfers_integrated_late_bool = transfers_after_sla_bool & transfers_with_integrated_bool
transfers.loc[transfers_integrated_late_bool, "status"] = "INTEGRATED LATE"

# If the record integrated after 14 days, change the status back to pending.
# This is to handle each month consistently and to always reflect a transfer's
# status 14 days after it was made. (The "*_month_*" variable names are kept
# as they were; the cut-off actually applied is fourteen days.)
# TBD how this is handled upstream in the pipeline
fourteen_days_in_seconds = 14 * 24 * 60 * 60
transfers_after_month_bool = transfers["sla_duration"] > fourteen_days_in_seconds
transfers_pending_at_month_bool = transfers_after_month_bool & transfers_integrated_late_bool
transfers.loc[transfers_pending_at_month_bool, "status"] = "PENDING"

# A transfer reverted to pending counts as "pending with error" when it has a
# sender error code or at least one intermediate error code.
# NOTE: the length test must not be prefixed with `~`. Unary `~` binds tighter
# than `>`, so `~lengths > 0` evaluates `(-lengths - 1) > 0`, which is never
# true and silently ignored every intermediate error code.
transfers_has_sender_error_bool = transfers["sender_error_code"].notna()
transfers_has_intermediate_error_bool = transfers["intermediate_error_codes"].apply(len) > 0
transfers_with_early_error_bool = (
    transfers_has_sender_error_bool | transfers_has_intermediate_error_bool
)
transfers.loc[
    transfers_with_early_error_bool & transfers_pending_at_month_bool, "status"
] = "PENDING_WITH_ERROR"

# Short display names for the supplier names found in the ASID lookup;
# a missing supplier is shown as "Unknown".
supplier_renaming = {
    "EGTON MEDICAL INFORMATION SYSTEMS LTD (EMIS)": "EMIS",
    "IN PRACTICE SYSTEMS LTD": "Vision",
    "MICROTEST LTD": "Microtest",
    "THE PHOENIX PARTNERSHIP": "TPP",
    None: "Unknown",
}

asid_lookup_file = "s3://prm-gp2gp-data-sandbox-dev/asid-lookup/asidLookup-Mar-2021.csv.gz"
asid_lookup = pd.read_csv(asid_lookup_file)
lookup = asid_lookup[["ASID", "MName", "NACS", "OrgName"]]

# Attach supplier, ODS code and practice name for each side of the transfer by
# left-joining the lookup on that side's ASID, then prefixing the joined columns
# with the side's name and shortening the supplier name.
for role in ("requesting", "sending"):
    transfers = transfers.merge(
        lookup, left_on=f"{role}_practice_asid", right_on="ASID", how="left"
    )
    transfers = transfers.rename(
        columns={
            "MName": f"{role}_supplier",
            "ASID": f"{role}_supplier_asid",
            "NACS": f"{role}_ods_code",
            "OrgName": f"{role}_practice_name",
        }
    )
    transfers[f"{role}_supplier"] = transfers[f"{role}_supplier"].replace(
        supplier_renaming.keys(), supplier_renaming.values()
    )

# Turn status codes such as "PENDING_WITH_ERROR" into human-readable labels
# such as "Pending With Error".
readable_status = transfers["status"].str.replace("_", " ")
transfers["status"] = readable_status.str.title()



In [11]:
# Label each transfer with the year and (unpadded) month of its request date, e.g. "2021-3".
request_dates = transfers["date_requested"].dt
request_year = request_dates.year.astype(str)
request_month = request_dates.month.astype(str)
transfers["Month of Transfer Request"] = request_year + "-" + request_month

In [12]:
# Keep only the transfers sent by the practice of interest (ODS code M85092).
practice_of_interest_bool = transfers["sending_ods_code"].eq("M85092")
practice_transfers = transfers.loc[practice_of_interest_bool]

In [13]:
# Build a table counting transfers per requesting practice (rows) for every
# month/status pair (columns); the status is the one at 14 days.
# Practices and statuses are ordered most common first; months keep their
# order of first appearance in the data.
ordered_requesting_practice_names = (
    practice_transfers["requesting_practice_name"].value_counts().index
)
ordered_status = practice_transfers["status"].value_counts().index
ordered_dates = practice_transfers["Month of Transfer Request"].drop_duplicates().values

practice_transfers_count_table = practice_transfers.pivot_table(
    index="requesting_practice_name",
    columns=["Month of Transfer Request", "status"],
    values="conversation_id",
    aggfunc="count",
)

# Only month/status combinations that actually occur in the pivot table can be selected.
ordered_columns = [
    (date, status)
    for date in ordered_dates
    for status in ordered_status
    if (date, status) in practice_transfers_count_table.columns
]

practice_transfers_count_table = (
    practice_transfers_count_table
    .loc[ordered_requesting_practice_names, ordered_columns]
    .fillna(0)
    .astype(int)
)
practice_transfers_count_table.to_csv(
    "s3://prm-gp2gp-data-sandbox-dev/notebook-outputs/38b-PRMT-2076-M85092-14-day-transfers-out.csv"
)

In [14]:
# Show the total number of transfer requests per month, in the same month order as the table above.
monthly_request_counts = practice_transfers["Month of Transfer Request"].value_counts()
monthly_request_counts[ordered_dates].rename("Total Transfer Requests").to_frame()

Unnamed: 0,Total Transfer Requests
2020-9,31
2020-10,50
2020-11,35
2020-12,484
2021-1,1869
2021-2,1150
2021-3,2600
2021-4,8


In [27]:
# Write the count table to an Excel workbook, one worksheet per month, using
# XlsxWriter as the engine.
# The writer is used as a context manager so the workbook is saved and closed
# on exit, even if writing a sheet fails. This replaces `writer.save()`, which
# is deprecated and no longer exists in pandas 2.0.
with pd.ExcelWriter("PRMT-2076-M85092-outcomes.xlsx", engine="xlsxwriter") as writer:
    # A plain loop (rather than a list comprehension) because only the side
    # effect of writing each sheet is wanted.
    for month in ordered_dates:
        practice_transfers_count_table[month].to_excel(writer, sheet_name=month)