# PRMT-2173 Get 3 months of data for medical practice - Y05788

Check whether we can see any transfers into this medical practice (ODS code Y05788) in the last three months.

In [1]:
import pandas as pd

In [2]:
# Load the monthly transfer extracts from S3 and stack them into one frame.
# NOTE(review): only the April 2021 extract is listed here, while the notebook
# title refers to three months of data — confirm this is intentional.
transfer_file_location = "s3://prm-gp2gp-data-sandbox-dev/transfers-sample-5/"
transfer_files = ["2021-4-transfers.parquet"]

transfer_input_files = [
    f"{transfer_file_location}{file_name}" for file_name in transfer_files
]
transfers_raw = pd.concat(
    [pd.read_parquet(path) for path in transfer_input_files]
)

# Extracts built from the PRMT-1742-duplicates-analysis branch include these two
# supplier columns, but they contain only empty values, so they are dropped here.
transfers_raw = transfers_raw.drop(columns=["sending_supplier", "requesting_supplier"])

# Work on a copy so the raw frame stays untouched by the status edits below.
transfers = transfers_raw.copy()

# Treat certain sender errors as failures instead of "pending with error".
# The reasoning is described in PRMT-1974; eventually this will be fixed
# upstream in the pipeline.
pending_sender_error_codes = [6, 7, 10, 24, 30, 23, 14, 99]

# A transfer is reclassified only when BOTH conditions hold: its sender error
# code is in the list above and its current status is PENDING_WITH_ERROR.
transfers_with_pending_sender_code_bool = transfers["sender_error_code"].isin(
    pending_sender_error_codes
)
transfers_with_pending_with_error_bool = transfers["status"].eq("PENDING_WITH_ERROR")
transfers_which_need_pending_to_failure_change_bool = (
    transfers_with_pending_with_error_bool & transfers_with_pending_sender_code_bool
)
transfers.loc[
    transfers_which_need_pending_to_failure_change_bool, "status"
] = "FAILED DUE TO SENDER ERROR CODE"

# Mark INTEGRATED transfers whose sla_duration exceeds eight days (expressed in
# seconds) with the separate status "INTEGRATED LATE".
eight_days_in_seconds = 8 * 24 * 60 * 60
transfers_after_sla_bool = transfers["sla_duration"].gt(eight_days_in_seconds)
transfers_with_integrated_bool = transfers["status"].eq("INTEGRATED")
transfers_integrated_late_bool = (
    transfers_with_integrated_bool & transfers_after_sla_bool
)
transfers.loc[transfers_integrated_late_bool, "status"] = "INTEGRATED LATE"

# # If the record integrated after 28 days, change the status back to pending.
# # This is to handle each month consistently and to always reflect a transfers status 28 days after it was made.
# # TBD how this is handled upstream in the pipeline
# twenty_eight_days_in_seconds=28*24*60*60
# transfers_after_month_bool=transfers['sla_duration']>twenty_eight_days_in_seconds
# transfers_pending_at_month_bool=transfers_after_month_bool & transfers_integrated_late_bool
# transfers.loc[transfers_pending_at_month_bool,'status']='PENDING'
# transfers_with_early_error_bool=(~transfers.loc[:,'sender_error_code'].isna()) |(~transfers.loc[:,'intermediate_error_codes'].apply(len)>0)
# transfers.loc[transfers_with_early_error_bool & transfers_pending_at_month_bool,'status']='PENDING_WITH_ERROR'

# Maps the long supplier names to short display labels.
# A missing supplier (None) is labelled "Unknown".
supplier_renaming = dict(
    [
        ("EGTON MEDICAL INFORMATION SYSTEMS LTD (EMIS)", "EMIS"),
        ("IN PRACTICE SYSTEMS LTD", "Vision"),
        ("MICROTEST LTD", "Microtest"),
        ("THE PHOENIX PARTNERSHIP", "TPP"),
        (None, "Unknown"),
    ]
)

# Build a lookup with one row per ASID from the monthly ASID lookup extracts.
# The files are concatenated in the order listed below, so for each ASID the
# values from the later files in the list take precedence.
asid_file_location = "s3://prm-gp2gp-data-sandbox-dev/asid-lookup/"
asid_files = [
    "asidLookup-Nov-2020.csv.gz",
    "asidLookup-Dec-2020.csv.gz",
    "asidLookup-Jan-2021.csv.gz",
    "asidLookup-Feb-2021.csv.gz",
    "asidLookup-Mar-2021.csv.gz",
    "asidLookup-Apr-2021.csv.gz",
    "asidLookup-May-2021.csv.gz",
]
asid_lookup_files = [f"{asid_file_location}{file_name}" for file_name in asid_files]

# NOTE(review): groupby(...).last() picks the last non-null value of each column
# independently, so one ASID's row could combine values from different months
# if a later extract has gaps — confirm this is acceptable.
asid_lookup = (
    pd.concat([pd.read_csv(path) for path in asid_lookup_files])
    .drop_duplicates()
    .groupby("ASID")
    .last()
    .reset_index()
)

# Only these columns are needed to enrich the transfers.
lookup = asid_lookup.loc[:, ["ASID", "MName", "NACS", "OrgName"]]

# Left-join the lookup once per side of the transfer. The joined columns are
# renamed with a side-specific prefix straight after each join, so the second
# join does not collide with the columns added by the first.
transfers = transfers.merge(
    lookup, how="left", left_on="requesting_practice_asid", right_on="ASID"
).rename(
    columns={
        "MName": "requesting_supplier",
        "ASID": "requesting_supplier_asid",
        "NACS": "requesting_ods_code",
        "OrgName": "requesting_practice_name",
    }
)
transfers = transfers.merge(
    lookup, how="left", left_on="sending_practice_asid", right_on="ASID"
).rename(
    columns={
        "MName": "sending_supplier",
        "ASID": "sending_supplier_asid",
        "NACS": "sending_ods_code",
        "OrgName": "sending_practice_name",
    }
)

# Swap the long supplier names for their short labels on both sides.
transfers["sending_supplier"] = transfers["sending_supplier"].replace(
    list(supplier_renaming), list(supplier_renaming.values())
)
transfers["requesting_supplier"] = transfers["requesting_supplier"].replace(
    list(supplier_renaming), list(supplier_renaming.values())
)

# Make the status human readable: underscores become spaces, then title case.
transfers["status"] = (
    transfers["status"]
    .str.replace("_", " ")
    .str.title()
)

In [3]:
# Index the transfers by conversation ID.
# set_index moves the column into the index, so running this cell a second time
# would raise a KeyError; the guard makes the cell safe to re-run.
if transfers.index.name != "conversation_id":
    transfers = transfers.set_index("conversation_id")

In [4]:
# Count lookup rows per ASID and product name (PName) for ODS code Y05788.
# The label is passed as a list so .loc always returns a DataFrame; with a bare
# scalar label a single matching row would come back as a Series, which has no
# pivot_table method.
(
    asid_lookup.set_index("NACS")
    .loc[["Y05788"]]
    .pivot_table(index="ASID", columns="PName", aggfunc="count", values="OrgName")
    .fillna(0)
    .astype(int)
)

PName,EMIS,EMIS Web
ASID,Unnamed: 1_level_1,Unnamed: 2_level_1
200000008928,0,1
200000019320,1,0


In [5]:
# Show every transfer whose requesting practice has ODS code Y05788.
transfers.loc[transfers["requesting_ods_code"].eq("Y05788")]

Unnamed: 0_level_0,sla_duration,requesting_practice_asid,sending_practice_asid,sender_error_code,final_error_codes,intermediate_error_codes,status,date_requested,date_completed,requesting_supplier_asid,requesting_supplier,requesting_ods_code,requesting_practice_name,sending_supplier_asid,sending_supplier,sending_ods_code,sending_practice_name
conversation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
F901A9F8-B512-4898-9A0E-2D481979D1FB,2500201.0,200000008928,532002786012,,[15.0],[],Integrated Late,2021-04-12 10:25:15.347,2021-05-11 08:56:10.133,200000008928,EMIS,Y05788,COMPASS MEDICAL PRACTICE,532002786012,EMIS,M86037,QUINTON PARK MEDICAL CENTRE
B473D33A-77F9-42D3-86EC-5ACC05F3629A,2234784.0,200000008928,947085239046,,[17.0],[],Failed,2021-04-15 12:09:54.670,2021-05-11 08:57:43.265,200000008928,EMIS,Y05788,COMPASS MEDICAL PRACTICE,947085239046,EMIS,P81149,LOCKWOOD GP SURGERY


## Other: Splunk Investigation

Using both ASID codes

```
index="spine2vfmmonitor" service="gp2gp" logReference="MPS0053d" interactionID="urn:nhs:names:services:gp2gp/RCMR_IN010000UK05" messageSender IN (200000008928, 200000019320)
```

For the three months: 16/03/2021 11:00:00.000 to 16/06/2021 11:00:00.000

We identified 2 conversations that took place during this time:
- B473D33A-77F9-42D3-86EC-5ACC05F3629A
- F901A9F8-B512-4898-9A0E-2D481979D1FB

Confirmed with a second Splunk query:
```
index="spine2vfmmonitor" service="gp2gp" logReference="MPS0053d" interactionID="urn:nhs:names:services:gp2gp/RCMR_IN010000UK05" fromGP="COMPASS MEDICAL PRACTICE"
```

Taking a closer look using:
```
index="spine2vfmmonitor" service="gp2gp" logReference="MPS0053d" conversationID IN (B473D33A-77F9-42D3-86EC-5ACC05F3629A,F901A9F8-B512-4898-9A0E-2D481979D1FB) 
| eval interactionName=case(
  interactionID=="urn:nhs:names:services:gp2gp/RCMR_IN010000UK05", "request started", 
  interactionID=="urn:nhs:names:services:gp2gp/RCMR_IN030000UK06", "request completed",
  interactionID=="urn:nhs:names:services:gp2gp/COPC_IN000001UK01", "common point to point",
  interactionID=="urn:nhs:names:services:gp2gp/MCCI_IN010000UK13", "application acknowledgement")
| table _time, conversationID, GUID, interactionID, messageSender, messageRecipient, messageRef, jdiEvent, toSystem, fromSystem, interactionName
```