Hypothesis
We believe that for two Vision practices, pending Vision transfers look different to EMIS and TPP pending transfers
We will know this to be true when we can see different patterns in the data for each supplier in terms of the number of messages per conversation 

Scope
Look at the following practice ASID codes

896286726030

244934959036

Compare the makeup of messages per conversation across pending transfers for each supplier and identify if there’s any patterns 

Generate a sample of 10 conversation IDs per practice, for pending transfers that are Vision 2 Vision

Show the makeup of number of messages per conversation ID

Acceptance Criteria
We have a list of 20 conversation IDs for vision to vision pending transfers for the two practices stated, and we know how many messages there are for each of these conversations

We have a confluence page that shows any patterns that either prove or disprove the hypothesis

In [17]:
import pandas as pd
import numpy as np
# Using data generated from branch PRMT-1742-duplicates-analysis.
# This is needed to correctly handle duplicates.
# Once the upstream pipeline has a fix for duplicate EHRs, then we can go back to using the main output.
transfer_file_location = "s3://prm-gp2gp-data-sandbox-dev/transfers-duplicates-hypothesis/"
transfer_files = [
    "9-2020-transfers.parquet",
    "10-2020-transfers.parquet",
    "11-2020-transfers.parquet",
    "12-2020-transfers.parquet",
    "1-2021-transfers.parquet",
    "2-2021-transfers.parquet"
]

transfer_input_files = [transfer_file_location + f for f in transfer_files]
transfers_raw = pd.concat((
    pd.read_parquet(f)
    for f in transfer_input_files
))

# In the data from the PRMT-1742-duplicates-analysis branch, these columns have been added , but contain only empty values.
transfers_raw = transfers_raw.drop(["sending_supplier", "requesting_supplier"], axis=1)


# Given the findings in PRMT-1742 - many duplicate EHR errors are misclassified, the below reclassifies the relevant data

has_at_least_one_successful_integration_code = lambda errors: any((np.isnan(e) or e==15 for e in errors))
successful_transfers_bool = transfers_raw['request_completed_ack_codes'].apply(has_at_least_one_successful_integration_code)
transfers = transfers_raw.copy()
transfers.loc[successful_transfers_bool, "status"] = "INTEGRATED"

# Correctly interpret certail sender errors as failed.
# This is explained in PRMT-1974. Eventaully this will be fixed upstream in the pipeline. 
pending_sender_error_codes=[6,7,10,24,30,23,14,99]
transfers_with_pending_sender_code_bool=transfers['sender_error_code'].isin(pending_sender_error_codes)
transfers_with_pending_with_error_bool=transfers['status']=='PENDING_WITH_ERROR'
transfers_which_need_pending_to_failure_change_bool=transfers_with_pending_sender_code_bool & transfers_with_pending_with_error_bool
transfers.loc[transfers_which_need_pending_to_failure_change_bool,'status']='FAILED'

# Add integrated Late status
eight_days_in_seconds=8*24*60*60
transfers_after_sla_bool=transfers['sla_duration']>eight_days_in_seconds
transfers_with_integrated_bool=transfers['status']=='INTEGRATED'
transfers_integrated_late_bool=transfers_after_sla_bool & transfers_with_integrated_bool
transfers.loc[transfers_integrated_late_bool,'status']='INTEGRATED LATE'

# If the record integrated after 28 days, change the status back to pending.
# This is to handle each month consistentently and to always reflect a transfers status 28 days after it was made.
# TBD how this is handled upstream in the pipeline
twenty_eight_days_in_seconds=28*24*60*60
transfers_after_month_bool=transfers['sla_duration']>twenty_eight_days_in_seconds
transfers_pending_at_month_bool=transfers_after_month_bool & transfers_integrated_late_bool
transfers.loc[transfers_pending_at_month_bool,'status']='PENDING'
transfers_with_early_error_bool=(~transfers.loc[:,'sender_error_code'].isna()) |(~transfers.loc[:,'intermediate_error_codes'].apply(len)>0)
transfers.loc[transfers_with_early_error_bool & transfers_pending_at_month_bool,'status']='PENDING_WITH_ERROR'

# Supplier name mapping
supplier_renaming = {
    "EGTON MEDICAL INFORMATION SYSTEMS LTD (EMIS)":"EMIS",
    "IN PRACTICE SYSTEMS LTD":"Vision",
    "MICROTEST LTD":"Microtest",
    "THE PHOENIX PARTNERSHIP":"TPP",
    None: "Unknown"
}

asid_lookup_file = "s3://prm-gp2gp-data-sandbox-dev/asid-lookup/asidLookup-Mar-2021.csv.gz"
asid_lookup = pd.read_csv(asid_lookup_file)
lookup = asid_lookup[["ASID", "MName", "NACS","OrgName"]]

transfers = transfers.merge(lookup, left_on='requesting_practice_asid',right_on='ASID',how='left')
transfers = transfers.rename({'MName': 'requesting_supplier', 'ASID': 'requesting_supplier_asid', 'NACS': 'requesting_ods_code','OrgName':'requesting_practice_name'}, axis=1)
transfers = transfers.merge(lookup, left_on='sending_practice_asid',right_on='ASID',how='left')
transfers = transfers.rename({'MName': 'sending_supplier', 'ASID': 'sending_supplier_asid', 'NACS': 'sending_ods_code','OrgName':'sending_practice_name'}, axis=1)

transfers["sending_supplier"] = transfers["sending_supplier"].replace(supplier_renaming.keys(), supplier_renaming.values())
transfers["requesting_supplier"] = transfers["requesting_supplier"].replace(supplier_renaming.keys(), supplier_renaming.values())

In [26]:
practice_asids=['896286726030' ,'244934959036']
relevant_practice_bool=transfers['requesting_practice_asid'].isin(practice_asids)
from_vision_bool=transfers['sending_supplier']=='Vision'
pending_status_bool=transfers['status']=='PENDING'
relevant_data_bool=(relevant_practice_bool)&(from_vision_bool)&(pending_status_bool)
relevant_transfers=transfers.loc[relevant_data_bool]

In [32]:
practice_1_data=relevant_transfers.loc[relevant_transfers['requesting_practice_asid']==practice_asids[0]]
practice_1_data.sample(n=10)['conversation_id'].values

array(['610DEEDD-AB9A-420B-A3C9-24D3C5282858',
       '8C68C440-1993-4E6F-A27B-7079FFB2DD5D',
       'EA15568A-5802-4FE0-BA78-DFFFE31D8343',
       'D9802ED9-28BA-45AD-BAA3-099E8F1DF832',
       'AD4DD41D-CFA6-4019-BBF5-FC49F745F8B3',
       '5C0BDBEE-C659-4E2D-957C-257B258D806E',
       '3EF1056B-BEF5-483B-B366-867A26AC8C61',
       'CA653500-6C89-46FB-A6E9-F5D3E24FED39',
       '44A7F555-E964-45CD-9820-45079CE38A51',
       '36D11428-665F-41EA-A688-11C417544BE9'], dtype=object)

In [33]:
practice_2_data=relevant_transfers.loc[relevant_transfers['requesting_practice_asid']==practice_asids[1]]
practice_2_data.sample(n=10)['conversation_id'].values

array(['0CFB58BA-2EF7-4673-884C-6E71F05E3935',
       '01CA24E1-1075-4F8D-875B-6CBA88C42721',
       '12F6415C-B273-4257-A5A9-C48D91E6F149',
       '75D58513-A8A3-4D39-98A0-837445DB77AD',
       '067AC3B2-4BCD-44CF-86D3-84F4D5B8EB86',
       'AD32CBF1-4A08-4BBE-B212-4B2259387FA5',
       '23C185BE-A06A-40EA-889E-4A95D4D8C818',
       'A396349B-FB2B-4048-9D36-FA6ACA33768D',
       '211D9AFB-76D7-434A-85E4-DED596464460',
       'A9026CCD-CAE0-492E-84F2-1EBAA7849A1C'], dtype=object)

In [34]:
transfers.set_index('conversation_id').loc[['610DEEDD-AB9A-420B-A3C9-24D3C5282858',
       '8C68C440-1993-4E6F-A27B-7079FFB2DD5D',
       'EA15568A-5802-4FE0-BA78-DFFFE31D8343',
       'D9802ED9-28BA-45AD-BAA3-099E8F1DF832',
       'AD4DD41D-CFA6-4019-BBF5-FC49F745F8B3',
       '5C0BDBEE-C659-4E2D-957C-257B258D806E',
       '3EF1056B-BEF5-483B-B366-867A26AC8C61',
       'CA653500-6C89-46FB-A6E9-F5D3E24FED39',
       '44A7F555-E964-45CD-9820-45079CE38A51',
       '36D11428-665F-41EA-A688-11C417544BE9']]

Unnamed: 0_level_0,sla_duration,requesting_practice_asid,sending_practice_asid,sender_error_code,final_error_code,intermediate_error_codes,status,date_requested,date_completed,request_completed_ack_codes,requesting_supplier_asid,requesting_supplier,requesting_ods_code,requesting_practice_name,sending_supplier_asid,sending_supplier,sending_ods_code,sending_practice_name
conversation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
610DEEDD-AB9A-420B-A3C9-24D3C5282858,,896286726030,727881965014,,,[],PENDING,2021-02-16 16:25:10.529,NaT,[],896286726030,Vision,G83027,EVEREST HEALTH PARTNERSHIP,727881965014,Vision,G83663,BRISET CORNER SURGERY
8C68C440-1993-4E6F-A27B-7079FFB2DD5D,,896286726030,727881965014,,,[],PENDING,2021-02-16 16:21:07.621,NaT,[],896286726030,Vision,G83027,EVEREST HEALTH PARTNERSHIP,727881965014,Vision,G83663,BRISET CORNER SURGERY
EA15568A-5802-4FE0-BA78-DFFFE31D8343,,896286726030,727881965014,,,[],PENDING,2020-09-24 10:38:19.435,NaT,[],896286726030,Vision,G83027,EVEREST HEALTH PARTNERSHIP,727881965014,Vision,G83663,BRISET CORNER SURGERY
D9802ED9-28BA-45AD-BAA3-099E8F1DF832,,896286726030,727881965014,,,[],PENDING,2021-02-16 16:05:12.517,NaT,[],896286726030,Vision,G83027,EVEREST HEALTH PARTNERSHIP,727881965014,Vision,G83663,BRISET CORNER SURGERY
AD4DD41D-CFA6-4019-BBF5-FC49F745F8B3,,896286726030,727881965014,,,[],PENDING,2021-02-17 11:18:33.433,NaT,[],896286726030,Vision,G83027,EVEREST HEALTH PARTNERSHIP,727881965014,Vision,G83663,BRISET CORNER SURGERY
5C0BDBEE-C659-4E2D-957C-257B258D806E,,896286726030,727881965014,,,[],PENDING,2021-02-17 10:21:24.772,NaT,[],896286726030,Vision,G83027,EVEREST HEALTH PARTNERSHIP,727881965014,Vision,G83663,BRISET CORNER SURGERY
3EF1056B-BEF5-483B-B366-867A26AC8C61,,896286726030,727881965014,,,[],PENDING,2021-02-18 09:05:43.775,NaT,[],896286726030,Vision,G83027,EVEREST HEALTH PARTNERSHIP,727881965014,Vision,G83663,BRISET CORNER SURGERY
CA653500-6C89-46FB-A6E9-F5D3E24FED39,,896286726030,727881965014,,,[],PENDING,2021-02-16 14:45:56.849,NaT,[],896286726030,Vision,G83027,EVEREST HEALTH PARTNERSHIP,727881965014,Vision,G83663,BRISET CORNER SURGERY
44A7F555-E964-45CD-9820-45079CE38A51,,896286726030,727881965014,,,[],PENDING,2021-02-16 16:15:26.549,NaT,[],896286726030,Vision,G83027,EVEREST HEALTH PARTNERSHIP,727881965014,Vision,G83663,BRISET CORNER SURGERY
36D11428-665F-41EA-A688-11C417544BE9,,896286726030,727881965014,,,[],PENDING,2021-02-18 08:21:40.145,NaT,[],896286726030,Vision,G83027,EVEREST HEALTH PARTNERSHIP,727881965014,Vision,G83663,BRISET CORNER SURGERY


2     183
9       7
10      4
1       2
12      1
11      1
Name: date_requested, dtype: int64

### Practice 1 Query

index="spine2vfmmonitor" service="gp2gp" logReference="MPS0053c" 
| where conversationID in("610DEEDD-AB9A-420B-A3C9-24D3C5282858",
       "8C68C440-1993-4E6F-A27B-7079FFB2DD5D",
       "EA15568A-5802-4FE0-BA78-DFFFE31D8343",
       "D9802ED9-28BA-45AD-BAA3-099E8F1DF832",
       "AD4DD41D-CFA6-4019-BBF5-FC49F745F8B3",
       "5C0BDBEE-C659-4E2D-957C-257B258D806E",
       "3EF1056B-BEF5-483B-B366-867A26AC8C61",
       "CA653500-6C89-46FB-A6E9-F5D3E24FED39",
       "44A7F555-E964-45CD-9820-45079CE38A51",
       "36D11428-665F-41EA-A688-11C417544BE9")
| rex field=fromPartyID "(?<fromNACS>.+?)(-\d+)"
| rex field=toPartyID "(?<toNACS>.+?)(-\d+)"
| eval interactionName=case(
interactionID=="urn:nhs:names:services:gp2gp/RCMR_IN010000UK05", "request started",
interactionID=="urn:nhs:names:services:gp2gp/RCMR_IN030000UK06", "request completed",
interactionID=="urn:nhs:names:services:gp2gp/COPC_IN000001UK01", "common point to point",
interactionID=="urn:nhs:names:services:gp2gp/MCCI_IN010000UK13", "application acknowledgement")
| table conversationID, _time, GUID, interactionID, interactionName, fromNACS, toNACS, messageRef, jdiEvent
| sort _time

In [39]:
practice_1_data['date_requested'].dt.month.value_counts()

2     183
9       7
10      4
1       2
12      1
11      1
Name: date_requested, dtype: int64

In [60]:
practice_data_folder="s3://prm-gp2gp-data-sandbox-dev/PRMT-2023-Practice-Data/"
practice_1_filename="PRMT-2023_Practice_1_Data.csv"
practice_1_data=pd.read_csv(practice_data_folder+practice_1_filename)
practice_1_data=practice_1_data.sort_values(by=['conversationID','_time'])
practice_1_data

Unnamed: 0,conversationID,_time,GUID,interactionID,interactionName,fromNACS,toNACS,messageRef,jdiEvent
9,36D11428-665F-41EA-A688-11C417544BE9,2021-02-18T08:21:40.145+0000,36D11428-665F-41EA-A688-11C417544BE9,urn:nhs:names:services:gp2gp/RCMR_IN010000UK05,request started,G83027,G83663,NotProvided,NONE
10,3EF1056B-BEF5-483B-B366-867A26AC8C61,2021-02-18T09:05:43.775+0000,3EF1056B-BEF5-483B-B366-867A26AC8C61,urn:nhs:names:services:gp2gp/RCMR_IN010000UK05,request started,G83027,G83663,NotProvided,NONE
4,44A7F555-E964-45CD-9820-45079CE38A51,2021-02-16T16:15:26.549+0000,44A7F555-E964-45CD-9820-45079CE38A51,urn:nhs:names:services:gp2gp/RCMR_IN010000UK05,request started,G83027,G83663,NotProvided,NONE
7,5C0BDBEE-C659-4E2D-957C-257B258D806E,2021-02-17T10:21:24.772+0000,5C0BDBEE-C659-4E2D-957C-257B258D806E,urn:nhs:names:services:gp2gp/RCMR_IN010000UK05,request started,G83027,G83663,NotProvided,NONE
6,610DEEDD-AB9A-420B-A3C9-24D3C5282858,2021-02-16T16:25:10.529+0000,610DEEDD-AB9A-420B-A3C9-24D3C5282858,urn:nhs:names:services:gp2gp/RCMR_IN010000UK05,request started,G83027,G83663,NotProvided,NONE
5,8C68C440-1993-4E6F-A27B-7079FFB2DD5D,2021-02-16T16:21:07.621+0000,8C68C440-1993-4E6F-A27B-7079FFB2DD5D,urn:nhs:names:services:gp2gp/RCMR_IN010000UK05,request started,G83027,G83663,NotProvided,NONE
8,AD4DD41D-CFA6-4019-BBF5-FC49F745F8B3,2021-02-17T11:18:33.433+0000,AD4DD41D-CFA6-4019-BBF5-FC49F745F8B3,urn:nhs:names:services:gp2gp/RCMR_IN010000UK05,request started,G83027,G83663,NotProvided,NONE
2,CA653500-6C89-46FB-A6E9-F5D3E24FED39,2021-02-16T14:45:56.849+0000,CA653500-6C89-46FB-A6E9-F5D3E24FED39,urn:nhs:names:services:gp2gp/RCMR_IN010000UK05,request started,G83027,G83663,NotProvided,NONE
3,D9802ED9-28BA-45AD-BAA3-099E8F1DF832,2021-02-16T16:05:12.517+0000,D9802ED9-28BA-45AD-BAA3-099E8F1DF832,urn:nhs:names:services:gp2gp/RCMR_IN010000UK05,request started,G83027,G83663,NotProvided,NONE
0,EA15568A-5802-4FE0-BA78-DFFFE31D8343,2020-09-24T10:38:19.435+0000,EA15568A-5802-4FE0-BA78-DFFFE31D8343,urn:nhs:names:services:gp2gp/RCMR_IN010000UK05,request started,G83027,G83663,NotProvided,NONE


In [59]:
practice_1_data.groupby('conversationID')['interactionName'].apply(list).value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 1709, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[request started]                                 9
[request started, application acknowledgement]    1
Name: interactionName, dtype: int64

### Practice 2 Query
index="spine2vfmmonitor" service="gp2gp" logReference="MPS0053c" 
| where conversationID in("0CFB58BA-2EF7-4673-884C-6E71F05E3935",
       "01CA24E1-1075-4F8D-875B-6CBA88C42721",
       "12F6415C-B273-4257-A5A9-C48D91E6F149",
       "75D58513-A8A3-4D39-98A0-837445DB77AD",
       "067AC3B2-4BCD-44CF-86D3-84F4D5B8EB86",
       "AD32CBF1-4A08-4BBE-B212-4B2259387FA5",
       "23C185BE-A06A-40EA-889E-4A95D4D8C818",
       "A396349B-FB2B-4048-9D36-FA6ACA33768D",
       "211D9AFB-76D7-434A-85E4-DED596464460",
       "A9026CCD-CAE0-492E-84F2-1EBAA7849A1C")
| rex field=fromPartyID "(?<fromNACS>.+?)(-\d+)"
| rex field=toPartyID "(?<toNACS>.+?)(-\d+)"
| eval interactionName=case(
interactionID=="urn:nhs:names:services:gp2gp/RCMR_IN010000UK05", "request started",
interactionID=="urn:nhs:names:services:gp2gp/RCMR_IN030000UK06", "request completed",
interactionID=="urn:nhs:names:services:gp2gp/COPC_IN000001UK01", "common point to point",
interactionID=="urn:nhs:names:services:gp2gp/MCCI_IN010000UK13", "application acknowledgement")
| table conversationID, _time, GUID, interactionID, interactionName, fromNACS, toNACS, messageRef, jdiEvent
| sort _time

In [40]:
practice_2_data['date_requested'].dt.month.value_counts()

2     22
9     20
10    19
1     19
12    16
11    12
Name: date_requested, dtype: int64

In [56]:
practice_2_filename="PRMT-2023_Practice_2_Data.csv"
practice_2_data=pd.read_csv(practice_data_folder+practice_2_filename)
practice_2_data=practice_2_data.sort_values(by=['conversationID','_time'])
practice_2_data

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 1709, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[request started, application acknowledgement]                                                    6
[request started, application acknowledgement, request completed]                                 2
[request started, application acknowledgement, request completed, application acknowledgement]    2
Name: interactionName, dtype: int64

In [61]:
practice_2_data.groupby('conversationID')['interactionName'].apply(list).value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 1709, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[request started, application acknowledgement]                                                    6
[request started, application acknowledgement, request completed]                                 2
[request started, application acknowledgement, request completed, application acknowledgement]    2
Name: interactionName, dtype: int64