# PRMT-2333 Hypothesis: Practices responsible for EMIS-EMIS "sender not LM compliant" errors are the same in Aug vs. July

## Hypothesis

We believe that the practices that cause EMIS-EMIS error code 23s (sender not Large Message compliant) in July will be the same (or nearly the same) as the practices causing these errors in Aug.
We will know this to be true when we have compared the two data sets and the two lists of practices are correlated.

## Scope

- Generate list of practices that caused EMIS-EMIS error 23s in July, and a separate one for Aug
- Compare the two lists to identify if there is a correlation

In [1]:
import pandas as pd 
import numpy as np
from datetime import datetime

In [2]:
# Transfer extracts to analyse: the July 2021 extract and the August 2021 extract
# stored under "PRMT-2324-2-weeks-august-data".
transfer_files = [
    "s3://prm-gp2gp-transfer-data-preprod/v4/2021/7/transfers.parquet",
    "s3://prm-gp2gp-notebook-data-prod/PRMT-2324-2-weeks-august-data/transfers/v4/2021/8/transfers.parquet",
]

# Read each extract and stack them into a single frame.
transfers_raw = pd.concat([pd.read_parquet(path) for path in transfer_files])

# Working copy; `transfers_raw` itself is left unmodified.
transfers = transfers_raw.copy()

In [3]:
# ASID lookup extracts for the two months under analysis.
asid_lookup_file_location = "s3://prm-gp2gp-asid-lookup-preprod/"
asid_lookup_files = [
    "2021/7/asidLookup.csv.gz",
    "2021/8/asidLookup.csv.gz",
]
asid_lookup_input_files = [asid_lookup_file_location + f for f in asid_lookup_files]

# Stack both months, drop exact duplicate rows, then collapse to one row per ASID.
# groupby().last() keeps the last non-null value of each column, so the later file
# wins where both months have an entry for the same ASID.
# NOTE(review): the ASID dtype is left to read_csv inference here, whereas the spine
# messages cell forces its ASID columns to str - confirm the join keys share a dtype.
asid_lookup = (
    pd.concat([pd.read_csv(f) for f in asid_lookup_input_files])
    .drop_duplicates()
    .groupby("ASID")
    .last()
    .reset_index()
)
lookup = asid_lookup[["ASID", "NACS", "OrgName"]]

# Attach practice details for both the requesting and the sending side of each transfer.
# The chain starts from `transfers_raw` rather than from `transfers` itself so that
# re-running this cell does not append a second set of lookup columns.
transfers = (
    transfers_raw
    .merge(lookup, left_on="requesting_practice_asid", right_on="ASID", how="left")
    .rename(columns={
        "ASID": "requesting_supplier_asid",
        "NACS": "requesting_ods_code",
        "OrgName": "requesting_practice_name",
    })
    .merge(lookup, left_on="sending_practice_asid", right_on="ASID", how="left")
    .rename(columns={
        "ASID": "sending_supplier_asid",
        "NACS": "sending_ods_code",
        "OrgName": "sending_practice_name",
    })
)

### EMIS - EMIS: Sender not Large Message compliant (error 23)

In [4]:
# Boolean masks over `transfers`. The two supplier masks are reused by the error 14
# analysis later in the notebook.
emis_sender_bool = transfers["sending_supplier"] == "EMIS"
emis_requester_bool = transfers["requesting_supplier"] == "EMIS"
sender_error_23_bool = transfers["sender_error_codes"].apply(lambda codes: 23 in codes)

# EMIS -> EMIS transfers whose sender error codes include 23.
emis_transfers_with_error_23 = transfers[
    emis_sender_bool & emis_requester_bool & sender_error_23_bool
].copy()

# Number of such transfers per sending practice, largest count first.
# NOTE(review): grouping is by practice name, so two distinct practices that share a
# name would be counted together - confirm names are unique or group by ODS code.
grouped_emis_transfers_with_error_23 = (
    emis_transfers_with_error_23.groupby("sending_practice_name")
    .size()
    .to_frame("count")
    .sort_values("count", ascending=False)
)

In [5]:
# Error 23 transfer counts per sending practice across the whole dataset (July and August together).
grouped_emis_transfers_with_error_23

Unnamed: 0_level_0,count
sending_practice_name,Unnamed: 1_level_1
ARCHWAY MEDICAL CENTRE,4
ST CLEMENTS PARTNERSHIP,3
UNIVERSITY HEALTH SERVICE,3
KESTON MEDICAL PRACTICE,3
GODIVA GROUP PRACTICE,2
...,...
HIGHPARKS MEDICAL PRACTICE,1
HIGHERLAND SURGERY,1
HIGH STREET SURGERY,1
HETHERINGTON AT THE PAVILION,1


In [6]:
# July subset: error 23 transfers requested before midnight on 1 August 2021.
july_bool = emis_transfers_with_error_23["date_requested"] < datetime(2021, 8, 1)
july_emis_transfers_with_error_23 = emis_transfers_with_error_23[july_bool]

# Count per sending practice, largest count first.
grouped_july_emis_transfers_with_error_23 = (
    july_emis_transfers_with_error_23
    .groupby("sending_practice_name")
    .size()
    .to_frame("count")
    .sort_values("count", ascending=False)
)
grouped_july_emis_transfers_with_error_23

Unnamed: 0_level_0,count
sending_practice_name,Unnamed: 1_level_1
ARCHWAY MEDICAL CENTRE,3
IVY COURT SURGERY,2
CLARENCE MEDICAL CENTRE,2
UNIVERSITY MEDICAL GROUP,2
CENTRAL GATESHEAD MEDICAL GROUP,2
...,...
HIGHERLAND SURGERY,1
HOLLIES MEDICAL CENTRE,1
HOLLYMOOR MEDICAL CENTRE,1
HORIZON HEALTH CENTRE,1


In [7]:
# August subset: error 23 transfers requested from midnight on 1 August 2021 onwards.
# The boundary must be ">= 1 August", not "> 31 July": datetime(2021, 7, 31) is midnight
# at the *start* of 31 July, so "> 31 July" would also select transfers requested during
# 31 July, which the July filter (< 1 August) already includes - counting them twice.
august_bool = emis_transfers_with_error_23["date_requested"] >= datetime(2021, 8, 1)
august_emis_transfers_with_error_23 = emis_transfers_with_error_23[august_bool]

# Widen the column display so the full list of conversation IDs is shown.
pd.set_option("display.max_colwidth", 1000)

# Per sending practice: how many error 23 transfers it had in August and their conversation IDs.
sample_conversations_in_august = (
    august_emis_transfers_with_error_23
    .groupby("sending_practice_name")
    .agg({"conversation_id": ["count", list]})
    .sort_values(by=("conversation_id", "count"), ascending=False)
)
sample_conversations_in_august.head()

Unnamed: 0_level_0,conversation_id,conversation_id
Unnamed: 0_level_1,count,list
sending_practice_name,Unnamed: 1_level_2,Unnamed: 2_level_2
WOOSEHILL PRACTICE,2,"[BB857117-979B-497B-BE61-F5B2DEEF7FB6, DBC62FE1-5F76-49A4-AF96-8F2E7DAF3757]"
STUDENT HEALTH SERVICE,2,"[0868686E-EE03-40F5-A67D-9B5329C9BDB8, 6ABD79DF-43BD-4AA3-B40F-4151EBFE1303]"
OBSERVATORY MEDICAL PRACTICE,2,"[C166D92C-2E5D-4D03-9B8A-88DBC95A3ECD, 61C78024-0816-4043-8831-D8E88FF00E49]"
ELIZABETH STREET SURGERY,2,"[74242AED-3EBD-4E0C-A9B5-2EDD8D097F70, D8516DBB-BA6C-4E7F-9E5C-AAFC2EBA814E]"
BALMORE PARK SURGERY,2,"[E0EEAE85-4673-4B44-A25B-4243F8D6E797, 56307A4C-E248-4DAF-9C7D-BF70B1C7A53C]"


In [8]:
# August error 23 transfers counted per sending practice, largest count first.
grouped_august_emis_transfers_with_error_23 = (
    august_emis_transfers_with_error_23
    .groupby("sending_practice_name")
    .size()
    .to_frame("count")
    .sort_values("count", ascending=False)
)
grouped_august_emis_transfers_with_error_23

Unnamed: 0_level_0,count
sending_practice_name,Unnamed: 1_level_1
WOOSEHILL PRACTICE,2
STUDENT HEALTH SERVICE,2
OBSERVATORY MEDICAL PRACTICE,2
ELIZABETH STREET SURGERY,2
BALMORE PARK SURGERY,2
...,...
KNOLL MEDICAL PRACTICE,1
LANE ENDS SURGERY,1
LANGLEY HEALTH CENTRE,1
LAUNCESTON CLOSE SURGERY,1


#### Practices that had error code 23 in both July and August as senders

In [9]:
# Inner join on practice name keeps only practices that appear in both months.
# Both inputs have a "count" column, which merge suffixes as count_x (July, left)
# and count_y (August, right) before they are given readable names.
practices_with_error_23_july_and_august = (
    grouped_july_emis_transfers_with_error_23
    .merge(
        grouped_august_emis_transfers_with_error_23,
        how="inner",
        on="sending_practice_name",
    )
    .rename(columns={
        "count_x": "No of transfers in July",
        "count_y": "No of transfers in August",
    })
)
practices_with_error_23_july_and_august

Unnamed: 0_level_0,No of transfers in July,No of transfers in August
sending_practice_name,Unnamed: 1_level_1,Unnamed: 2_level_1
ARCHWAY MEDICAL CENTRE,3,1
UNIVERSITY HEALTH SERVICE,2,1
ST CLEMENTS PARTNERSHIP,2,1
SHEPPEY NHS HEALTHCARE CENTRE,1,1
SOUTHERN GROUP PRACTICE,1,1
ST PAUL'S SURGERY,1,1
STONEFIELD STREET SURGERY,1,1
THE SURGERY KINGSTONE,1,1
WHITSTABLE MEDICAL PRACTICE,1,1
MILLGATE HEALTHCARE PARTNERSHIP,1,1


### EMIS - EMIS: Requester not Large Message compliant (error 14)

Checking whether the requesting practices are the same in both months, as both sides need to have LM enabled for a large message transfer to be successful.

In [10]:
# EMIS -> EMIS transfers whose sender error codes include 14
# (reuses the EMIS supplier masks built for the error 23 analysis).
sender_error_14_bool = transfers["sender_error_codes"].apply(lambda codes: 14 in codes)
emis_transfers_with_error_14 = transfers[
    emis_sender_bool & emis_requester_bool & sender_error_14_bool
].copy()

# Number of such transfers per requesting practice, largest count first.
grouped_emis_transfers_with_error_14 = (
    emis_transfers_with_error_14
    .groupby(by="requesting_practice_name")
    .size()
    .to_frame("count")
    .sort_values("count", ascending=False)
)

In [11]:
# Error 14 transfer counts per requesting practice across the whole dataset (July and August together).
grouped_emis_transfers_with_error_14

Unnamed: 0_level_0,count
requesting_practice_name,Unnamed: 1_level_1
THE GREYSWOOD PRACTICE,4
NEXUS HEALTH GROUP,4
AMHERST MEDICAL PRACTICE,3
WHITSTABLE MEDICAL PRACTICE,3
ARCHWAY MEDICAL CENTRE,3
...,...
GRAHAM ROAD SURGERY,1
GOODMAN'S FIELD HEALTH CENTRE,1
GLOUCESTER ROAD MEDICAL CENTRE,1
GLASTONBURY SURGERY,1


In [12]:
# July subset: error 14 transfers requested before midnight on 1 August 2021.
july_bool = emis_transfers_with_error_14["date_requested"] < datetime(2021, 8, 1)
july_emis_transfers_with_error_14 = emis_transfers_with_error_14[july_bool]

# Count per requesting practice, largest count first.
grouped_july_emis_transfers_with_error_14 = (
    july_emis_transfers_with_error_14
    .groupby("requesting_practice_name")
    .size()
    .to_frame("count")
    .sort_values(by="count", ascending=False)
)
# The rename is for display only; the stored frame keeps its "count" column,
# which the later merges rely on.
grouped_july_emis_transfers_with_error_14.rename(columns={"count": "No of transfers in July"})

Unnamed: 0_level_0,No of transfers in July
requesting_practice_name,Unnamed: 1_level_1
NEXUS HEALTH GROUP,3
MARTINS OAK SURGERY,2
THE GILL MEDICAL PRACTICE,2
THE GREYSWOOD PRACTICE,2
CHURCH ROAD SURGERY,2
...,...
HARTINGTON SURGERY,1
HEATON MOOR MEDICAL GROUP,1
HESWALL & PENSBY GROUP PRACTICE,1
HIGH GLADES MEDICAL CENTRE,1


In [13]:
# August subset: error 14 transfers requested from midnight on 1 August 2021 onwards.
# The boundary must be ">= 1 August", not "> 31 July": datetime(2021, 7, 31) is midnight
# at the *start* of 31 July, so "> 31 July" would also select transfers requested during
# 31 July, which the July filter (< 1 August) already includes - counting them twice.
august_bool = emis_transfers_with_error_14["date_requested"] >= datetime(2021, 8, 1)
august_emis_transfers_with_error_14 = emis_transfers_with_error_14[august_bool]

# Count per requesting practice, largest count first.
grouped_august_emis_transfers_with_error_14 = (
    august_emis_transfers_with_error_14
    .groupby(by="requesting_practice_name")
    .size()
    .to_frame("count")
    .sort_values("count", ascending=False)
)
# The rename is for display only; the stored frame keeps its "count" column,
# which the later merges rely on.
grouped_august_emis_transfers_with_error_14.rename({"count": "No of transfers in August"}, axis=1)

Unnamed: 0_level_0,No of transfers in August
requesting_practice_name,Unnamed: 1_level_1
THE PRACTICE ALBERT ROAD,2
E HARLING & KENNINGHALL MEDICAL PRACTICE,2
WHITSTABLE MEDICAL PRACTICE,2
GOSBERTON MEDICAL CENTRE,2
THE GREYSWOOD PRACTICE,2
...,...
GRAHAM ROAD SURGERY,1
GREEN LANE MEDICAL CENTRE,1
GREENBANK MEDICAL PRACTICE,1
GUILDOWNS GROUP PRACTICE,1


#### Practices that had error code 14 in both July and August as requesters

In [14]:
# Inner join on practice name keeps only requesting practices seen in both months.
practices_with_error_14_july_and_august = grouped_july_emis_transfers_with_error_14.merge(
    grouped_august_emis_transfers_with_error_14,
    how="inner",
    on="requesting_practice_name",
)
# Display with readable headers: count_x is the July (left) count, count_y the August (right) count.
# The renamed frame is only shown, not stored.
practices_with_error_14_july_and_august.rename(
    columns={
        "count_x": "No of transfers in July",
        "count_y": "No of transfers in August",
    }
)

Unnamed: 0_level_0,No of transfers in July,No of transfers in August
requesting_practice_name,Unnamed: 1_level_1,Unnamed: 2_level_1
NEXUS HEALTH GROUP,3,1
THE GREYSWOOD PRACTICE,2,2
ARCHWAY MEDICAL CENTRE,2,1
AMHERST MEDICAL PRACTICE,2,1
WHITSTABLE MEDICAL PRACTICE,1,2
WITTON STREET SURGERY,1,1
MARKET QUARTER MEDICAL PRACTICE,1,1
THE FORUM HEALTH CENTRE,1,1
THORNTON & VALLEY PARK SURGERY,1,1
MANOR ROAD SURGERY,1,1


### Checking if practices appear consistently with LM errors as both senders and requesters

In [15]:
# Join on the index (practice name): a practice is kept when, in July, it appears both as a
# requester with error 14 (left frame) and as a sender with error 23 (right frame).
july_recurring_LM_error_practices = grouped_july_emis_transfers_with_error_14.merge(
    grouped_july_emis_transfers_with_error_23,
    how="inner",
    left_index=True,
    right_index=True,
)
# Display only: count_x comes from the error 14 frame, count_y from the error 23 frame.
july_recurring_LM_error_practices.rename(
    columns={
        "count_x": "No of transfers with error 14",
        "count_y": "No of transfers with error 23",
    }
)

Unnamed: 0,No of transfers with error 14,No of transfers with error 23
MARTINS OAK SURGERY,2,1
ARCHWAY MEDICAL CENTRE,2,3
MIDDLEWOOD PARTNERSHIP,2,1
ST MARY'S SURGERY,1,1
ST MARYS ISLAND GROUP PRACTICES,1,1
ROCKY LANE MEDICAL CENTRE,1,1
WEST HAMPSTEAD MEDICAL CENTRE,1,1
WHITSTABLE MEDICAL PRACTICE,1,1
MARKET QUARTER MEDICAL PRACTICE,1,1
THE EUXTON MEDICAL CENTRE,1,1


In [16]:
# Join on the index (practice name): a practice is kept when, in August, it appears both as a
# requester with error 14 (left frame) and as a sender with error 23 (right frame).
august_recurring_LM_error_practices = grouped_august_emis_transfers_with_error_14.merge(
    grouped_august_emis_transfers_with_error_23,
    how="inner",
    left_index=True,
    right_index=True,
)
# Display only: count_x comes from the error 14 frame, count_y from the error 23 frame.
august_recurring_LM_error_practices.rename(
    columns={
        "count_x": "No of transfers with error 14",
        "count_y": "No of transfers with error 23",
    }
)

Unnamed: 0,No of transfers with error 14,No of transfers with error 23
WHITSTABLE MEDICAL PRACTICE,2,1
ST CLEMENTS PARTNERSHIP,1,1
SAXONBURY HOUSE SURGERY,1,1
NEXUS HEALTH GROUP,1,1
RAINBOW MEDICAL CENTRE,1,1
VICTORIA MEDICAL CENTRE,1,1
THE ROYTON & CROMPTON FAMILY PRACTICE,1,1
THE CEDARS SURGERY,1,1
THE MILLER PRACTICE,1,1
CHILCOTE PRACTICE,1,1


### Checking across the full dataset

In [17]:
# Per-practice counts over the whole dataset (no month filter):
# error 14 by requesting practice, error 23 by sending practice.
grouped_practices_with_error_14 = (
    emis_transfers_with_error_14
    .groupby("requesting_practice_name")
    .size()
    .to_frame("count")
    .sort_values(by="count", ascending=False)
)
grouped_practices_with_error_23 = (
    emis_transfers_with_error_23
    .groupby("sending_practice_name")
    .size()
    .to_frame("count")
    .sort_values(by="count", ascending=False)
)

# Practices that appear in both frames, matched on the index (practice name).
recurring_LM_error_practices = grouped_practices_with_error_14.merge(
    grouped_practices_with_error_23,
    how="inner",
    left_index=True,
    right_index=True,
)
# Display only: count_x comes from the error 14 frame, count_y from the error 23 frame.
recurring_LM_error_practices.rename(
    columns={
        "count_x": "No of transfers with error 14",
        "count_y": "No of transfers with error 23",
    }
)

Unnamed: 0,No of transfers with error 14,No of transfers with error 23
NEXUS HEALTH GROUP,4,1
WHITSTABLE MEDICAL PRACTICE,3,2
ARCHWAY MEDICAL CENTRE,3,4
MARKET QUARTER MEDICAL PRACTICE,2,1
MARTINS OAK SURGERY,2,1
MIDDLEWOOD PARTNERSHIP,2,2
SAXONBURY HOUSE SURGERY,1,1
ROCKY LANE MEDICAL CENTRE,1,1
RINGMEAD MEDICAL PRACTICE,1,1
RAINBOW MEDICAL CENTRE,1,1


### Checking if practices with not LM compliant error had any LM transfers

In [18]:
# Spine message extract(s) to load; only July 2021 is listed.
gp2gp_messages_files = [
    "s3://prm-gp2gp-raw-spine-data-preprod/v2/messages/2021/7/2021-7_spine_messages.csv.gz",
]

# "_time" is parsed as a datetime and the two ASID columns are forced to str
# so they are not parsed as numbers.
gp2gp_messages_raw = pd.concat([
    pd.read_csv(
        path,
        parse_dates=["_time"],
        dtype={"messageRecipient": str, "messageSender": str},
    )
    for path in gp2gp_messages_files
])

# Working copy; `gp2gp_messages_raw` itself is left unmodified.
gp2gp_messages = gp2gp_messages_raw.copy()

In [19]:
# Attach practice details from the ASID lookup to both ends of every spine message.
# The chain starts from `gp2gp_messages_raw` rather than from `gp2gp_messages` itself so
# that re-running this cell does not append a second set of lookup columns.
# NOTE(review): the "requesting"/"sending" labels are derived per message from
# messageRecipient / messageSender - confirm this matches the transfer-level roles
# that the later cells assume.
gp2gp_messages = (
    gp2gp_messages_raw
    .merge(lookup, left_on="messageRecipient", right_on="ASID", how="left")
    .rename(columns={
        "ASID": "requesting_supplier_asid",
        "NACS": "requesting_ods_code",
        "OrgName": "requesting_practice_name",
    })
    .merge(lookup, left_on="messageSender", right_on="ASID", how="left")
    .rename(columns={
        "ASID": "sending_supplier_asid",
        "NACS": "sending_ods_code",
        "OrgName": "sending_practice_name",
    })
)

In [20]:
# Keep only messages whose interaction ID is the COPC one; this notebook uses the
# presence of COPC messages to indicate that a practice took part in large message transfers.
is_copc = gp2gp_messages["interactionID"].eq("urn:nhs:names:services:gp2gp/COPC_IN000001UK01")
gp2gp_copc_messages = gp2gp_messages.loc[is_copc]

In [21]:
# Number of distinct conversations per sending practice that contain at least one COPC message.
copc_conversations_per_practice = (
    gp2gp_copc_messages[["conversationID", "sending_practice_name"]]
    .drop_duplicates()
    .groupby("sending_practice_name")
    .size()
    .to_frame("No of transfers with at least one COPC message")
)

# Table contains COPC messages from July and transfers with error code 23 from July for practices that had transfers with error code 23 in July and August
(
    practices_with_error_23_july_and_august[["No of transfers in July"]]
    .merge(copc_conversations_per_practice, how="left", left_index=True, right_index=True)
    .sort_values(by="No of transfers with at least one COPC message", ascending=False)
)

Unnamed: 0_level_0,No of transfers in July,No of transfers with at least one COPC message
sending_practice_name,Unnamed: 1_level_1,Unnamed: 2_level_1
ARCHWAY MEDICAL CENTRE,3,331
CENTRAL SURGERY,1,247
MIDDLEWOOD PARTNERSHIP,1,172
WHITSTABLE MEDICAL PRACTICE,1,160
ST PAUL'S SURGERY,1,137
ST CLEMENTS PARTNERSHIP,2,119
UNIVERSITY HEALTH SERVICE,2,114
ASHVILLE SURGERY,1,104
KESTON MEDICAL PRACTICE,1,99
LAVENDER HILL GROUP PRACTICE,1,94


In [22]:
# Checking if there are any practices that had error code 23 and no COPC messages

# Left-join every July error 23 practice onto the COPC messages. A practice with no COPC
# message gets a row with empty message columns, which fillna replaces with "N/A".
practices_with_lm_error_and_no_copc_messages = (
    grouped_july_emis_transfers_with_error_23
    .merge(gp2gp_copc_messages, how="left", on="sending_practice_name")
    .fillna("N/A")
)
# Rows whose GUID is the "N/A" placeholder had no matching COPC message.
practices_with_lm_error_and_no_copc_messages.loc[
    practices_with_lm_error_and_no_copc_messages["GUID"] == "N/A"
]

Unnamed: 0,sending_practice_name,count,_time,conversationID,GUID,interactionID,messageSender,messageRecipient,messageRef,jdiEvent,toSystem,fromSystem,requesting_supplier_asid,requesting_ods_code,requesting_practice_name,sending_supplier_asid,sending_ods_code
