# Review of practices implementing large messaging

This notebook is to compare the GP2GP performance of two specific practices that have implemented GP2GP 2.2b (allowing for large messaging) on 12th January 2021.

Note when looking at the counts - we are comparing transfers that were initiated in September up until 12th January compared to transfers between 12th January to 17th March (inclusive).

In [1]:
import pandas as pd
import paths, data
from datetime import datetime

In [2]:
transfer_file_location = "s3://prm-gp2gp-data-sandbox-dev/transfers-sample-3/"
transfer_files = [
    "9-2020-transfers.parquet",
    "10-2020-transfers.parquet",
    "11-2020-transfers.parquet",
    "12-2020-transfers.parquet",
    "1-2021-transfers.parquet",
    "2-2021-transfers.parquet",
    "partial-3-2021-transfers.parquet"
]
transfer_input_files = [transfer_file_location + f for f in transfer_files]
transfers = pd.concat((
    pd.read_parquet(f)
    for f in transfer_input_files
))

In [3]:
asid_lookup_file_location ="s3://prm-gp2gp-data-sandbox-dev/asid-lookup/"
asid_lookup_files = [
    "asidLookup-Dec-2020.csv.gz",
    "asidLookup-Jan-2021.csv.gz",
    "asidLookup-Feb-2021.csv.gz",
    "asidLookup-Mar-2021.csv.gz",
]
asid_lookup_input_files = [asid_lookup_file_location + f for f in asid_lookup_files]
asid_lookups = pd.concat((
    pd.read_csv(f)
    for f in asid_lookup_input_files
))

In [4]:
def transfers_filtered_by_date(date_time):
    return transfers[transfers["date_requested"] < date_time], transfers[transfers["date_requested"] > date_time]

In [5]:
mid_jan = datetime(year=2021, month=1, day=13, hour=10, minute=40)
transfers_pre_mid_jan, transfers_post_mid_jan = transfers_filtered_by_date(mid_jan)

In [6]:
large_messaging_vision_practices = asid_lookups["NACS"].isin(["K81607", "K81089"])
asids_for_large_messaging_practices = asid_lookups.loc[large_messaging_vision_practices]["ASID"].unique()
asids_for_large_messaging_practices

array(['004730850049', '642159719037'], dtype=object)

In [7]:
def filter_transfers(has_large_messaging, is_sending_practice):
    transfers = transfers_post_mid_jan if has_large_messaging else transfers_pre_mid_jan
    practice_key = "sending_practice_asid" if is_sending_practice else "requesting_practice_asid"
    return transfers.loc[transfers[practice_key].isin(asids_for_large_messaging_practices)]

In [8]:
df = pd.DataFrame(data={'is_sending_practice': [True, False, True, False], 'has_large_messaging': [False, False, True, True]})

In [9]:
def calculate_totals(transfers):
    return len(transfers.index)

df["total_transfers"] = df.apply(lambda row: 
                     calculate_totals(filter_transfers(row["has_large_messaging"], row["is_sending_practice"]))
                     , axis = 1)

In [10]:
def count_final_error_codes(transfers):
    return transfers["final_error_code"].dropna().tolist()
   
    
df["final_errors"] = df.apply(lambda row: 
                     count_final_error_codes(filter_transfers(row["has_large_messaging"], row["is_sending_practice"]))
                     , axis = 1)


In [11]:
def count_intermediate_error_codes(transfers):
    interm_errors = transfers.loc[transfers["intermediate_error_codes"].apply(lambda x: len(x) > 0)]
    return interm_errors["intermediate_error_codes"].sum()

df["intermediate_error_count"] = df.apply(lambda row:
                                count_intermediate_error_codes(filter_transfers(row["has_large_messaging"], row["is_sending_practice"])), axis = 1)

In [12]:
def count_sender_error_codes(transfers):
        return transfers["sender_error_code"].dropna().tolist()

df["sender_error_codes"] = df.apply(lambda row:
                                count_sender_error_codes(filter_transfers(row["has_large_messaging"], row["is_sending_practice"])), axis = 1)

In [13]:
def count_transfer_status(transfers, status):
    default_transfer_status_count = {
        "PENDING_WITH_ERROR": 0,
        "FAILED": 0,
        "INTEGRATED": 0,
        "PENDING": 0
    }
    actual_transfer_status_count = transfers["status"].value_counts().to_dict()
    return {**default_transfer_status_count, **actual_transfer_status_count}[status]

df["status_pending_with_error"] = df.apply(lambda row:
                                count_transfer_status(filter_transfers(row["has_large_messaging"], row["is_sending_practice"]), "PENDING_WITH_ERROR"), axis = 1)


df["status_failed"] = df.apply(lambda row:
                                count_transfer_status(filter_transfers(row["has_large_messaging"], row["is_sending_practice"]), "FAILED"), axis = 1)



df["status_pending"] = df.apply(lambda row:
                                count_transfer_status(filter_transfers(row["has_large_messaging"], row["is_sending_practice"]), "PENDING"), axis = 1)


df["status_integrated"] = df.apply(lambda row:
                                count_transfer_status(filter_transfers(row["has_large_messaging"], row["is_sending_practice"]), "INTEGRATED"), axis = 1)



In [14]:
pd.set_option('display.max_colwidth',600)

pd.pivot_table(df, index=["has_large_messaging", "is_sending_practice"], aggfunc=lambda x: x.astype(str))

Unnamed: 0_level_0,Unnamed: 1_level_0,final_errors,intermediate_error_count,sender_error_codes,status_failed,status_integrated,status_pending,status_pending_with_error,total_transfers
has_large_messaging,is_sending_practice,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
False,False,[11.0],0,"[14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0]",1,8,1,11,21
False,True,[15.0],0,"[23.0, 23.0, 23.0, 23.0, 23.0]",0,91,33,5,129
True,False,[],0,[30.0],0,26,0,1,27
True,True,"[15.0, 15.0, 15.0, 15.0]",0,"[10.0, 10.0, 10.0, 6.0, 6.0]",0,52,20,5,77
