# PRMT-2270 14 vs 28 day cutoff with re-categorisation

In [1]:
import pandas as pd 
import numpy as np
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [2]:
transfer_file_14_day_cutoff = "s3://prm-gp2gp-data-sandbox-dev/transfers-sample-6/2021-5-transfers_14_day_conversation_cutoff.parquet"
transfers_raw_14_day_cutoff = pd.read_parquet(transfer_file_14_day_cutoff)

transfers_14_day_cutoff = transfers_raw_14_day_cutoff.copy()
transfers_14_day_cutoff["status"] = transfers_14_day_cutoff["status"].str.replace("_", " ").str.title()

In [3]:
transfer_file_28_day_cutoff = "s3://prm-gp2gp-data-sandbox-dev/transfers-sample-6/2021-5-transfers_28_day_conversation_cutoff.parquet"
transfers_raw_28_day_cutoff = pd.read_parquet(transfer_file_28_day_cutoff)

transfers_28_day_cutoff = transfers_raw_28_day_cutoff.copy()
transfers_28_day_cutoff["status"] = transfers_28_day_cutoff["status"].str.replace("_", " ").str.title()

In [4]:
outcome_counts_14_day_cutoff = transfers_14_day_cutoff.fillna("N/A").groupby(by=["status", "failure_reason"]).agg({"conversation_id": "count"})
outcome_counts_14_day_cutoff = outcome_counts_14_day_cutoff.rename({"conversation_id": "Number of transfers", "failure_reason": "Failure Reason"}, axis=1)
outcome_counts_14_day_cutoff["% of transfers"] = (outcome_counts_14_day_cutoff["Number of transfers"] / outcome_counts_14_day_cutoff["Number of transfers"].sum()).multiply(100)

In [5]:
outcome_counts_14_day_cutoff

Unnamed: 0_level_0,Unnamed: 1_level_0,Number of transfers,% of transfers
status,failure_reason,Unnamed: 2_level_1,Unnamed: 3_level_1
Integrated On Time,,193033,87.753042
Process Failure,Integrated Late,6801,3.091743
Process Failure,"Transferred, not integrated",10312,4.687848
Technical Failure,COPC(s) not Acknowledged,199,0.090466
Technical Failure,COPC(s) not sent,24,0.01091
Technical Failure,Contains Fatal Sender Error,3182,1.446541
Technical Failure,Core Extract not Sent,3284,1.49291
Technical Failure,Final Error,1904,0.865561
Technical Failure,Request not Acknowledged,783,0.355953
Unclassified Failure,Ambiguous COPC messages,305,0.138653


In [6]:
outcome_counts_28_day_cutoff = transfers_28_day_cutoff.fillna("N/A").groupby(by=["status", "failure_reason"]).agg({"conversation_id": "count"})
outcome_counts_28_day_cutoff = outcome_counts_28_day_cutoff.rename({"conversation_id": "Number of transfers", "failure_reason": "Failure Reason"}, axis=1).astype('int32')
outcome_counts_28_day_cutoff["% of transfers"] = (outcome_counts_28_day_cutoff["Number of transfers"] / outcome_counts_28_day_cutoff["Number of transfers"].sum()).multiply(100)

In [7]:
outcome_counts_28_day_cutoff

Unnamed: 0_level_0,Unnamed: 1_level_0,Number of transfers,% of transfers
status,failure_reason,Unnamed: 2_level_1,Unnamed: 3_level_1
Integrated On Time,,193221,87.838507
Process Failure,Integrated Late,11235,5.107445
Process Failure,"Transferred, not integrated",5908,2.685784
Technical Failure,COPC(s) not Acknowledged,123,0.055916
Technical Failure,COPC(s) not sent,19,0.008637
Technical Failure,Contains Fatal Sender Error,3182,1.446541
Technical Failure,Core Extract not Sent,3248,1.476545
Technical Failure,Final Error,1984,0.901929
Technical Failure,Request not Acknowledged,741,0.33686
Unclassified Failure,Ambiguous COPC messages,212,0.096375


In [8]:
# High level summary of diff based on status
transfers_28_day_cutoff.fillna("N/A").groupby(by=["status", "failure_reason"]).agg({"conversation_id": "count"}).rename(columns={"conversation_id": "total difference"}) - transfers_14_day_cutoff.fillna("N/A").groupby(by=["status", "failure_reason"]).agg({"conversation_id": "count"}).rename(columns={"conversation_id": "total difference"})

Unnamed: 0_level_0,Unnamed: 1_level_0,total difference
status,failure_reason,Unnamed: 2_level_1
Integrated On Time,,188
Process Failure,Integrated Late,4434
Process Failure,"Transferred, not integrated",-4404
Technical Failure,COPC(s) not Acknowledged,-76
Technical Failure,COPC(s) not sent,-5
Technical Failure,Contains Fatal Sender Error,0
Technical Failure,Core Extract not Sent,-36
Technical Failure,Final Error,80
Technical Failure,Request not Acknowledged,-42
Unclassified Failure,Ambiguous COPC messages,-93


In [9]:
outcome = outcome_counts_14_day_cutoff.compare(outcome_counts_28_day_cutoff, keep_equal=True, keep_shape=True).round(2).rename(columns={"self":"14 day cutoff","other":"28 day cutoff"})
outcome["Difference"] = (outcome["Number of transfers"]["28 day cutoff"] - outcome["Number of transfers"]["14 day cutoff"]).astype('int32')
outcome["% Difference"] = (outcome["% of transfers"]["28 day cutoff"] - outcome["% of transfers"]["14 day cutoff"])

outcome[[
 ('Number of transfers', '14 day cutoff'),
 ('Number of transfers', '28 day cutoff'),
 ('Difference', ''),     
 ('% of transfers', '14 day cutoff'),
 ('% of transfers', '28 day cutoff'),
 ('% Difference', '')         
 ]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Number of transfers,Number of transfers,Difference,% of transfers,% of transfers,% Difference
Unnamed: 0_level_1,Unnamed: 1_level_1,14 day cutoff,28 day cutoff,Unnamed: 4_level_1,14 day cutoff,28 day cutoff,Unnamed: 7_level_1
status,failure_reason,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Integrated On Time,,193033,193221,188,87.75,87.84,0.09
Process Failure,Integrated Late,6801,11235,4434,3.09,5.11,2.02
Process Failure,"Transferred, not integrated",10312,5908,-4404,4.69,2.69,-2.0
Technical Failure,COPC(s) not Acknowledged,199,123,-76,0.09,0.06,-0.03
Technical Failure,COPC(s) not sent,24,19,-5,0.01,0.01,0.0
Technical Failure,Contains Fatal Sender Error,3182,3182,0,1.45,1.45,0.0
Technical Failure,Core Extract not Sent,3284,3248,-36,1.49,1.48,-0.01
Technical Failure,Final Error,1904,1984,80,0.87,0.9,0.03
Technical Failure,Request not Acknowledged,783,741,-42,0.36,0.34,-0.02
Unclassified Failure,Ambiguous COPC messages,305,212,-93,0.14,0.1,-0.04
