In [None]:
import os
from pathlib import Path
import pandas as pd

In [None]:
# Root folder of the input data, taken from the INPUT_DATA_DIR environment variable
# (a KeyError is raised here if the variable is not set).
data_dir = Path(os.environ["INPUT_DATA_DIR"])
# Path.rglob returns a one-shot generator. Materialise it as a sorted list so the
# cell that reads these files can be re-run without re-running this one, and so
# the files are always read in the same order.
transfer_input_files = sorted(data_dir.rglob("transfers-*.json"))
asid_lookup_file = data_dir / "asidLookup.csv"

In [None]:
# Load the ASID lookup table (asidLookup.csv inside the input data directory).
asid_lookup = pd.read_csv(filepath_or_buffer=asid_lookup_file)

In [None]:
# Columns that read_json should parse as datetimes.
date_cols = ["dateRequested", "dateCompleted"]

# Read every transfers file into its own frame before combining them.
transfer_frames = [
    pd.read_json(f, convert_dates=date_cols)
    for f in transfer_input_files
]
if not transfer_frames:
    # pd.concat would fail with a generic "No objects to concatenate"; say why instead.
    raise ValueError(
        "No transfers-*.json files were read: check INPUT_DATA_DIR, or re-run the cell "
        "that defines transfer_input_files if it has already been iterated."
    )

# ignore_index=True gives the combined frame one continuous row index; without it
# each file keeps its own row labels, which can repeat across files.
transfers = pd.concat(transfer_frames, ignore_index=True)
# Release the per-file frames; only the combined frame is used from here on.
del transfer_frames

In [None]:
# Attach the ASID lookup twice: first for the requesting practice, then for the
# sending practice. All joins are left joins on the respective ASID column.
with_requesting_lookup = transfers.merge(
    asid_lookup,
    how='left',
    left_on='requestingPracticeAsid',
    right_on='ASID',
)
# In the second merge the lookup's columns (e.g. MName) collide with the copies added
# by the first merge, so pandas applies the suffixes: "_requesting" to the existing
# (left) columns and "_sending" to the newly joined (right) ones.
supplier_transfers = with_requesting_lookup.merge(
    asid_lookup,
    how='left',
    left_on='sendingPracticeAsid',
    right_on='ASID',
    suffixes=('_requesting', '_sending'),
)

# Rows whose ASID found no match in the lookup get an explicit "Unknown" label.
for mname_col in ("MName_sending", "MName_requesting"):
    supplier_transfers[mname_col] = supplier_transfers[mname_col].fillna("Unknown")

In [None]:
# TODO: Can we increase the lookup join coverage?
# Row counts per sending-side MName; unmatched ASIDs show up under "Unknown".
sending_mname = supplier_transfers["MName_sending"]
sending_mname.value_counts(dropna=False)

In [None]:
# Row counts per (year, month, requesting MName, sending MName), one column per status.
requested_on = supplier_transfers["dateRequested"]
monthy_transfer_breakdown_by_supplier_pathways = supplier_transfers[
    ["dateRequested", "MName_requesting", "MName_sending", "status"]
].pivot_table(
    index=[
        # Name the derived groupers explicitly: both would otherwise inherit the name
        # "dateRequested", leaving two index levels that cannot be told apart by name.
        requested_on.dt.year.rename("year"),
        requested_on.dt.month.rename("month"),
        "MName_requesting",
        "MName_sending",
    ],
    columns=["status"],
    # No `values` is given, so the one column not used as a key ("dateRequested")
    # is what gets aggregated; len simply counts the rows in each group. The result
    # therefore has a "dateRequested" top-level column group above the statuses.
    aggfunc=len,
    fill_value=0,
)
monthy_transfer_breakdown_by_supplier_pathways

In [None]:
EMIS_name = "EGTON MEDICAL INFORMATION SYSTEMS LTD (EMIS)"

# Boolean mask over the breakdown's rows: True where the requesting MName is EMIS.
requesting_mname = monthy_transfer_breakdown_by_supplier_pathways.index.get_level_values(
    'MName_requesting'
)
is_emis_requester = requesting_mname == EMIS_name

# Keep the EMIS rows, then select the "dateRequested" column group so that only
# the per-status columns remain.
EMIS_monthly_transfer_breakdown = (
    monthy_transfer_breakdown_by_supplier_pathways
    .loc[is_emis_requester]
    ['dateRequested']
)

In [None]:
# Express every count as a percentage of its row total.
emis_row_totals = EMIS_monthly_transfer_breakdown.sum(axis="columns")
EMIS_monthly_transfer_breakdown.div(emis_row_totals, axis="index").mul(100)

In [None]:
#supplier_transfers["intermediateErrorCodeList"] = supplier_transfers["intermediateErrorCodes"].apply(lambda x: ",".join((str(n) for n in x)))

In [None]:
import paths, data
# Load the GP2GP response-code table and attach it to each row via its final error code.
response_codes = pd.read_csv(data.gp2gp_response_codes.path)
# This cell overwrites supplier_transfers, so on a re-run the frame already carries
# the response-code columns and merging again would produce suffixed duplicates
# (e.g. ErrorName_x / ErrorName_y), breaking later lookups of "ErrorName".
# Dropping any columns left by a previous run first makes the cell safe to re-run.
supplier_transfers = supplier_transfers.drop(
    columns=response_codes.columns, errors="ignore"
).merge(
    response_codes,
    left_on='finalErrorCode',
    right_on='ErrorCode',
    how='left',
)

In [None]:
# Breakdown of final error codes per (requesting MName, sending MName) pathway.
pathway_levels = ["MName_requesting", "MName_sending"]
final_error_code_rows = supplier_transfers[pathway_levels + ["finalErrorCode"]]

# One row per pathway, one column per finalErrorCode; aggfunc=len counts rows and
# combinations that never occur are filled with 0.
final_errors_per_suplier_path = pd.pivot_table(
    final_error_code_rows,
    index=pathway_levels,
    columns=["finalErrorCode"],
    aggfunc=len,
    fill_value=0,
)
final_errors_per_suplier_path

In [None]:
# Breakdown of final errors per (requesting MName, sending MName) pathway, keyed by
# ErrorName. Note: this rebinds final_errors_per_suplier_path.
error_name_pathway_levels = ["MName_requesting", "MName_sending"]
error_name_rows = supplier_transfers[error_name_pathway_levels + ["ErrorName"]]

# One row per pathway, one column per ErrorName; aggfunc=len counts rows and
# combinations that never occur are filled with 0.
final_errors_per_suplier_path = pd.pivot_table(
    error_name_rows,
    index=error_name_pathway_levels,
    columns=["ErrorName"],
    aggfunc=len,
    fill_value=0,
)
final_errors_per_suplier_path