# QA PRMT-2490 - List of practices with transfers from Aug-Dec 2021

## Context
To find newly formed practices, we list each practice together with the number of transfers it requested from August to December 2021, grouped by the requesting practice's ODS code and sorted in ascending order of transfer count.

In [1]:
import pandas as pd 
import numpy as np
import paths, data
from sklearn.preprocessing import MultiLabelBinarizer
from datetime import datetime
from data.practice_metadata import read_asid_metadata

## Import transfer dataset

In [2]:
# Read the GP2GP response-code reference CSV, then index it by code to get
# a Series that maps each ErrorCode to its ErrorName.
error_code_lookup_file = pd.read_csv(data.gp2gp_response_codes.path)
error_code_lookup = (
    error_code_lookup_file
    .set_index("ErrorCode")
    .loc[:, "ErrorName"]
)

In [3]:
# Practice metadata; its index is matched against the ASID columns in the joins below.
asid_lookup = read_asid_metadata("prm-gp2gp-ods-metadata-dev", "v2/2022/1/organisationMetadata.json")
data_folder = "s3://prm-gp2gp-transfer-data-dev/v6/2021"

# One parquet file per month: range(8, 13) yields months 8..12 (August to December).
transfer_parquet_files = [f"{data_folder}/{month}/2021-{month}-transfers.parquet" for month in range(8, 13)]

transfers_raw = pd.concat([pd.read_parquet(file) for file in transfer_parquet_files])

# Parenthesised chain rather than backslash continuations: the original ended
# with a dangling "\" that only parsed because the following line was blank.
transfers = (
    transfers_raw
    # Left joins keep every transfer, even when an ASID has no metadata entry.
    .join(asid_lookup.add_prefix("requesting_"), on="requesting_practice_asid", how="left")
    .join(asid_lookup.add_prefix("sending_"), on="sending_practice_asid", how="left")
    # Turn status values into readable labels: underscores become spaces and
    # only the first character is upper-cased.
    .assign(status=lambda frame: frame["status"].str.replace("_", " ").str.capitalize())
)


In [4]:
# Number of rows in the joined transfers frame.
len(transfers)

1220244

In [5]:
# Count conversation ids per requesting practice ODS code and order the
# practices from fewest to most, then turn the ODS code back into a column.
qa_data = (
    transfers
    .groupby("requesting_practice_ods_code")[["conversation_id"]]
    .count()
    .sort_values("conversation_id", ascending=True)
    .reset_index()
)

In [6]:
# Write the per-practice counts to an Excel workbook; the context manager
# saves and closes the file on exit. The row index is omitted from the sheet.
with pd.ExcelWriter("QA-PRMT-2490-list-of-practices.xlsx") as writer:
    qa_data.to_excel(writer, sheet_name="Practices", index=False)
