In [0]:
#importing the tools
from env import env
from src import utils, excel

import openpyxl
import pandas as pd

from pyspark.sql import functions as F
from datetime import datetime
from pyspark.sql.functions import date_format
from openpyxl.styles import NamedStyle


In [0]:
# Load the raw specialist-advice extract from the location configured under
# env["spec_advi_path"]. The files are read as Parquet.
# recursiveFileLookup lets Spark pick up files in nested sub-folders of that path.
# NOTE(review): "header" is a CSV-reader option; it is kept here unchanged but is
# not expected to have any effect on a Parquet read - confirm and drop if so.
df_raw_sa_csv = (
    spark.read
    .options(header="true", recursiveFileLookup="true")
    .parquet(env["spec_advi_path"])
)

In [0]:
# Provider-level extract: rows from April 2022 onwards for acute providers only,
# with the source columns relabelled to the headings that end up in the CSV export.
df_provider_spec_adv_csv = (
    df_raw_sa_csv
    # Filter on the source column names, before they are relabelled below.
    .where(
        (F.col("EROC_DerMonth") >= '2022-04-01')
        & (F.col("Acute_Status") == 'Acute')
    )
    # Keep only the listed columns; each pair is (source column, output heading).
    .select(*[
        F.col(source_name).alias(output_label)
        for source_name, output_label in (
            ("EROC_DerMonth", "Activity Month"),
            ("RESP_Org_Code", "Provider Code"),
            ("RESP_Org_Name", "Provider Name"),
            ("Metric_Diverted_Requests", "Diverted Requests"),
            ("Metric_Processed_Requests", "Processed Requests"),
            ("In_scope_of_ERF", "In scope of ERF?"),
            ("EROC_TypeOfSpecialistAdvice_Desc", "Type of Specialist Advice"),
            ("EROC_Requests", "Requests"),
            ("Acute_Status", "Acute Status"),
        )
    ])
)
 
# Sum "Requests" for every distinct combination of the grouping columns, then
# order the result by provider, advice type and month.
# "Acute Status" is not a grouping column, so it is absent from the result.
# NOTE(review): "Diverted Requests" and "Processed Requests" act as grouping keys
# here rather than being summed - confirm that is the intent.
df_provider_spec_adv_agg = (
    df_provider_spec_adv_csv
    .groupBy([
        "Activity Month",
        "Provider Code",
        "Provider Name",
        "Type of Specialist Advice",
        "Diverted Requests",
        "Processed Requests",
        "In scope of ERF?",
    ])
    .agg(F.sum("Requests").alias("Requests"))
    .orderBy("Provider Code", "Type of Specialist Advice", "Activity Month")
)
 
# Preview the aggregated provider dataset in the notebook.
display(df_provider_spec_adv_agg)

# Collect the aggregate to the driver once and reuse it for both the row count
# and the CSV export. Calling .count() on the Spark frame would re-run the whole
# read / filter / aggregate job just to obtain a number that the collected
# pandas frame already provides.
df_provider_spec_adv_agg_pd = df_provider_spec_adv_agg.toPandas()
rows = len(df_provider_spec_adv_agg_pd)
print(rows)

# Save as CSV. index=False keeps the pandas row index out of the file.
# The relative 'Outputs' folder must already exist in the working directory.
df_provider_spec_adv_agg_pd.to_csv('Outputs/Provider specialist advice activity full dataset from April 2022.csv', index=False)
 
 
# ICB specialist advice full dataset from April 2022
#
# ICB-level extract: rows from April 2022 onwards, with the source columns
# relabelled to the headings that end up in the CSV export.
# NOTE(review): unlike the provider extract, no Acute_Status filter is applied
# here - confirm that all providers are meant to be included at ICB level.
df_ICB_spec_adv_csv = (
    df_raw_sa_csv
    # Filter on the source column name, before it is relabelled below.
    .where(F.col("EROC_DerMonth") >= '2022-04-01')
    # Keep only the listed columns; each pair is (source column, output heading).
    .select(*[
        F.col(source_name).alias(output_label)
        for source_name, output_label in (
            ("EROC_DerMonth", "Activity Month"),
            ("EROC_STP_Code", "ICB Code"),
            ("EROC_STP_Name", "ICB Name"),
            ("Metric_Diverted_Requests", "Diverted Requests"),
            ("Metric_Processed_Requests", "Processed Requests"),
            ("In_scope_of_ERF", "In scope of ERF?"),
            ("EROC_TypeOfSpecialistAdvice_Desc", "Type of Specialist Advice"),
            ("EROC_Requests", "Requests"),
        )
    ])
)
 
# Sum "Requests" for every distinct combination of the grouping columns, then
# order the result by ICB, advice type and month.
# NOTE(review): "Diverted Requests" and "Processed Requests" act as grouping keys
# here rather than being summed - confirm that is the intent.
df_ICB_spec_adv_csv_agg = (
    df_ICB_spec_adv_csv
    .groupBy([
        "Activity Month",
        "ICB Code",
        "ICB Name",
        "Diverted Requests",
        "Processed Requests",
        "In scope of ERF?",
        "Type of Specialist Advice",
    ])
    .agg(F.sum("Requests").alias("Requests"))
    .orderBy("ICB Code", "Type of Specialist Advice", "Activity Month")
)
# Preview the aggregated ICB dataset, i.e. the same frame that is exported below.
# The previous call displayed the pre-aggregation frame (df_ICB_spec_adv_csv),
# so the preview did not match the file that was written.
display(df_ICB_spec_adv_csv_agg)

# Save as CSV. toPandas() collects the aggregate to the driver; index=False keeps
# the pandas row index out of the file.
# The relative 'Outputs' folder must already exist in the working directory.
df_ICB_spec_adv_csv_pd = df_ICB_spec_adv_csv_agg.toPandas()
df_ICB_spec_adv_csv_pd.to_csv('Outputs/ICB specialist advice activity full dataset from April 2022.csv', index=False)