In [0]:
#importing the tools
from env import env
from src import utils, excel

import openpyxl
import pandas as pd

from pyspark.sql import functions as F
from datetime import datetime
from pyspark.sql.functions import date_format
from openpyxl.styles import NamedStyle

In [0]:
# Read every parquet file found under the specialist-advice path (sub-folders included).
df_raw_sa = (
    spark.read
    .option("header", "true")
    .option("recursiveFileLookup", "true")
    .parquet(env["spec_advi_path"])
)

# Template workbook that the output cells below write into.
wb = openpyxl.load_workbook('Provider template.xlsx')

# Report header text: the fixed start of the period followed by the latest month
# present in the data, e.g. "April 2022 to <Month YYYY>".
report_start = 'April 2022 to '
latest_month_row = df_raw_sa.agg(F.max("EROC_DerMonth")).first()
# The month is parsed as a yyyy-mm-dd string here.
publishing_month = datetime.strptime(latest_month_row[0], '%Y-%m-%d').strftime("%B %Y")
date_header = f"{report_start}{publishing_month}"

display(df_raw_sa)

In [0]:
# Total requests for all types of SA.
# Keeps Acute providers and months after 2022-03-01, then adds up EROC_Requests
# for every month / provider pair.
df_total_all_types_sa = (
    df_raw_sa
    .filter(F.col("EROC_DerMonth") > '2022-03-01')
    .filter(F.col("Acute_Status") == "Acute")
    .groupBy("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name")
    .agg(F.sum("EROC_Requests").alias("Requests"))
    .orderBy("EROC_DerMonth", "RESP_Org_Code")
    # Explicit projection pins the column order of the result.
    .select("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name", "Requests")
)

display(df_total_all_types_sa)

In [0]:
# Pivot to the report layout: one row per provider, one column per month.
df_total_all_types_sa_pivot = (
    df_total_all_types_sa
    .groupBy("RESP_Org_Code", "RESP_Org_Name")
    .pivot("EROC_DerMonth")
    .agg(F.sum("Requests"))
    .orderBy("RESP_Org_Code", "RESP_Org_Name")
    # Reader-friendly names for the two provider columns.
    .withColumnRenamed("RESP_Org_Code", "Provider Code")
    .withColumnRenamed("RESP_Org_Name", "Provider Name")
)

display(df_total_all_types_sa_pivot)

In [0]:
# Writes the "total requests, all types of SA" pivot into the first table of sheet
# '1) All Types of Spec Advice' and stretches the template formatting over any
# months/providers beyond the 37 months x 134 providers hard-coded below.
# NOTE(review): the row/column constants describe 'Provider template.xlsx' and
# assume the header is written on `startrow` - confirm if the template changes.

#1.Rename the pivoted month columns from yyyy-mm-dd to Mon-yyyy
for column in df_total_all_types_sa_pivot.columns[2:]:
    month_format = datetime.strptime(column, '%Y-%m-%d').strftime("%b-%Y")
    df_total_all_types_sa_pivot = df_total_all_types_sa_pivot.withColumnRenamed(column, month_format)

#2.converting the pivot to pandas dataframe
df_pd_total_all_types_sa_pivot = df_total_all_types_sa_pivot.toPandas()

#3.Write the table into the template sheet
ws_total_all = wb['1) All Types of Spec Advice']

excel.insert_pandas_df_into_excel(
    df=df_pd_total_all_types_sa_pivot,
    ws=ws_total_all,
    header=True,
    startrow=14,
    startcol=2,
    index=False,
)

#4.Width and height of the table, measured on the frame that was just written.
#  The pivot has one row per (code, name) pair, so counting distinct names on the
#  long frame could disagree with the number of rows actually on the sheet.
number_of_providers, number_of_columns = df_pd_total_all_types_sa_pivot.shape
number_of_months = number_of_columns - 2  # everything after Provider Code / Provider Name
pre_date_columns = 3
copy_column = 37 + pre_date_columns  # last month column the template already formats
end_column = number_of_months + pre_date_columns + 1  # exclusive upper bound

new_provider = number_of_providers - 134
pre_table_rows = 14  # header row of this table (same as startrow above)
copy_row = pre_table_rows + 134  # last provider row the template already formats
end_row = copy_row + new_provider + 1  # exclusive upper bound

#5.Copy the last template column's formatting onto the new month columns.
#  The range ends at copy_row inclusive: step 6 copies that row downwards, so its
#  new month columns must be formatted first.
for column_number in range(copy_column, end_column):
    for row_number in range(14, copy_row + 1):
        cell_to_copy_from = ws_total_all.cell(row=row_number, column=copy_column)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#6.Copy the last template row's formatting onto the new provider rows
for row_number in range(copy_row, end_row):
    for column_number in range(2, end_column):
        cell_to_copy_from = ws_total_all.cell(row=copy_row, column=column_number)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#7.Cell range of the whole table, from the header row to the last provider row
conditional_formatting_start_cell = "D14"
conditional_formatting_end_col = openpyxl.utils.cell.get_column_letter(end_column - 1)
conditional_formatting_end_row = end_row - 1
conditional_formatting_end_cell = conditional_formatting_end_col + str(conditional_formatting_end_row)
conditional_formatting_range = f"{conditional_formatting_start_cell}:{conditional_formatting_end_cell}"

#8.Re-apply the sheet's existing conditional formatting rules to the whole table.
#  Iterating ws.conditional_formatting yields range objects that carry a .rules
#  list, while add() only accepts Rule instances, so the rules are unpacked first.
#  The list() snapshot avoids adding to the collection while iterating over it,
#  and the identity check stops a rule re-added by another cell being copied twice.
existing_rules = []
for formatted_range in list(ws_total_all.conditional_formatting):
    for rule in formatted_range.rules:
        if not any(rule is seen for seen in existing_rules):
            existing_rules.append(rule)
for rule in existing_rules:
    ws_total_all.conditional_formatting.add(conditional_formatting_range, rule)

#9.updating publishing date header
ws_total_all.cell(row=3, column=3).value = date_header

#10.Thousands-separated integer format for the figures
number_style = NamedStyle(name="number", number_format="#,##0")

for row in ws_total_all.iter_rows(min_row=15, max_row=end_row, min_col=pre_date_columns + 1, max_col=end_column):
    for cell in row:
        cell.number_format = number_style.number_format

# Same format for column D on rows 149, 291 and 434
# (presumably the totals rows under the three tables - TODO confirm).
for row in [149, 291, 434]:
    ws_total_all.cell(row=row, column=4).number_format = number_style.number_format

#wb.save('Outputs/Provider_template.xlsx')

In [0]:
# Processed requests for all types of SA.
# Same scope as the totals (Acute providers, months after 2022-03-01) but leaving
# out rows with EROC_DerStatus 2 or EROC_DerOutcome 40.
df_processed_all_types_sa = (
    df_raw_sa
    .filter(F.col("EROC_DerMonth") > '2022-03-01')
    .filter(F.col("Acute_Status") == "Acute")
    .filter(F.col("EROC_DerStatus") != 2)
    .filter(F.col("EROC_DerOutcome") != 40)
    .groupBy("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name")
    .agg(F.sum("EROC_Requests").alias("Requests"))
    .orderBy("EROC_DerMonth", "RESP_Org_Code")
    # Explicit projection pins the column order of the result.
    .select("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name", "Requests")
)

display(df_processed_all_types_sa)

In [0]:
# Pivot to the report layout: one row per provider, one column per month.
df_processed_all_types_sa_pivot = (
    df_processed_all_types_sa
    .groupBy("RESP_Org_Code", "RESP_Org_Name")
    .pivot("EROC_DerMonth")
    .agg(F.sum("Requests"))
    .orderBy("RESP_Org_Code", "RESP_Org_Name")
    # Reader-friendly names for the two provider columns.
    .withColumnRenamed("RESP_Org_Code", "Provider Code")
    .withColumnRenamed("RESP_Org_Name", "Provider Name")
)

display(df_processed_all_types_sa_pivot)


In [0]:
# Writes the "processed requests, all types of SA" pivot into the second table of
# sheet '1) All Types of Spec Advice' and stretches the template formatting over
# any months/providers beyond the 37 months x 134 providers hard-coded below.
# NOTE(review): the row/column constants describe 'Provider template.xlsx' and
# assume the header is written on `startrow` - confirm if the template changes.

#1.Rename the pivoted month columns from yyyy-mm-dd to Mon-yyyy
for column in df_processed_all_types_sa_pivot.columns[2:]:
    month_format = datetime.strptime(column, '%Y-%m-%d').strftime("%b-%Y")
    df_processed_all_types_sa_pivot = df_processed_all_types_sa_pivot.withColumnRenamed(column, month_format)

#2.converting the pivot to pandas dataframe
df_pd_processed_all_types_sa_pivot = df_processed_all_types_sa_pivot.toPandas()

#3.Write the table into the template sheet
ws_total_all = wb['1) All Types of Spec Advice']

excel.insert_pandas_df_into_excel(
    df=df_pd_processed_all_types_sa_pivot,
    ws=ws_total_all,
    header=True,
    startrow=156,
    startcol=2,
    index=False,
)

#4.Width and height of the table, measured on the frame that was just written.
#  The pivot has one row per (code, name) pair, so counting distinct names on the
#  long frame could disagree with the number of rows actually on the sheet.
number_of_providers, number_of_columns = df_pd_processed_all_types_sa_pivot.shape
number_of_months = number_of_columns - 2  # everything after Provider Code / Provider Name
pre_date_columns = 3
copy_column = 37 + pre_date_columns  # last month column the template already formats
end_column = number_of_months + pre_date_columns + 1  # exclusive upper bound

new_provider = number_of_providers - 134
# Header row of this table (same as startrow above). Using 155 here made every
# derived row one too small, so the last provider row was left out of the
# formatted / conditional-format range.
pre_table_rows = 156
copy_row = pre_table_rows + 134  # last provider row the template already formats
end_row = copy_row + new_provider + 1  # exclusive upper bound

#5.Copy the last template column's formatting onto the new month columns.
#  Runs from row 14 down to this table's last template row so that this table's
#  own rows are covered; stopping at row 147 only reached the first table.
for column_number in range(copy_column, end_column):
    for row_number in range(14, copy_row + 1):
        cell_to_copy_from = ws_total_all.cell(row=row_number, column=copy_column)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#6.Copy the last template row's formatting onto the new provider rows
for row_number in range(copy_row, end_row):
    for column_number in range(2, end_column):
        cell_to_copy_from = ws_total_all.cell(row=copy_row, column=column_number)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#7.Cell range of the whole table, from the header row to the last provider row
conditional_formatting_start_cell = "D156"
conditional_formatting_end_col = openpyxl.utils.cell.get_column_letter(end_column - 1)
conditional_formatting_end_row = end_row - 1
conditional_formatting_end_cell = conditional_formatting_end_col + str(conditional_formatting_end_row)
conditional_formatting_range = f"{conditional_formatting_start_cell}:{conditional_formatting_end_cell}"

#8.Re-apply the sheet's existing conditional formatting rules to the whole table.
#  Iterating ws.conditional_formatting yields range objects that carry a .rules
#  list, while add() only accepts Rule instances, so the rules are unpacked first.
#  The list() snapshot avoids adding to the collection while iterating over it,
#  and the identity check stops a rule re-added by another cell being copied twice.
existing_rules = []
for formatted_range in list(ws_total_all.conditional_formatting):
    for rule in formatted_range.rules:
        if not any(rule is seen for seen in existing_rules):
            existing_rules.append(rule)
for rule in existing_rules:
    ws_total_all.conditional_formatting.add(conditional_formatting_range, rule)

#9.updating publishing date header
ws_total_all.cell(row=3, column=3).value = date_header

#10.Thousands-separated integer format for the figures
number_style = NamedStyle(name="number", number_format="#,##0")

for row in ws_total_all.iter_rows(min_row=15, max_row=end_row, min_col=pre_date_columns + 1, max_col=end_column):
    for cell in row:
        cell.number_format = number_style.number_format

# Same format for column D on rows 149, 291 and 434
# (presumably the totals rows under the three tables - TODO confirm).
for row in [149, 291, 434]:
    ws_total_all.cell(row=row, column=4).number_format = number_style.number_format

#wb.save('Outputs/Provider_template.xlsx')

In [0]:
# Diverted requests for all types of SA.
# Acute providers, months after 2022-03-01, EROC_DerStatus other than 2 and
# EROC_DerOutcome 10 or 12, summed for every month / provider pair.
df_diverted_all_types_sa = (
    df_raw_sa
    .filter(F.col("EROC_DerMonth") > '2022-03-01')
    .filter(F.col("Acute_Status") == "Acute")
    .filter(F.col("EROC_DerStatus") != 2)
    .filter(F.col("EROC_DerOutcome").isin(10, 12))
    .groupBy("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name")
    .agg(F.sum("EROC_Requests").alias("Requests"))
    .orderBy("EROC_DerMonth", "RESP_Org_Code")
    # Explicit projection pins the column order of the result.
    .select("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name", "Requests")
)

display(df_diverted_all_types_sa)

In [0]:
# Pivot to the report layout: one row per provider, one column per month.
df_diverted_all_types_sa_pivot = (
    df_diverted_all_types_sa
    .groupBy("RESP_Org_Code", "RESP_Org_Name")
    .pivot("EROC_DerMonth")
    .agg(F.sum("Requests"))
    .orderBy("RESP_Org_Code", "RESP_Org_Name")
    # Reader-friendly names for the two provider columns.
    .withColumnRenamed("RESP_Org_Code", "Provider Code")
    .withColumnRenamed("RESP_Org_Name", "Provider Name")
)

display(df_diverted_all_types_sa_pivot)

In [0]:
# Writes the "diverted requests, all types of SA" pivot into the third table of
# sheet '1) All Types of Spec Advice' and stretches the template formatting over
# any months/providers beyond the 37 months x 134 providers hard-coded below.
# NOTE(review): the row/column constants describe 'Provider template.xlsx' and
# assume the header is written on `startrow` - confirm if the template changes.

#1.Rename the pivoted month columns from yyyy-mm-dd to Mon-yyyy
for column in df_diverted_all_types_sa_pivot.columns[2:]:
    month_format = datetime.strptime(column, '%Y-%m-%d').strftime("%b-%Y")
    df_diverted_all_types_sa_pivot = df_diverted_all_types_sa_pivot.withColumnRenamed(column, month_format)

#2.converting the pivot to pandas dataframe
df_pd_diverted_all_types_sa_pivot = df_diverted_all_types_sa_pivot.toPandas()

#3.Write the table into the template sheet
ws_total_all = wb['1) All Types of Spec Advice']

excel.insert_pandas_df_into_excel(
    df=df_pd_diverted_all_types_sa_pivot,
    ws=ws_total_all,
    header=True,
    startrow=299,
    startcol=2,
    index=False,
)

#4.Width and height of the table, measured on the frame that was just written.
#  The pivot has one row per (code, name) pair, so counting distinct names on the
#  long frame could disagree with the number of rows actually on the sheet.
number_of_providers, number_of_columns = df_pd_diverted_all_types_sa_pivot.shape
number_of_months = number_of_columns - 2  # everything after Provider Code / Provider Name
pre_date_columns = 3
copy_column = 37 + pre_date_columns  # last month column the template already formats
end_column = number_of_months + pre_date_columns + 1  # exclusive upper bound

new_provider = number_of_providers - 134
# Header row of this table (same as startrow above). Using 298 here made every
# derived row one too small, so the last provider row was left out of the
# formatted / conditional-format range.
pre_table_rows = 299
copy_row = pre_table_rows + 134  # last provider row the template already formats
end_row = copy_row + new_provider + 1  # exclusive upper bound

#5.Copy the last template column's formatting onto the new month columns.
#  Runs from row 14 down to this table's last template row; stopping at row 298
#  covered the two tables above but none of this table's own rows.
for column_number in range(copy_column, end_column):
    for row_number in range(14, copy_row + 1):
        cell_to_copy_from = ws_total_all.cell(row=row_number, column=copy_column)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#6.Copy the last template row's formatting onto the new provider rows
for row_number in range(copy_row, end_row):
    for column_number in range(2, end_column):
        cell_to_copy_from = ws_total_all.cell(row=copy_row, column=column_number)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#7.Cell range of the whole table, from the header row to the last provider row
conditional_formatting_start_cell = "D299"
conditional_formatting_end_col = openpyxl.utils.cell.get_column_letter(end_column - 1)
conditional_formatting_end_row = end_row - 1
conditional_formatting_end_cell = conditional_formatting_end_col + str(conditional_formatting_end_row)
conditional_formatting_range = f"{conditional_formatting_start_cell}:{conditional_formatting_end_cell}"

#8.Re-apply the sheet's existing conditional formatting rules to the whole table.
#  Iterating ws.conditional_formatting yields range objects that carry a .rules
#  list, while add() only accepts Rule instances, so the rules are unpacked first.
#  The list() snapshot avoids adding to the collection while iterating over it,
#  and the identity check stops a rule re-added by another cell being copied twice.
existing_rules = []
for formatted_range in list(ws_total_all.conditional_formatting):
    for rule in formatted_range.rules:
        if not any(rule is seen for seen in existing_rules):
            existing_rules.append(rule)
for rule in existing_rules:
    ws_total_all.conditional_formatting.add(conditional_formatting_range, rule)

#9.updating publishing date header
ws_total_all.cell(row=3, column=3).value = date_header

#10.Thousands-separated integer format for the figures
number_style = NamedStyle(name="number", number_format="#,##0")

for row in ws_total_all.iter_rows(min_row=15, max_row=end_row, min_col=pre_date_columns + 1, max_col=end_column):
    for cell in row:
        cell.number_format = number_style.number_format

# Same format for column D on rows 149, 291 and 434
# (presumably the totals rows under the three tables - TODO confirm).
for row in [149, 291, 434]:
    ws_total_all.cell(row=row, column=4).number_format = number_style.number_format

#wb.save('Outputs/Provider_template.xlsx')

In [0]:
# Total pre-referral requests: one row per Acute provider, one column per month,
# with 0 where a provider has no pre-referral requests in a month.
from pyspark.sql.functions import col, to_date, lit, sum as F_sum

# Work with EROC_DerMonth as a date from here on.
df_raw_sa = df_raw_sa.withColumn("EROC_DerMonth", to_date("EROC_DerMonth"))

# Every Acute provider found in the raw data, whether or not it has
# pre-referral requests.
df_all_orgs = (
    df_raw_sa
    .where(col("Acute_Status") == "Acute")
    .select("RESP_Org_Code", "RESP_Org_Name")
    .distinct()
)

# Pre-referral rows (EROC_DerTypeOfSpecialistAdvice == 1) for Acute providers after
# 2022-03-01, summed per month and provider. No EROC_DerOutcome filter is applied.
df_total_pre_referral_sa = (
    df_raw_sa
    .where(col("EROC_DerMonth") > lit("2022-03-01"))
    .where(col("Acute_Status") == "Acute")
    .where(col("EROC_DerTypeOfSpecialistAdvice") == 1)
    .select("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name", "EROC_Requests")
    .groupBy("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name")
    .agg(F_sum("EROC_Requests").alias("Requests"))
)

# Months that have at least one pre-referral request.
all_months_df = (
    df_total_pre_referral_sa
    .select("EROC_DerMonth")
    .where(col("EROC_DerMonth").isNotNull())
    .distinct()
)
all_months = [month_row[0] for month_row in all_months_df.orderBy("EROC_DerMonth").collect()]

# Provider x month grid, so that combinations without data still get a row.
full_grid = df_all_orgs.crossJoin(all_months_df)

# Attach the aggregated figures to the grid; combinations without data become 0.
df_total_pre_referral_sa_full = full_grid.join(
    df_total_pre_referral_sa,
    on=["RESP_Org_Code", "RESP_Org_Name", "EROC_DerMonth"],
    how="left",
).fillna(0, subset=["Requests"])

# Pivot with the month list given explicitly, in ascending order.
valid_months = all_months

df_total_pre_referral_sa_pivot = (
    df_total_pre_referral_sa_full
    .groupBy("RESP_Org_Code", "RESP_Org_Name")
    .pivot("EROC_DerMonth", values=valid_months)
    .agg(F_sum("Requests"))
    .orderBy("RESP_Org_Code", "RESP_Org_Name")
    .withColumnRenamed("RESP_Org_Code", "Provider Code")
    .withColumnRenamed("RESP_Org_Name", "Provider Name")
)
display(df_total_pre_referral_sa_pivot)



In [0]:
#Ignoring any merged cells in template
from openpyxl.cell.cell import MergedCell

def safe_insert_pandas_df_into_excel(df, ws, header, startrow, startcol, index):
    """
    Write a pandas DataFrame into a worksheet, leaving merged cells untouched.

    df: frame to write.
    ws: worksheet to write into (cells are addressed with 1-based row/column).
    header: when true, the column names go on `startrow` and the data starts on
        the row below; otherwise the data starts on `startrow`.
    startrow, startcol: top-left cell of the written block.
    index: when true, the index value is written in `startcol` and the header
        names are shifted one column to the right to line up with the data.
    """
    def _write_unless_merged(row_number, column_number, value):
        # Cells that are part of a merged range are skipped, not written.
        target = ws.cell(row=row_number, column=column_number)
        if not isinstance(target, MergedCell):
            target.value = value

    first_data_row = startrow
    if header:
        header_shift = 1 if index else 0
        for position, col_name in enumerate(df.columns):
            _write_unless_merged(startrow, startcol + position + header_shift, col_name)
        first_data_row = startrow + 1

    records = df.itertuples(index=index, name=None)
    for row_number, record in enumerate(records, start=first_data_row):
        for column_number, val in enumerate(record, start=startcol):
            _write_unless_merged(row_number, column_number, val)

In [0]:
# Writes the "total requests, pre-referral SA" pivot into the first table of sheet
# '2) Pre referral Spec Advice' and stretches the template formatting over any
# months/providers beyond the 37 months x 134 providers hard-coded below.
# NOTE(review): the row/column constants describe 'Provider template.xlsx';
# confirm them against the workbook if the template changes.

#1.Rename the pivoted month columns from yyyy-mm-dd to Mon-yyyy
for column in df_total_pre_referral_sa_pivot.columns[2:]:
    month_format = datetime.strptime(column, '%Y-%m-%d').strftime("%b-%Y")
    df_total_pre_referral_sa_pivot = df_total_pre_referral_sa_pivot.withColumnRenamed(column, month_format)

#2.converting the pivot to pandas dataframe
df_pd_total_pre_referral_sa_pivot = df_total_pre_referral_sa_pivot.toPandas()

#3.Write the table into the template sheet (header on row 14, data from row 15)
ws_total_all = wb['2) Pre referral Spec Advice']

safe_insert_pandas_df_into_excel(
    df=df_pd_total_pre_referral_sa_pivot,
    ws=ws_total_all,
    header=True,
    startrow=14,
    startcol=2,
    index=False,
)

#4.Width and height of the table, measured on the frame that was just written.
#  The pivot holds a row for every Acute provider, including those without any
#  pre-referral requests, so counting providers on the aggregated frame would
#  give fewer rows than are actually on the sheet.
number_of_providers, number_of_columns = df_pd_total_pre_referral_sa_pivot.shape
number_of_months = number_of_columns - 2  # everything after Provider Code / Provider Name
pre_date_columns = 3
copy_column = 37 + pre_date_columns  # last month column the template already formats
end_column = number_of_months + pre_date_columns + 1  # exclusive upper bound

new_provider = number_of_providers - 134
pre_table_rows = 14  # header row of this table (same as startrow above)
copy_row = pre_table_rows + 134  # last provider row the template already formats
end_row = copy_row + new_provider + 1  # exclusive upper bound

#5.Copy the last template column's formatting onto the new month columns.
#  The range ends at copy_row inclusive: step 6 copies that row downwards, so its
#  new month columns must be formatted first.
for column_number in range(copy_column, end_column):
    for row_number in range(14, copy_row + 1):
        cell_to_copy_from = ws_total_all.cell(row=row_number, column=copy_column)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#6.Copy the last template row's formatting onto the new provider rows
for row_number in range(copy_row, end_row):
    for column_number in range(2, end_column):
        cell_to_copy_from = ws_total_all.cell(row=copy_row, column=column_number)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#7.Cell range of the whole table, from the header row to the last provider row
conditional_formatting_start_cell = "D14"
conditional_formatting_end_col = openpyxl.utils.cell.get_column_letter(end_column - 1)
conditional_formatting_end_row = end_row - 1
conditional_formatting_end_cell = conditional_formatting_end_col + str(conditional_formatting_end_row)
conditional_formatting_range = f"{conditional_formatting_start_cell}:{conditional_formatting_end_cell}"

#8.Re-apply the sheet's existing conditional formatting rules to the whole table.
#  Iterating ws.conditional_formatting yields range objects that carry a .rules
#  list, while add() only accepts Rule instances, so the rules are unpacked first.
#  The list() snapshot avoids adding to the collection while iterating over it,
#  and the identity check stops a rule re-added by another cell being copied twice.
existing_rules = []
for formatted_range in list(ws_total_all.conditional_formatting):
    for rule in formatted_range.rules:
        if not any(rule is seen for seen in existing_rules):
            existing_rules.append(rule)
for rule in existing_rules:
    ws_total_all.conditional_formatting.add(conditional_formatting_range, rule)

#9.updating publishing date header
ws_total_all.cell(row=3, column=3).value = date_header

#10.Thousands-separated integer format for the figures
number_style = NamedStyle(name="number", number_format="#,##0")

for row in ws_total_all.iter_rows(min_row=15, max_row=end_row, min_col=pre_date_columns + 1, max_col=end_column):
    for cell in row:
        cell.number_format = number_style.number_format

# Same format for column D on rows 149, 291 and 434
# (presumably the totals rows under the three tables - TODO confirm).
for row in [149, 291, 434]:
    ws_total_all.cell(row=row, column=4).number_format = number_style.number_format

#wb.save('Outputs/Provider_template.xlsx')

In [0]:
# Processed pre-referral requests: one row per Acute provider, one column per
# month, with 0 where a provider has no matching requests in a month.

# Pre-referral rows (EROC_DerTypeOfSpecialistAdvice == 1) for Acute providers after
# 2022-03-01, leaving out EROC_DerStatus 2 and EROC_DerOutcome 40, summed per
# month and provider.
df_processed_pre_referral_sa = (
    df_raw_sa
    .filter(col("EROC_DerMonth") > '2022-03-01')
    .filter(col("Acute_Status") == "Acute")
    .filter(col("EROC_DerTypeOfSpecialistAdvice") == 1)
    .filter(col("EROC_DerStatus") != 2)
    .filter(col("EROC_DerOutcome") != 40)
    .select("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name", "EROC_Requests")
    .groupBy("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name")
    .agg(F_sum("EROC_Requests").alias("Requests"))
    .orderBy("EROC_DerMonth", "RESP_Org_Code")
)

# Every Acute provider found in the raw data.
df_all_orgs = (
    df_raw_sa
    .filter(col("Acute_Status") == "Acute")
    .select("RESP_Org_Code", "RESP_Org_Name")
    .distinct()
)

# Months that have at least one processed pre-referral request.
all_months_df = (
    df_processed_pre_referral_sa
    .select("EROC_DerMonth")
    .filter(col("EROC_DerMonth").isNotNull())
    .distinct()
)

# Provider x month grid, so that combinations without data still get a row.
full_grid = df_all_orgs.crossJoin(all_months_df)

# Attach the aggregated figures to the grid; combinations without data become 0.
df_processed_pre_referral_sa_full = full_grid.join(
    df_processed_pre_referral_sa,
    on=["RESP_Org_Code", "RESP_Org_Name", "EROC_DerMonth"],
    how="left",
).fillna(0, subset=["Requests"])

# Month list in ascending order, passed to the pivot explicitly.
valid_months = [month_row[0] for month_row in all_months_df.orderBy("EROC_DerMonth").collect()]

df_processed_pre_referral_sa_pivot = (
    df_processed_pre_referral_sa_full
    .groupBy("RESP_Org_Code", "RESP_Org_Name")
    .pivot("EROC_DerMonth", values=valid_months)
    .agg(F_sum("Requests"))
    .orderBy("RESP_Org_Code", "RESP_Org_Name")
    .withColumnRenamed("RESP_Org_Code", "Provider Code")
    .withColumnRenamed("RESP_Org_Name", "Provider Name")
)

display(df_processed_pre_referral_sa_pivot)

In [0]:
# Writes the "processed requests, pre-referral SA" pivot into the second table of
# sheet '2) Pre referral Spec Advice' and stretches the template formatting over
# any months/providers beyond the 37 months x 134 providers hard-coded below.
# NOTE(review): the row/column constants describe 'Provider template.xlsx';
# confirm them against the workbook if the template changes.

#1.Rename the pivoted month columns from yyyy-mm-dd to Mon-yyyy
for column in df_processed_pre_referral_sa_pivot.columns[2:]:
    month_format = datetime.strptime(column, '%Y-%m-%d').strftime("%b-%Y")
    df_processed_pre_referral_sa_pivot = df_processed_pre_referral_sa_pivot.withColumnRenamed(column, month_format)

#2.converting the pivot to pandas dataframe
df_pd_processed_pre_referral_sa_pivot = df_processed_pre_referral_sa_pivot.toPandas()

#3.Write the table into the template sheet (header on row 156, data from row 157)
ws_total_all = wb['2) Pre referral Spec Advice']

safe_insert_pandas_df_into_excel(
    df=df_pd_processed_pre_referral_sa_pivot,
    ws=ws_total_all,
    header=True,
    startrow=156,
    startcol=2,
    index=False,
)

#4.Width and height of the table, measured on the frame that was just written.
#  The pivot holds a row for every Acute provider, including those without any
#  matching requests, so counting providers on the aggregated frame would give
#  fewer rows than are actually on the sheet.
number_of_providers, number_of_columns = df_pd_processed_pre_referral_sa_pivot.shape
number_of_months = number_of_columns - 2  # everything after Provider Code / Provider Name
pre_date_columns = 3
copy_column = 37 + pre_date_columns  # last month column the template already formats
end_column = number_of_months + pre_date_columns + 1  # exclusive upper bound

new_provider = number_of_providers - 134
# Header row of this table (same as startrow above). Using 155 here made every
# derived row one too small, so the last provider row was left out of the
# formatted / conditional-format range.
pre_table_rows = 156
copy_row = pre_table_rows + 134  # last provider row the template already formats
end_row = copy_row + new_provider + 1  # exclusive upper bound

#5.Copy the last template column's formatting onto the new month columns.
#  Runs from row 14 down to this table's last template row so that this table's
#  own rows are covered; stopping at row 147 only reached the first table.
for column_number in range(copy_column, end_column):
    for row_number in range(14, copy_row + 1):
        cell_to_copy_from = ws_total_all.cell(row=row_number, column=copy_column)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#6.Copy the last template row's formatting onto the new provider rows
for row_number in range(copy_row, end_row):
    for column_number in range(2, end_column):
        cell_to_copy_from = ws_total_all.cell(row=copy_row, column=column_number)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#7.Cell range of the whole table, from the header row to the last provider row
conditional_formatting_start_cell = "D156"
conditional_formatting_end_col = openpyxl.utils.cell.get_column_letter(end_column - 1)
conditional_formatting_end_row = end_row - 1
conditional_formatting_end_cell = conditional_formatting_end_col + str(conditional_formatting_end_row)
conditional_formatting_range = f"{conditional_formatting_start_cell}:{conditional_formatting_end_cell}"

#8.Re-apply the sheet's existing conditional formatting rules to the whole table.
#  Iterating ws.conditional_formatting yields range objects that carry a .rules
#  list, while add() only accepts Rule instances, so the rules are unpacked first.
#  The list() snapshot avoids adding to the collection while iterating over it,
#  and the identity check stops a rule re-added by another cell being copied twice.
existing_rules = []
for formatted_range in list(ws_total_all.conditional_formatting):
    for rule in formatted_range.rules:
        if not any(rule is seen for seen in existing_rules):
            existing_rules.append(rule)
for rule in existing_rules:
    ws_total_all.conditional_formatting.add(conditional_formatting_range, rule)

#9.updating publishing date header
ws_total_all.cell(row=3, column=3).value = date_header

#10.Thousands-separated integer format for the figures
number_style = NamedStyle(name="number", number_format="#,##0")

for row in ws_total_all.iter_rows(min_row=15, max_row=end_row, min_col=pre_date_columns + 1, max_col=end_column):
    for cell in row:
        cell.number_format = number_style.number_format

# Same format for column D on rows 149, 291 and 434
# (presumably the totals rows under the three tables - TODO confirm).
for row in [149, 291, 434]:
    ws_total_all.cell(row=row, column=4).number_format = number_style.number_format

#wb.save('Outputs/Provider_template.xlsx')

In [0]:
# Diverted pre-referral requests: one row per Acute provider, one column per
# month, with 0 where a provider has no matching requests in a month.
from pyspark.sql.functions import col, to_date, lit, sum as F_sum

# Work with EROC_DerMonth as a date.
df_raw_sa = df_raw_sa.withColumn("EROC_DerMonth", to_date("EROC_DerMonth"))

# Every Acute provider found in the raw data.
df_all_orgs = (
    df_raw_sa
    .where(col("Acute_Status") == "Acute")
    .select("RESP_Org_Code", "RESP_Org_Name")
    .distinct()
)

# Pre-referral rows (EROC_DerTypeOfSpecialistAdvice == 1) for Acute providers after
# 2022-03-01 with EROC_DerOutcome 10 or 12 and EROC_DerStatus other than 2,
# summed per month and provider.
df_diverted_pre_referral_sa = (
    df_raw_sa
    .where(col("EROC_DerMonth") > lit("2022-03-01"))
    .where(col("Acute_Status") == "Acute")
    .where(col("EROC_DerTypeOfSpecialistAdvice") == 1)
    .where(col("EROC_DerOutcome").isin([10, 12]))
    .where(col("EROC_DerStatus") != 2)
    .select("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name", "EROC_Requests")
    .groupBy("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name")
    .agg(F_sum("EROC_Requests").alias("Requests"))
)

# Months that have at least one diverted pre-referral request.
all_months_df = (
    df_diverted_pre_referral_sa
    .select("EROC_DerMonth")
    .where(col("EROC_DerMonth").isNotNull())
    .distinct()
)

# Provider x month grid, so that combinations without data still get a row.
full_grid = df_all_orgs.crossJoin(all_months_df)

# Attach the aggregated figures to the grid; combinations without data become 0.
df_diverted_pre_referral_sa_full = full_grid.join(
    df_diverted_pre_referral_sa,
    on=["RESP_Org_Code", "RESP_Org_Name", "EROC_DerMonth"],
    how="left",
).fillna(0, subset=["Requests"])

# Month list in ascending order, passed to the pivot explicitly.
valid_months = [month_row[0] for month_row in all_months_df.orderBy("EROC_DerMonth").collect()]

df_diverted_pre_referral_sa_pivot = (
    df_diverted_pre_referral_sa_full
    .groupBy("RESP_Org_Code", "RESP_Org_Name")
    .pivot("EROC_DerMonth", values=valid_months)
    .agg(F_sum("Requests"))
    .orderBy("RESP_Org_Code", "RESP_Org_Name")
    .withColumnRenamed("RESP_Org_Code", "Provider Code")
    .withColumnRenamed("RESP_Org_Name", "Provider Name")
)

display(df_diverted_pre_referral_sa_pivot)

In [0]:
# Writes the "diverted requests, pre-referral SA" pivot into the third table of
# sheet '2) Pre referral Spec Advice' and stretches the template formatting over
# any months/providers beyond the 37 months x 134 providers hard-coded below.
# NOTE(review): the row/column constants describe 'Provider template.xlsx';
# confirm them against the workbook if the template changes.

#1.Rename the pivoted month columns from yyyy-mm-dd to Mon-yyyy
for column in df_diverted_pre_referral_sa_pivot.columns[2:]:
    month_format = datetime.strptime(column, '%Y-%m-%d').strftime("%b-%Y")
    df_diverted_pre_referral_sa_pivot = df_diverted_pre_referral_sa_pivot.withColumnRenamed(column, month_format)

#2.converting the pivot to pandas dataframe
df_pd_diverted_pre_referral_sa_pivot = df_diverted_pre_referral_sa_pivot.toPandas()

#3.Write the table into the template sheet (header on row 299, data from row 300)
ws_total_all = wb['2) Pre referral Spec Advice']

safe_insert_pandas_df_into_excel(
    df=df_pd_diverted_pre_referral_sa_pivot,
    ws=ws_total_all,
    header=True,
    startrow=299,
    startcol=2,
    index=False,
)

#4.Width and height of the table, measured on the frame that was just written.
#  The pivot holds a row for every Acute provider, including those without any
#  matching requests, so counting providers on the aggregated frame would give
#  fewer rows than are actually on the sheet.
number_of_providers, number_of_columns = df_pd_diverted_pre_referral_sa_pivot.shape
number_of_months = number_of_columns - 2  # everything after Provider Code / Provider Name
pre_date_columns = 3
copy_column = 37 + pre_date_columns  # last month column the template already formats
end_column = number_of_months + pre_date_columns + 1  # exclusive upper bound

new_provider = number_of_providers - 134
# Header row of this table (same as startrow above). Using 298 here made every
# derived row one too small, so the last provider row was left out of the
# formatted / conditional-format range.
pre_table_rows = 299
copy_row = pre_table_rows + 134  # last provider row the template already formats
end_row = copy_row + new_provider + 1  # exclusive upper bound

#5.Copy the last template column's formatting onto the new month columns.
#  Runs from row 14 down to this table's last template row; stopping at row 298
#  covered the two tables above but none of this table's own rows.
for column_number in range(copy_column, end_column):
    for row_number in range(14, copy_row + 1):
        cell_to_copy_from = ws_total_all.cell(row=row_number, column=copy_column)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#6.Copy the last template row's formatting onto the new provider rows
for row_number in range(copy_row, end_row):
    for column_number in range(2, end_column):
        cell_to_copy_from = ws_total_all.cell(row=copy_row, column=column_number)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#7.Cell range of the whole table, from the header row to the last provider row
conditional_formatting_start_cell = "D299"
conditional_formatting_end_col = openpyxl.utils.cell.get_column_letter(end_column - 1)
conditional_formatting_end_row = end_row - 1
conditional_formatting_end_cell = conditional_formatting_end_col + str(conditional_formatting_end_row)
conditional_formatting_range = f"{conditional_formatting_start_cell}:{conditional_formatting_end_cell}"

#8.Re-apply the sheet's existing conditional formatting rules to the whole table.
#  Iterating ws.conditional_formatting yields range objects that carry a .rules
#  list, while add() only accepts Rule instances, so the rules are unpacked first.
#  The list() snapshot avoids adding to the collection while iterating over it,
#  and the identity check stops a rule re-added by another cell being copied twice.
existing_rules = []
for formatted_range in list(ws_total_all.conditional_formatting):
    for rule in formatted_range.rules:
        if not any(rule is seen for seen in existing_rules):
            existing_rules.append(rule)
for rule in existing_rules:
    ws_total_all.conditional_formatting.add(conditional_formatting_range, rule)

#9.updating publishing date header
ws_total_all.cell(row=3, column=3).value = date_header

#10.Thousands-separated integer format for the figures
number_style = NamedStyle(name="number", number_format="#,##0")

for row in ws_total_all.iter_rows(min_row=15, max_row=end_row, min_col=pre_date_columns + 1, max_col=end_column):
    for cell in row:
        cell.number_format = number_style.number_format

# Same format for column D on rows 149, 291 and 434
# (presumably the totals rows under the three tables - TODO confirm).
for row in [149, 291, 434]:
    ws_total_all.cell(row=row, column=4).number_format = number_style.number_format

#wb.save('Outputs/Provider_template.xlsx')

In [0]:
# Total requests for post referral of SA
from pyspark.sql.functions import col, to_date, lit, sum as F_sum

# Step 0: cast EROC_DerMonth to a date (this rebinds the shared df_raw_sa frame)
df_raw_sa = df_raw_sa.withColumn("EROC_DerMonth", to_date("EROC_DerMonth"))

# Step 1: one row per distinct Acute provider (code + name)
df_all_orgs = (
    df_raw_sa
    .where(col("Acute_Status") == "Acute")
    .select("RESP_Org_Code", "RESP_Org_Name")
    .distinct()
)

# Steps 2-3: keep Acute post-referral rows (advice type 2) after 2022-03-01,
# then total the requests per month and provider
df_total_post_referral_sa = (
    df_raw_sa
    .where(col("EROC_DerMonth") > lit("2022-03-01"))
    .where(col("Acute_Status") == "Acute")
    .where(col("EROC_DerTypeOfSpecialistAdvice") == 2)
    .select("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name", "EROC_Requests")
    .groupBy("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name")
    .agg(F_sum("EROC_Requests").alias("Requests"))
)

# Step 4: build the provider x month grid so missing combinations are kept

# Months that occur in the aggregated data (null months dropped)
all_months_df = (
    df_total_post_referral_sa
    .select("EROC_DerMonth")
    .where(col("EROC_DerMonth").isNotNull())
    .distinct()
)

# The same months as a Python list in ascending order; used below to fix the pivot columns
all_months = [month_row[0] for month_row in all_months_df.orderBy("EROC_DerMonth").collect()]

# Every Acute provider paired with every month
full_grid = df_all_orgs.crossJoin(all_months_df)

# Attach the aggregated requests to the grid; combinations without data become 0
df_total_post_referral_sa_full = full_grid.join(
    df_total_post_referral_sa,
    on=["RESP_Org_Code", "RESP_Org_Name", "EROC_DerMonth"],
    how="left",
).fillna(0, subset=["Requests"])

# Step 5: pivot so that each month becomes a column
valid_months = all_months

df_total_post_referral_sa_pivot = (
    df_total_post_referral_sa_full
    .groupBy("RESP_Org_Code", "RESP_Org_Name")
    .pivot("EROC_DerMonth", values=valid_months)
    .agg(F_sum("Requests"))
    .orderBy("RESP_Org_Code", "RESP_Org_Name")
    .withColumnRenamed("RESP_Org_Code", "Provider Code")
    .withColumnRenamed("RESP_Org_Name", "Provider Name")
)
display(df_total_post_referral_sa_pivot)



In [0]:
#1.Outputs of Total requests for post referral sa formatting date headers along the pivot
from datetime import datetime

# Relabel every month column from 'YYYY-MM-DD' to 'Mon-YYYY'; the first two columns
# (Provider Code / Provider Name) are skipped.
for column in df_total_post_referral_sa_pivot.columns[2:]:
    month_format = datetime.strptime(column, '%Y-%m-%d').strftime("%b-%Y")
    df_total_post_referral_sa_pivot = df_total_post_referral_sa_pivot.withColumnRenamed(column, month_format)

#2.converting the pivot to pandas dataframe
df_pd_total_post_referral_sa_pivot = df_total_post_referral_sa_pivot.toPandas()

#3.selecting the template worksheet and writing the table into it
ws_total_all = wb['3) Post referral Spec Advice']

safe_insert_pandas_df_into_excel(
    df=df_pd_total_post_referral_sa_pivot,
    ws=ws_total_all,
    header=True,
    startrow=14,
    startcol=2,
    index=False,
)

#4.Check width of table and get column numbers of unformatted columns
# The table size is read from the frame that was just written. The pivot holds every Acute provider
# (full provider x month grid), so counting providers in the filtered aggregate would understate the height.
number_of_months = len(df_pd_total_post_referral_sa_pivot.columns) - 2  # all columns after Provider Code / Provider Name
pre_date_columns = 3
copy_column = 37 + pre_date_columns  # presumably the last month column already styled in the template - TODO confirm
end_column = number_of_months + pre_date_columns + 1

#5.copy and paste formatting onto unformatted columns
for column_number in range(copy_column, end_column):
    for row_number in range(14, 148):
        cell_to_copy_from = ws_total_all.cell(row=row_number, column=copy_column)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#6.get height of table and get row numbers of unformatted rows
number_of_providers = len(df_pd_total_post_referral_sa_pivot)  # one written row per provider
new_provider = number_of_providers - 134  # 134 is presumably the number of provider rows styled in the template - TODO confirm
pre_table_rows = 14
copy_row = pre_table_rows + 134
end_row = copy_row + new_provider + 1

#7.copy and paste formatting onto unformatted rows (the style of copy_row is reused for every row below it)
for row_number in range(copy_row, end_row):
    for column_number in range(2, end_column):
        cell_to_copy_from = ws_total_all.cell(row=copy_row, column=column_number)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#8.Use the values calculated above to get the cell range of the whole table
conditional_formatting_start_cell = "D14"
conditional_formatting_end_col = openpyxl.utils.cell.get_column_letter(end_column - 1)
conditional_formatting_end_row = end_row - 1
conditional_formatting_end_cell = f"{conditional_formatting_end_col}{conditional_formatting_end_row}"
conditional_formatting_range = f"{conditional_formatting_start_cell}:{conditional_formatting_end_cell}"

#9.Copy the existing conditional formatting rules, but make them cover the whole table using the range created above.
# Iterating ws.conditional_formatting yields one container per formatted range (its rules are in .rules),
# while add() only accepts the rule objects themselves and changes the collection being iterated.
# So the rules are collected first (each rule object once) and added afterwards.
existing_rules = []
for formatted_range in ws_total_all.conditional_formatting:
    for rule in formatted_range.rules:
        if not any(rule is known_rule for known_rule in existing_rules):
            existing_rules.append(rule)
for rule in existing_rules:
    ws_total_all.conditional_formatting.add(conditional_formatting_range, rule)

#10.updating publishing date header
ws_total_all.cell(row=3, column=3).value = date_header

#11.Number format with a thousands separator and no decimals
thousands_format = "#,##0"

#12.Apply the number format to the specified range
for row in ws_total_all.iter_rows(min_row=15, max_row=end_row, min_col=pre_date_columns + 1, max_col=end_column):
    for cell in row:
        cell.number_format = thousands_format

# Apply the number format to specific cells
for row in [149, 291, 434]:
    ws_total_all.cell(row=row, column=4).number_format = thousands_format

In [0]:
# Processed requests for post_referral SA
# Rows kept: months after 2022-03-01, Acute providers, status other than 2,
# outcome other than 40, advice type 2. Requests are then totalled per month and provider.
df_processed_post_referral_sa = (
    df_raw_sa
    .filter(F.col("EROC_DerMonth") > '2022-03-01')
    .filter(F.col("Acute_Status") == "Acute")
    .filter(F.col("EROC_DerStatus") != 2)
    .filter(F.col("EROC_DerOutcome") != 40)
    .filter(F.col("EROC_DerTypeOfSpecialistAdvice") == 2)
    .groupBy("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name")
    .agg(F.sum("EROC_Requests").alias("Requests"))
    .orderBy("EROC_DerMonth", "RESP_Org_Code")
    .select("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name", "Requests")
)

# Pivot: one row per provider, one column per month
df_processed_post_referral_sa_pivot = (
    df_processed_post_referral_sa
    .groupBy("RESP_Org_Code", "RESP_Org_Name")
    .pivot("EROC_DerMonth")
    .agg(F.sum("Requests"))
    .orderBy("RESP_Org_Code", "RESP_Org_Name")
    .withColumnRenamed("RESP_Org_Code", "Provider Code")
    .withColumnRenamed("RESP_Org_Name", "Provider Name")
)

display(df_processed_post_referral_sa_pivot)

In [0]:
#1.Outputs of processed requests for post referral sa formatting date headers along the pivot
from datetime import datetime

# Relabel every month column from 'YYYY-MM-DD' to 'Mon-YYYY'; the first two columns
# (Provider Code / Provider Name) are skipped.
for column in df_processed_post_referral_sa_pivot.columns[2:]:
    month_format = datetime.strptime(column, '%Y-%m-%d').strftime("%b-%Y")
    df_processed_post_referral_sa_pivot = df_processed_post_referral_sa_pivot.withColumnRenamed(column, month_format)

#2.converting the pivot to pandas dataframe
df_pd_processed_post_referral_sa_pivot = df_processed_post_referral_sa_pivot.toPandas()

#3.selecting the template worksheet and writing the table into it
ws_total_all = wb['3) Post referral Spec Advice']

safe_insert_pandas_df_into_excel(
    df=df_pd_processed_post_referral_sa_pivot,
    ws=ws_total_all,
    header=True,
    startrow=156,
    startcol=2,
    index=False,
)

#4.Check width of table and get column numbers of unformatted columns
# The table size is read from the frame that was just written: its rows are (code, name) pairs,
# which a count of distinct provider names alone does not necessarily match.
number_of_months = len(df_pd_processed_post_referral_sa_pivot.columns) - 2  # all columns after Provider Code / Provider Name
pre_date_columns = 3
copy_column = 37 + pre_date_columns  # presumably the last month column already styled in the template - TODO confirm
end_column = number_of_months + pre_date_columns + 1

#5.copy and paste formatting onto unformatted columns
# NOTE(review): rows 14-147 do not overlap the table written at startrow=156 (if startrow is the
# worksheet row) - confirm this row range is intended for this table.
for column_number in range(copy_column, end_column):
    for row_number in range(14, 148):
        cell_to_copy_from = ws_total_all.cell(row=row_number, column=copy_column)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#6.get height of table and get row numbers of unformatted rows
number_of_providers = len(df_pd_processed_post_referral_sa_pivot)  # one written row per provider
new_provider = number_of_providers - 134  # 134 is presumably the number of provider rows styled in the template - TODO confirm
pre_table_rows = 155
copy_row = pre_table_rows + 134
end_row = copy_row + new_provider + 1

#7.copy and paste formatting onto unformatted rows (the style of copy_row is reused for every row below it)
for row_number in range(copy_row, end_row):
    for column_number in range(2, end_column):
        cell_to_copy_from = ws_total_all.cell(row=copy_row, column=column_number)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#8.Use the values calculated above to get the cell range of the whole table
conditional_formatting_start_cell = "D156"
conditional_formatting_end_col = openpyxl.utils.cell.get_column_letter(end_column - 1)
conditional_formatting_end_row = end_row - 1
conditional_formatting_end_cell = f"{conditional_formatting_end_col}{conditional_formatting_end_row}"
conditional_formatting_range = f"{conditional_formatting_start_cell}:{conditional_formatting_end_cell}"

#9.Copy the existing conditional formatting rules, but make them cover the whole table using the range created above.
# Iterating ws.conditional_formatting yields one container per formatted range (its rules are in .rules),
# while add() only accepts the rule objects themselves and changes the collection being iterated.
# So the rules are collected first (each rule object once) and added afterwards.
existing_rules = []
for formatted_range in ws_total_all.conditional_formatting:
    for rule in formatted_range.rules:
        if not any(rule is known_rule for known_rule in existing_rules):
            existing_rules.append(rule)
for rule in existing_rules:
    ws_total_all.conditional_formatting.add(conditional_formatting_range, rule)

#10.updating publishing date header
ws_total_all.cell(row=3, column=3).value = date_header

#11.Number format with a thousands separator and no decimals
thousands_format = "#,##0"

#12.Apply the number format to the specified range
for row in ws_total_all.iter_rows(min_row=15, max_row=end_row, min_col=pre_date_columns + 1, max_col=end_column):
    for cell in row:
        cell.number_format = thousands_format

# Apply the number format to specific cells
for row in [149, 291, 434]:
    ws_total_all.cell(row=row, column=4).number_format = thousands_format

In [0]:
# Diverted requests for post_referral SA
# Rows kept: months after 2022-03-01, Acute providers, status other than 2,
# outcome 10 or 12, advice type 2. Requests are then totalled per month and provider.
df_diverted_post_referral_sa = (
    df_raw_sa
    .filter(F.col("EROC_DerMonth") > '2022-03-01')
    .filter(F.col("Acute_Status") == "Acute")
    .filter(F.col("EROC_DerStatus") != 2)
    .filter(F.col("EROC_DerOutcome").isin(10, 12))
    .filter(F.col("EROC_DerTypeOfSpecialistAdvice") == 2)
    .groupBy("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name")
    .agg(F.sum("EROC_Requests").alias("Requests"))
    .orderBy("EROC_DerMonth", "RESP_Org_Code")
    .select("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name", "Requests")
)

# Pivot: one row per provider, one column per month
df_diverted_post_referral_sa_pivot = (
    df_diverted_post_referral_sa
    .groupBy("RESP_Org_Code", "RESP_Org_Name")
    .pivot("EROC_DerMonth")
    .agg(F.sum("Requests"))
    .orderBy("RESP_Org_Code", "RESP_Org_Name")
    .withColumnRenamed("RESP_Org_Code", "Provider Code")
    .withColumnRenamed("RESP_Org_Name", "Provider Name")
)

display(df_diverted_post_referral_sa_pivot)

In [0]:
#1.Outputs of diverted requests for post referral sa formatting date headers along the pivot
from datetime import datetime

# Relabel every month column from 'YYYY-MM-DD' to 'Mon-YYYY'; the first two columns
# (Provider Code / Provider Name) are skipped.
for column in df_diverted_post_referral_sa_pivot.columns[2:]:
    month_format = datetime.strptime(column, '%Y-%m-%d').strftime("%b-%Y")
    df_diverted_post_referral_sa_pivot = df_diverted_post_referral_sa_pivot.withColumnRenamed(column, month_format)

#2.converting the pivot to pandas dataframe
df_pd_diverted_post_referral_sa_pivot = df_diverted_post_referral_sa_pivot.toPandas()

#3.selecting the template worksheet and writing the table into it
ws_total_all = wb['3) Post referral Spec Advice']

safe_insert_pandas_df_into_excel(
    df=df_pd_diverted_post_referral_sa_pivot,
    ws=ws_total_all,
    header=True,
    startrow=299,
    startcol=2,
    index=False,
)

#4.Check width of table and get column numbers of unformatted columns
# The table size is read from the frame that was just written: its rows are (code, name) pairs,
# which a count of distinct provider names alone does not necessarily match.
number_of_months = len(df_pd_diverted_post_referral_sa_pivot.columns) - 2  # all columns after Provider Code / Provider Name
pre_date_columns = 3
copy_column = 37 + pre_date_columns  # presumably the last month column already styled in the template - TODO confirm
end_column = number_of_months + pre_date_columns + 1

#5.copy and paste formatting onto unformatted columns
# NOTE(review): rows 14-298 stop short of the table written at startrow=299 (if startrow is the
# worksheet row) - confirm this row range is intended for this table.
for column_number in range(copy_column, end_column):
    for row_number in range(14, 299):
        cell_to_copy_from = ws_total_all.cell(row=row_number, column=copy_column)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#6.get height of table and get row numbers of unformatted rows
number_of_providers = len(df_pd_diverted_post_referral_sa_pivot)  # one written row per provider
new_provider = number_of_providers - 134  # 134 is presumably the number of provider rows styled in the template - TODO confirm
pre_table_rows = 298
copy_row = pre_table_rows + 134
end_row = copy_row + new_provider + 1

#7.copy and paste formatting onto unformatted rows (the style of copy_row is reused for every row below it)
for row_number in range(copy_row, end_row):
    for column_number in range(2, end_column):
        cell_to_copy_from = ws_total_all.cell(row=copy_row, column=column_number)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#8.Use the values calculated above to get the cell range of the whole table
conditional_formatting_start_cell = "D299"
conditional_formatting_end_col = openpyxl.utils.cell.get_column_letter(end_column - 1)
conditional_formatting_end_row = end_row - 1
conditional_formatting_end_cell = f"{conditional_formatting_end_col}{conditional_formatting_end_row}"
conditional_formatting_range = f"{conditional_formatting_start_cell}:{conditional_formatting_end_cell}"

#9.Copy the existing conditional formatting rules, but make them cover the whole table using the range created above.
# Iterating ws.conditional_formatting yields one container per formatted range (its rules are in .rules),
# while add() only accepts the rule objects themselves and changes the collection being iterated.
# So the rules are collected first (each rule object once) and added afterwards.
existing_rules = []
for formatted_range in ws_total_all.conditional_formatting:
    for rule in formatted_range.rules:
        if not any(rule is known_rule for known_rule in existing_rules):
            existing_rules.append(rule)
for rule in existing_rules:
    ws_total_all.conditional_formatting.add(conditional_formatting_range, rule)

#10.updating publishing date header
ws_total_all.cell(row=3, column=3).value = date_header

#11.Number format with a thousands separator and no decimals
thousands_format = "#,##0"

#12.Apply the number format to the specified range
for row in ws_total_all.iter_rows(min_row=15, max_row=end_row, min_col=pre_date_columns + 1, max_col=end_column):
    for cell in row:
        cell.number_format = thousands_format

# Apply the number format to specific cells
for row in [149, 291, 434]:
    ws_total_all.cell(row=row, column=4).number_format = thousands_format

In [0]:
from pyspark.sql.functions import col, to_date, lit, sum as F_sum

# Step 0: cast EROC_DerMonth to a date (this rebinds the shared df_raw_sa frame)
df_raw_sa = df_raw_sa.withColumn("EROC_DerMonth", to_date("EROC_DerMonth"))

# Step 1: one row per distinct Acute provider (code + name)
df_all_orgs = (
    df_raw_sa
    .where(col("Acute_Status") == "Acute")
    .select("RESP_Org_Code", "RESP_Org_Name")
    .distinct()
)

# Steps 2-3: keep Acute rows of advice type 99 after 2022-03-01,
# then total the requests per month and provider
df_total_other_referral_sa = (
    df_raw_sa
    .where(col("EROC_DerMonth") > lit("2022-03-01"))
    .where(col("Acute_Status") == "Acute")
    .where(col("EROC_DerTypeOfSpecialistAdvice") == 99)
    .select("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name", "EROC_Requests")
    .groupBy("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name")
    .agg(F_sum("EROC_Requests").alias("Requests"))
)

# Step 4: months that occur in the aggregated data (null months dropped)
all_months_df = (
    df_total_other_referral_sa
    .select("EROC_DerMonth")
    .where(col("EROC_DerMonth").isNotNull())
    .distinct()
)

# The same months as a Python list in ascending order; used below to fix the pivot columns
valid_months = [month_row[0] for month_row in all_months_df.orderBy("EROC_DerMonth").collect()]

# Step 5: every Acute provider paired with every month
full_grid = df_all_orgs.crossJoin(all_months_df)

# Step 6: attach the aggregated requests to the grid; combinations without data become 0
df_total_other_referral_sa_full = full_grid.join(
    df_total_other_referral_sa,
    on=["RESP_Org_Code", "RESP_Org_Name", "EROC_DerMonth"],
    how="left",
).fillna(0, subset=["Requests"])

# Step 7: pivot so that each month becomes a column
df_total_other_referral_sa_pivot = (
    df_total_other_referral_sa_full
    .groupBy("RESP_Org_Code", "RESP_Org_Name")
    .pivot("EROC_DerMonth", values=valid_months)
    .agg(F_sum("Requests"))
    .orderBy("RESP_Org_Code", "RESP_Org_Name")
    .withColumnRenamed("RESP_Org_Code", "Provider Code")
    .withColumnRenamed("RESP_Org_Name", "Provider Name")
)
display(df_total_other_referral_sa_pivot)

In [0]:
#1.Outputs of Total requests for other types sa formatting date headers along the pivot
from datetime import datetime

# Relabel every month column from 'YYYY-MM-DD' to 'Mon-YYYY'; the first two columns
# (Provider Code / Provider Name) are skipped.
for column in df_total_other_referral_sa_pivot.columns[2:]:
    month_format = datetime.strptime(column, '%Y-%m-%d').strftime("%b-%Y")
    df_total_other_referral_sa_pivot = df_total_other_referral_sa_pivot.withColumnRenamed(column, month_format)

#2.converting the pivot to pandas dataframe
df_pd_total_other_referral_sa_pivot = df_total_other_referral_sa_pivot.toPandas()

#3.selecting the template worksheet and writing the table into it
ws_total_all = wb['4) Other Types of Spec Advice']

safe_insert_pandas_df_into_excel(
    df=df_pd_total_other_referral_sa_pivot,
    ws=ws_total_all,
    header=True,
    startrow=14,
    startcol=2,
    index=False,
)

#4.Check width of table and get column numbers of unformatted columns
# The table size is read from the frame that was just written. The pivot holds every Acute provider
# (full provider x month grid), so counting providers in the filtered aggregate would understate the height.
number_of_months = len(df_pd_total_other_referral_sa_pivot.columns) - 2  # all columns after Provider Code / Provider Name
pre_date_columns = 3
copy_column = 37 + pre_date_columns  # presumably the last month column already styled in the template - TODO confirm
end_column = number_of_months + pre_date_columns + 1

#5.copy and paste formatting onto unformatted columns
for column_number in range(copy_column, end_column):
    for row_number in range(14, 148):
        cell_to_copy_from = ws_total_all.cell(row=row_number, column=copy_column)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#6.get height of table and get row numbers of unformatted rows
number_of_providers = len(df_pd_total_other_referral_sa_pivot)  # one written row per provider
new_provider = number_of_providers - 134  # 134 is presumably the number of provider rows styled in the template - TODO confirm
pre_table_rows = 14
copy_row = pre_table_rows + 134
end_row = copy_row + new_provider + 1

#7.copy and paste formatting onto unformatted rows (the style of copy_row is reused for every row below it)
for row_number in range(copy_row, end_row):
    for column_number in range(2, end_column):
        cell_to_copy_from = ws_total_all.cell(row=copy_row, column=column_number)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#8.Use the values calculated above to get the cell range of the whole table
conditional_formatting_start_cell = "D14"
conditional_formatting_end_col = openpyxl.utils.cell.get_column_letter(end_column - 1)
conditional_formatting_end_row = end_row - 1
conditional_formatting_end_cell = f"{conditional_formatting_end_col}{conditional_formatting_end_row}"
conditional_formatting_range = f"{conditional_formatting_start_cell}:{conditional_formatting_end_cell}"

#9.Copy the existing conditional formatting rules, but make them cover the whole table using the range created above.
# Iterating ws.conditional_formatting yields one container per formatted range (its rules are in .rules),
# while add() only accepts the rule objects themselves and changes the collection being iterated.
# So the rules are collected first (each rule object once) and added afterwards.
existing_rules = []
for formatted_range in ws_total_all.conditional_formatting:
    for rule in formatted_range.rules:
        if not any(rule is known_rule for known_rule in existing_rules):
            existing_rules.append(rule)
for rule in existing_rules:
    ws_total_all.conditional_formatting.add(conditional_formatting_range, rule)

#10.updating publishing date header
ws_total_all.cell(row=3, column=3).value = date_header

#11.Number format with a thousands separator and no decimals
thousands_format = "#,##0"

#12.Apply the number format to the specified range
for row in ws_total_all.iter_rows(min_row=15, max_row=end_row, min_col=pre_date_columns + 1, max_col=end_column):
    for cell in row:
        cell.number_format = thousands_format

# Apply the number format to specific cells
for row in [149, 291, 434]:
    ws_total_all.cell(row=row, column=4).number_format = thousands_format

In [0]:
#processed for other types of spec advice
from pyspark.sql.functions import col, to_date, lit, sum as F_sum

# Step 0: cast EROC_DerMonth to a date (this rebinds the shared df_raw_sa frame)
df_raw_sa = df_raw_sa.withColumn("EROC_DerMonth", to_date("EROC_DerMonth"))

# Step 1: one row per distinct Acute provider (code + name)
df_all_orgs = (
    df_raw_sa
    .where(col("Acute_Status") == "Acute")
    .select("RESP_Org_Code", "RESP_Org_Name")
    .distinct()
)

# Steps 2-3: keep Acute rows of advice type 99 after 2022-03-01 that pass the status / outcome / TFC
# exclusions, then total the requests per month and provider.
# NOTE(review): the post-referral "processed" query excludes EROC_DerOutcome == 40, whereas this one
# excludes EROC_DerOutcome == 2 and EROC_DerTFC == 40 - confirm the difference is intended.
df_processed_other_referral_sa = (
    df_raw_sa
    .where(col("EROC_DerMonth") > lit("2022-03-01"))
    .where(col("Acute_Status") == "Acute")
    .where(col("EROC_DerStatus") != 2)
    .where(col("EROC_DerOutcome") != 2)
    .where(col("EROC_DerTFC") != 40)
    .where(col("EROC_DerTypeOfSpecialistAdvice") == 99)
    .select("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name", "EROC_Requests")
    .groupBy("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name")
    .agg(F_sum("EROC_Requests").alias("Requests"))
)

# Step 4: months that occur in the aggregated data (null months dropped)
all_months_df = (
    df_processed_other_referral_sa
    .select("EROC_DerMonth")
    .where(col("EROC_DerMonth").isNotNull())
    .distinct()
)

# The same months as a Python list in ascending order; used below to fix the pivot columns
valid_months = [month_row[0] for month_row in all_months_df.orderBy("EROC_DerMonth").collect()]

# Step 5: every Acute provider paired with every month
full_grid = df_all_orgs.crossJoin(all_months_df)

# Step 6: attach the aggregated requests to the grid; combinations without data become 0
df_processed_other_referral_sa_full = full_grid.join(
    df_processed_other_referral_sa,
    on=["RESP_Org_Code", "RESP_Org_Name", "EROC_DerMonth"],
    how="left",
).fillna(0, subset=["Requests"])

# Step 7: pivot so that each month becomes a column
df_processed_other_referral_sa_pivot = (
    df_processed_other_referral_sa_full
    .groupBy("RESP_Org_Code", "RESP_Org_Name")
    .pivot("EROC_DerMonth", values=valid_months)
    .agg(F_sum("Requests"))
    .orderBy("RESP_Org_Code", "RESP_Org_Name")
    .withColumnRenamed("RESP_Org_Code", "Provider Code")
    .withColumnRenamed("RESP_Org_Name", "Provider Name")
)
display(df_processed_other_referral_sa_pivot)


In [0]:
#1.Outputs of processed requests for other types sa formatting date headers along the pivot
from datetime import datetime

# Relabel every month column from 'YYYY-MM-DD' to 'Mon-YYYY'; the first two columns
# (Provider Code / Provider Name) are skipped.
for column in df_processed_other_referral_sa_pivot.columns[2:]:
    month_format = datetime.strptime(column, '%Y-%m-%d').strftime("%b-%Y")
    df_processed_other_referral_sa_pivot = df_processed_other_referral_sa_pivot.withColumnRenamed(column, month_format)

#2.converting the pivot to pandas dataframe
df_pd_processed_other_referral_sa_pivot = df_processed_other_referral_sa_pivot.toPandas()

#3.selecting the template worksheet and writing the table into it
ws_total_all = wb['4) Other Types of Spec Advice']

safe_insert_pandas_df_into_excel(
    df=df_pd_processed_other_referral_sa_pivot,
    ws=ws_total_all,
    header=True,
    startrow=156,
    startcol=2,
    index=False,
)

#4.Check width of table and get column numbers of unformatted columns
# The table size is read from the frame that was just written. The pivot holds every Acute provider
# (full provider x month grid), so counting providers in the filtered aggregate would understate the height.
number_of_months = len(df_pd_processed_other_referral_sa_pivot.columns) - 2  # all columns after Provider Code / Provider Name
pre_date_columns = 3
copy_column = 37 + pre_date_columns  # presumably the last month column already styled in the template - TODO confirm
end_column = number_of_months + pre_date_columns + 1

#5.copy and paste formatting onto unformatted columns
# NOTE(review): rows 14-147 do not overlap the table written at startrow=156 (if startrow is the
# worksheet row) - confirm this row range is intended for this table.
for column_number in range(copy_column, end_column):
    for row_number in range(14, 148):
        cell_to_copy_from = ws_total_all.cell(row=row_number, column=copy_column)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#6.get height of table and get row numbers of unformatted rows
number_of_providers = len(df_pd_processed_other_referral_sa_pivot)  # one written row per provider
new_provider = number_of_providers - 134  # 134 is presumably the number of provider rows styled in the template - TODO confirm
pre_table_rows = 155
copy_row = pre_table_rows + 134
end_row = copy_row + new_provider + 1

#7.copy and paste formatting onto unformatted rows (the style of copy_row is reused for every row below it)
for row_number in range(copy_row, end_row):
    for column_number in range(2, end_column):
        cell_to_copy_from = ws_total_all.cell(row=copy_row, column=column_number)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#8.Use the values calculated above to get the cell range of the whole table
conditional_formatting_start_cell = "D156"
conditional_formatting_end_col = openpyxl.utils.cell.get_column_letter(end_column - 1)
conditional_formatting_end_row = end_row - 1
conditional_formatting_end_cell = f"{conditional_formatting_end_col}{conditional_formatting_end_row}"
conditional_formatting_range = f"{conditional_formatting_start_cell}:{conditional_formatting_end_cell}"

#9.Copy the existing conditional formatting rules, but make them cover the whole table using the range created above.
# Iterating ws.conditional_formatting yields one container per formatted range (its rules are in .rules),
# while add() only accepts the rule objects themselves and changes the collection being iterated.
# So the rules are collected first (each rule object once) and added afterwards.
existing_rules = []
for formatted_range in ws_total_all.conditional_formatting:
    for rule in formatted_range.rules:
        if not any(rule is known_rule for known_rule in existing_rules):
            existing_rules.append(rule)
for rule in existing_rules:
    ws_total_all.conditional_formatting.add(conditional_formatting_range, rule)

#10.updating publishing date header
ws_total_all.cell(row=3, column=3).value = date_header

#11.Number format with a thousands separator and no decimals
thousands_format = "#,##0"

#12.Apply the number format to the specified range
for row in ws_total_all.iter_rows(min_row=15, max_row=end_row, min_col=pre_date_columns + 1, max_col=end_column):
    for cell in row:
        cell.number_format = thousands_format

# Apply the number format to specific cells
for row in [149, 291, 434]:
    ws_total_all.cell(row=row, column=4).number_format = thousands_format

In [0]:
from pyspark.sql import functions as F

# 1. Distinct Acute providers (code + name)
df_acute_trusts = (
    df_raw_sa
    .filter(F.col("Acute_Status") == "Acute")
    .select("RESP_Org_Code", "RESP_Org_Name")
    .distinct()
)

# 2. Distinct months after 2022-03-01, taken from the whole raw frame
df_months = (
    df_raw_sa
    .select("EROC_DerMonth")
    .distinct()
    .filter(F.col("EROC_DerMonth") > '2022-03-01')
)

# 3. Grid of every provider paired with every month
df_full_grid = df_acute_trusts.crossJoin(df_months)

# 4-5. Diverted requests of advice type 99: Acute, after 2022-03-01, status other than 2,
#      outcome 10 or 12; totalled per month and provider
df_diverted_other_referral_sa = (
    df_raw_sa
    .filter(F.col("EROC_DerMonth") > '2022-03-01')
    .filter(F.col("Acute_Status") == "Acute")
    .filter(F.col("EROC_DerStatus") != 2)
    .filter(F.col("EROC_DerOutcome").isin(10, 12))
    .filter(F.col("EROC_DerTypeOfSpecialistAdvice") == 99)
    .groupBy("EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name")
    .agg(F.sum("EROC_Requests").alias("Requests"))
)

# 6. Left join onto the grid so every provider/month pair is present; pairs without data become 0
df_diverted_other_referral_sa = (
    df_full_grid
    .join(
        df_diverted_other_referral_sa,
        on=["EROC_DerMonth", "RESP_Org_Code", "RESP_Org_Name"],
        how="left",
    )
    .fillna(0, subset=["Requests"])
)

# 7. Pivot so that each month becomes a column
df_diverted_other_referral_sa_pivot = (
    df_diverted_other_referral_sa
    .groupBy("RESP_Org_Code", "RESP_Org_Name")
    .pivot("EROC_DerMonth")
    .agg(F.sum("Requests"))
    .orderBy("RESP_Org_Code", "RESP_Org_Name")
    .withColumnRenamed("RESP_Org_Code", "Provider Code")
    .withColumnRenamed("RESP_Org_Name", "Provider Name")
)

# 8. Show the result
display(df_diverted_other_referral_sa_pivot)

In [0]:
#1.Outputs of diverted requests for other types sa formatting date headers along the pivot
from datetime import datetime

# Relabel every month column from 'YYYY-MM-DD' to 'Mon-YYYY'; the first two columns
# (Provider Code / Provider Name) are skipped.
for column in df_diverted_other_referral_sa_pivot.columns[2:]:
    month_format = datetime.strptime(column, '%Y-%m-%d').strftime("%b-%Y")
    df_diverted_other_referral_sa_pivot = df_diverted_other_referral_sa_pivot.withColumnRenamed(column, month_format)

#2.converting the pivot to pandas dataframe
df_pd_diverted_other_referral_sa_pivot = df_diverted_other_referral_sa_pivot.toPandas()

#3.selecting the template worksheet and writing the table into it
ws_total_all = wb['4) Other Types of Spec Advice']

safe_insert_pandas_df_into_excel(
    df=df_pd_diverted_other_referral_sa_pivot,
    ws=ws_total_all,
    header=True,
    startrow=299,
    startcol=2,
    index=False,
)

#4.Check width of table and get column numbers of unformatted columns
# The table size is read from the frame that was just written: its rows are (code, name) pairs,
# which a count of distinct provider names alone does not necessarily match.
number_of_months = len(df_pd_diverted_other_referral_sa_pivot.columns) - 2  # all columns after Provider Code / Provider Name
pre_date_columns = 3
copy_column = 37 + pre_date_columns  # presumably the last month column already styled in the template - TODO confirm
end_column = number_of_months + pre_date_columns + 1

#5.copy and paste formatting onto unformatted columns
# NOTE(review): rows 14-298 stop short of the table written at startrow=299 (if startrow is the
# worksheet row) - confirm this row range is intended for this table.
for column_number in range(copy_column, end_column):
    for row_number in range(14, 299):
        cell_to_copy_from = ws_total_all.cell(row=row_number, column=copy_column)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#6.get height of table and get row numbers of unformatted rows
number_of_providers = len(df_pd_diverted_other_referral_sa_pivot)  # one written row per provider
new_provider = number_of_providers - 134  # 134 is presumably the number of provider rows styled in the template - TODO confirm
pre_table_rows = 298
copy_row = pre_table_rows + 134
end_row = copy_row + new_provider + 1

#7.copy and paste formatting onto unformatted rows (the style of copy_row is reused for every row below it)
for row_number in range(copy_row, end_row):
    for column_number in range(2, end_column):
        cell_to_copy_from = ws_total_all.cell(row=copy_row, column=column_number)
        cell_to_paste_to = ws_total_all.cell(row=row_number, column=column_number)
        excel.copy_all_cell_styles(cell_to_copy_from, cell_to_paste_to)

#8.Use the values calculated above to get the cell range of the whole table
conditional_formatting_start_cell = "D299"
conditional_formatting_end_col = openpyxl.utils.cell.get_column_letter(end_column - 1)
conditional_formatting_end_row = end_row - 1
conditional_formatting_end_cell = f"{conditional_formatting_end_col}{conditional_formatting_end_row}"
conditional_formatting_range = f"{conditional_formatting_start_cell}:{conditional_formatting_end_cell}"

#9.Copy the existing conditional formatting rules, but make them cover the whole table using the range created above.
# Iterating ws.conditional_formatting yields one container per formatted range (its rules are in .rules),
# while add() only accepts the rule objects themselves and changes the collection being iterated.
# So the rules are collected first (each rule object once) and added afterwards.
existing_rules = []
for formatted_range in ws_total_all.conditional_formatting:
    for rule in formatted_range.rules:
        if not any(rule is known_rule for known_rule in existing_rules):
            existing_rules.append(rule)
for rule in existing_rules:
    ws_total_all.conditional_formatting.add(conditional_formatting_range, rule)

#10.updating publishing date header
ws_total_all.cell(row=3, column=3).value = date_header

#11.Number format with a thousands separator and no decimals
thousands_format = "#,##0"

#12.Apply the number format to the specified range
for row in ws_total_all.iter_rows(min_row=15, max_row=end_row, min_col=pre_date_columns + 1, max_col=end_column):
    for cell in row:
        cell.number_format = thousands_format

# Apply the number format to specific cells
for row in [149, 291, 434]:
    ws_total_all.cell(row=row, column=4).number_format = thousands_format

In [0]:
from openpyxl.styles import NamedStyle

# Named style whose number format renders zero as "-" (positive;negative;zero sections)
dash_number_format = NamedStyle(name="dash_format", number_format='#,##0;-#,##0;"-"')

# Register the style on the workbook only once, so re-running the cell does not raise
if "dash_format" not in wb.named_styles:
    wb.add_named_style(dash_number_format)

# Only the "Other Types" sheet is processed here
target_sheet = wb["4) Other Types of Spec Advice"]

# From row 14 / column 3 to the sheet's last used row / column: empty cells and zeros get the dash format
zero_as_dash = dash_number_format.number_format
for sheet_row in target_sheet.iter_rows(
    min_row=14,
    max_row=target_sheet.max_row,
    min_col=3,
    max_col=target_sheet.max_column,
):
    for cell in sheet_row:
        if cell.value is not None and cell.value != 0:
            continue
        cell.number_format = zero_as_dash

In [0]:
# Write the populated workbook to Outputs/Provider_template.xlsx (path is relative to the working directory).
wb.save('Outputs/Provider_template.xlsx')