In [None]:
#First Stage of automating exception reports

In [None]:
#Downloading the raw data from Amazon Web Service (AWS) S3 
#This is the cloud warehouse where we store data for use on the analytical platform

#This is a package that allows us to download data from AWS S3 in a more usable way
#You may need to install it in the Terminal --> pip install arrow-pd-parser

In [None]:
from arrow_pd_parser import reader, writer
import boto3
import pandas as pd
import numpy as np
import io
import xlsxwriter

In [None]:
# Configuration: where the data lives on S3 and which monthly extract to process.
# bucket + input_folder + file name is the path read from; bucket + output_folder
# is the prefix the CSV sheets are written to.
bucket = "s3://alpha-piatool/"
input_folder = "sopdq_AV/inputs/"
output_folder = "sopdq_AV/outputs/"
# Earlier monthly extracts; un-comment exactly one line to process that month instead.
#raw_data = "WFPT Staff Data Mar22.csv"
#raw_data = "WFPT Staff Data Apr22.csv"
#raw_data = "WFPT Staff Data May22.csv"
raw_data = "WFPT Staff Data Jun22.csv"



In [None]:
#Loading the raw data stored by David Yuen
# `raw_data` starts out as the file name chosen in the configuration cell and is
# replaced here by the loaded DataFrame. The file name is remembered separately
# so this cell can be re-run without restarting the kernel; otherwise the second
# run would try to build a path from a DataFrame.
if isinstance(raw_data, str):
    raw_data_file = raw_data
raw_data = reader.read(bucket + input_folder + raw_data_file, file_format="csv")

In [None]:
raw_data.head(2)

In [None]:
#Tracie Kilbey creates four sheets from the raw dataset, which she then uses for the exception reports
#Let's start with creating the first sheet, as it's the simplest - "WfPT all"
#This is just the raw data, but we'll make a few changes to column headings to make it easier to use

In [None]:
# Take an independent copy: a plain assignment would make WfPT_all another name
# for raw_data, so any later change to this sheet (for example renaming its
# columns) would silently change the frame the other sheets are filtered from.
WfPT_all = raw_data.copy()

In [None]:
# WfPT_all.columns = WfPT_all.columns.str.replace(" ", "_")
# WfPT_all.columns = map(str.lower, WfPT_all.columns)

In [None]:
raw_data.columns

In [None]:
#Save this file in AWS S3 
writer.write(df=WfPT_all, output_path=bucket+output_folder+"WfPT all.csv", file_format="csv")

In [None]:
# First sheet complete: report how many rows it holds.
len(WfPT_all)

In [None]:
# Now create second file - Probation inc HQ
# Filter Column "NOMS-MOJ" to leave only "NPS" or "NOMS HQ"

In [None]:
# Keep only the rows whose "NOMS-MOJ" value is one of the two probation groups.
probation_groups = ["NOMS HQ", "NPS"]
is_probation_row = raw_data["NOMS-MOJ"].isin(probation_groups)
Probation_inc_HQ = raw_data[is_probation_row]

In [None]:
#Save on AWS S3
writer.write(df=Probation_inc_HQ, output_path=bucket+output_folder+"Probation inc HQ.csv", file_format="csv")

In [None]:
#Completed second file
Probation_inc_HQ.head(2)
len(Probation_inc_HQ)

In [None]:
# Now create the third file - Probation some HQ
#For those working in Probation Delivery Units (PDUs) and Region Offices, 
#identify those who have NOMS-MOJ (column S) value of "NPS".
# Add in Other HQ staff – see establishment names below (use the Prog CC and Admin CC)
# Cost Centre = 10207830, 10207831, 10207812

In [None]:
raw_data.head(2)

In [None]:
# All "NPS" rows, plus those "NOMS HQ" rows that sit in the three listed cost centres.
hq_cost_centres = [10207830, 10207831, 10207812]
a = raw_data.loc[raw_data["NOMS-MOJ"].eq("NPS")]
b = raw_data.loc[raw_data["NOMS-MOJ"].eq("NOMS HQ")]
c = b.loc[b["Cost Centre"].isin(hq_cost_centres)]
probation_some_hq = pd.concat([a, c])

In [None]:
probation_some_hq
len(probation_some_hq)

In [None]:
#Save on AWS S3
writer.write(df=probation_some_hq, output_path=bucket+output_folder+"Probation some HQ.csv", file_format="csv")

In [None]:
#Create the fourth sheet - APs
## Filter for NOMS-MOJ field of "NOMS HQ" and then filter for all establishments with "APs" at the end of the name.
## Filter for National Approved Premises and AP Professionalisation Project (as in the table below).
### Cost centres = 10207588, 10207586


In [None]:
# Approved Premises sheet = two groups of rows taken from raw_data:
#   b: "NOMS HQ" rows whose Establishment name ENDS with "APs"
#   c: rows in the two Approved Premises cost centres
is_noms_hq = raw_data["NOMS-MOJ"] == "NOMS HQ"
a = raw_data[is_noms_hq]

# Anchor the match to the end of the name (trailing spaces tolerated), as the
# specification asks; na=False treats a blank Establishment as "no match" so the
# mask stays strictly boolean instead of failing on NaN.
ends_with_aps = raw_data["Establishment"].str.contains(r"APs\s*$", na=False)
b = raw_data[is_noms_hq & ends_with_aps]

in_ap_cost_centre = raw_data["Cost Centre"].isin([10207588, 10207586])
c = raw_data[in_ap_cost_centre]

# A row can satisfy both filters; add the cost-centre rows only when they are
# not already in b, so no member of staff is listed (and later counted) twice.
aps = pd.concat([b, raw_data[in_ap_cost_centre & ~(is_noms_hq & ends_with_aps)]])

In [None]:
len(aps)

In [None]:
#Save on AWS S3
writer.write(df=aps, output_path=bucket+output_folder+"APs.csv", file_format="csv")

In [None]:
####################################################################################################################
# Next step - to combine csv files into one workbook on excel
####################################################################################################################

In [None]:
## Name of csv files
# WfPT_all
# Probation_inc_HQ
# probation_some_hq
# aps

In [None]:
# Build the four-sheet workbook in memory, then upload the bytes to S3.
with io.BytesIO() as output:
    # The handle is called excel_writer, NOT writer: binding it to `writer` would
    # replace the arrow_pd_parser `writer` module imported at the top, after which
    # every writer.write(...) cell fails when it is run (or re-run).
    with pd.ExcelWriter(output, engine='xlsxwriter') as excel_writer:
        WfPT_all.to_excel(excel_writer, sheet_name='WfPT All')
        Probation_inc_HQ.to_excel(excel_writer, sheet_name='Probation inc HQ')
        probation_some_hq.to_excel(excel_writer, sheet_name='Probation some HQ')
        aps.to_excel(excel_writer, sheet_name='APs')
    # Must be read after the ExcelWriter has closed (that is when the workbook is
    # written) and before the BytesIO buffer itself closes.
    data = output.getvalue()
s3 = boto3.resource('s3')
# NOTE(review): the object key says "Mar22" while the configuration cell selects
# the Jun22 extract - confirm whether the month in the key should follow the input.
s3.Bucket('alpha-piatool').put_object(Key='sopdq_AV/outputs/WFPT Staff Data Mar22 Probation.xlsx', Body=data)


In [None]:
####################################################################################################################
# Next step - Add look up files
####################################################################################################################

In [None]:
# Read the two lookup worksheets from the S3 input folder; the first two rows of
# each worksheet are skipped.
lookups_path = bucket + input_folder + "lookups_may22.xlsx"
look_ups = pd.read_excel(lookups_path, sheet_name="Lookups", skiprows=2)

cost_centre_types_path = bucket + input_folder + "Lookups for Probation DQ v1.4.xlsx"
cost_centre_consistency = pd.read_excel(cost_centre_types_path, sheet_name="Cost Centre types", skiprows=2)

In [None]:
look_ups.columns

In [None]:
# Split the wide "Lookups" sheet into one small frame per lookup table.
cost_centre_lookup = look_ups.loc[:, ['Cost Centre', 'Unnamed: 1', 'Unnamed: 2', 'Establishment']]
location_name_lookup = look_ups.loc[:, ["Unnamed: 42", "Unnamed: 43"]]
internal_function_lookup = look_ups.loc[:, ['Internal Function']]
job_title_lookup = look_ups.loc[:, ['Job']]
job_band_lookup = look_ups.loc[:, ['Grade DY']]


In [None]:
################################################################################################################
# Main Data Quality checks 
## Cost Centres
## HR locations
## Internal functions
## Job (Title)
## *Job (Band) not required but added in any event
################################################################################################################

In [None]:
#Tidy up Cost Centre lookup column titles
# Always start again from look_ups: promoting the header from the previous value
# of cost_centre_lookup would strip one more data row every time the cell is re-run.
cost_centre_block = look_ups[['Cost Centre', 'Unnamed: 1', 'Unnamed: 2', 'Establishment']]
new_header = cost_centre_block.iloc[0] #the first row holds the real column titles
cost_centre_lookup = cost_centre_block.iloc[1:].copy() #the data, less the header row
cost_centre_lookup.columns = new_header #set the header row as the df header

In [None]:
# Keep the four cost-centre lookup columns in a fixed order, then preview them.
wanted_columns = ['Cost Centre Number', 'Region', 'Pdu Name (as used in the target)/LDU name', "Cost centre name in SOP"]
cost_centre_lookup = cost_centre_lookup.loc[:, wanted_columns]
cost_centre_lookup.head(3)

In [None]:
#Tidy up HR Location lookup column titles
# For this table the column titles are taken from the SECOND row (position 1)
# and the data from the third row onwards.
new_header = location_name_lookup.iloc[1]
hr_location_lookup = location_name_lookup.iloc[2:]
hr_location_lookup.columns = new_header

In [None]:
hr_location_lookup.head(2)

In [None]:
#Tidy up Internal Function lookup column title
# Always start again from look_ups: promoting the header from the previous value
# of internal_function_lookup would strip one more data row on every re-run.
internal_function_block = look_ups[['Internal Function']]
new_header = internal_function_block.iloc[0] #the first row holds the real column title
internal_function_lookup = internal_function_block.iloc[1:].copy() #the data, less the header row
internal_function_lookup.columns = new_header #set the header row as the df header

In [None]:
internal_function_lookup.head(3)

In [None]:
#Tidy up Job (title) lookup column title
# Always start again from look_ups: promoting the header from the previous value
# of job_title_lookup would strip one more data row on every re-run.
job_title_block = look_ups[['Job']]
new_header = job_title_block.iloc[0] #the first row holds the real column title
job_title_lookup = job_title_block.iloc[1:].copy() #the data, less the header row
job_title_lookup.columns = new_header #set the header row as the df header

In [None]:
job_title_lookup.head(3)

In [None]:
#Tidy up Job (band) lookup column title
#new_header = job_band_lookup.iloc[0] #grab the first row for the header
#job_band_lookup = job_band_lookup[1:] #take the data less the header row
#job_band_lookup.columns = new_header #set the header row as the df header

In [None]:
job_band_lookup.head(3)

In [None]:
# Keep the two columns needed for the consistency check and store the cost
# centre as a nullable integer. astype() returns a new frame, which avoids
# assigning into a column of a slice (SettingWithCopyWarning).
cost_centre_consistency = (
    cost_centre_consistency[["Cost Centre", "Cost Centre Type"]]
    .astype({"Cost Centre": "Int64"})
)
cost_centre_consistency.head(3)

In [None]:
# Save all lookups in single excel file
with io.BytesIO() as output:
    # The handle is called excel_writer, NOT writer: binding it to `writer` would
    # replace the arrow_pd_parser `writer` module imported at the top, after which
    # every writer.write(...) cell fails when it is run (or re-run).
    with pd.ExcelWriter(output, engine='xlsxwriter') as excel_writer:
        cost_centre_lookup.to_excel(excel_writer, sheet_name='Cost Centre Lookup')
        internal_function_lookup.to_excel(excel_writer, sheet_name='Internal Function Lookup')
        hr_location_lookup.to_excel(excel_writer, sheet_name='HR Location Lookup')
        job_title_lookup.to_excel(excel_writer, sheet_name='Job Title Lookup')
        cost_centre_consistency.to_excel(excel_writer, sheet_name="Cost Centre type")
    # Must be read after the ExcelWriter has closed (that is when the workbook is
    # written) and before the BytesIO buffer itself closes.
    data = output.getvalue()
s3 = boto3.resource('s3')
s3.Bucket('alpha-piatool').put_object(Key='sopdq_AV/outputs/WFPT lookups.xlsx', Body=data)


In [None]:
################################################################################
# Workforce Probation Error Report
################################################################################

In [None]:
## Step 0: Create Sheet "WfPT Probation Staff in Post"
### Use Probation Some HQ 
### Use cost_centre_consistency

In [None]:
len(probation_some_hq)

In [None]:
probation_some_hq.columns

In [None]:
# Attach the Cost Centre Type to every member of staff.
# The lookup is first reduced to one row per non-missing cost centre, as every
# other lookup in this notebook is before it is merged:
#  - a cost centre listed twice would duplicate the matching staff rows;
#  - pandas matches missing keys to each other, so a lookup row with a blank cost
#    centre would otherwise attach to staff whose cost centre is blank.
# When a cost centre is listed more than once, its first row is the one kept.
cost_centre_type_lookup = (
    cost_centre_consistency
    .dropna(subset=["Cost Centre"])
    .drop_duplicates(subset=["Cost Centre"])
)
wfpt_probation_sip = pd.merge(probation_some_hq, cost_centre_type_lookup, on="Cost Centre", how="left")

In [None]:
def _join_row_as_text(row):
    """Return the row's values, each converted to text, joined by ', '."""
    return ', '.join(row.values.astype(str))


# One combined text key per member of staff, used for the consistency check.
cols = ['Cost Centre Type', 'Internal Function', 'Job']
wfpt_probation_sip["Joined CCType,IF, Job For Consistency Check"] = wfpt_probation_sip[cols].apply(_join_row_as_text, axis=1)

In [None]:
##################################################################################################################

In [None]:
## Step 1: Create "Cost Centre Error Flag" column

# Merge main to lookup on cost centre
# If matched, "Valid", 
# If NA, "Missing", 
# If not NA and not matched, "Invalid"

In [None]:
# One row per valid cost centre, keyed by the numeric "Cost Centre" used in the
# staff data.
cost_centre_lookup2 = cost_centre_lookup[["Cost Centre Number"]].dropna(subset=["Cost Centre Number"])
cost_centre_numbers = pd.to_numeric(cost_centre_lookup2["Cost Centre Number"])
# Both columns carry the numeric value: "Cost Centre" is the merge key and
# "Cost Centre Number" is the marker compared against it after the merge.
cost_centre_lookup2 = cost_centre_lookup2.assign(**{"Cost Centre Number": cost_centre_numbers,
                                                    "Cost Centre": cost_centre_numbers})
# De-duplicate AFTER the conversion: two spellings of one number (for example the
# text "10207830" and the integer 10207830) only become equal once numeric, and a
# repeated key would duplicate staff rows in the left merge.
cost_centre_lookup2 = cost_centre_lookup2.drop_duplicates(subset=["Cost Centre"])

In [None]:
wfpt_probation_sip2 = pd.merge(wfpt_probation_sip, cost_centre_lookup2, on="Cost Centre", how="left")

In [None]:
# Flag each member of staff using the result of the left merge, the same way the
# HR Location, Internal Function and Job flags are derived:
#   no cost centre recorded             -> "Missing"
#   lookup marker present after merge   -> "Valid"
#   cost centre recorded, no lookup hit -> "Invalid"
# Testing whether the marker is present (rather than whether it equals the key)
# keeps the flag correct even if the lookup column is text and the key is numeric.
wfpt_probation_sip2["Cost Centre Error Flag"] = np.select(
    [wfpt_probation_sip2["Cost Centre"].isna(),
     wfpt_probation_sip2["Cost Centre Number"].notna()],
    ["Missing", "Valid"],
    default="Invalid",
)

In [None]:
# Counts in the order: Invalid, Valid, Missing, total staff.
cost_centre_flags = wfpt_probation_sip2["Cost Centre Error Flag"]
print(
    int((cost_centre_flags == "Invalid").sum()),
    int((cost_centre_flags == "Valid").sum()),
    int((cost_centre_flags == "Missing").sum()),
    len(wfpt_probation_sip2)
)

In [None]:
## Step 2: Create "HR Location Error Flag" column

# Merge main to lookup on HR Location
# If matched, "Valid", 
# If NA, "Missing", 
# If not NA and not matched, "Invalid"

In [None]:
# One row per known HR location, renamed so it can be merged on "HR Location".
hr_location_lookup2 = hr_location_lookup[["Location Name", "Valid"]]
# Continue from hr_location_lookup2 (not hr_location_lookup) so the column
# selection above is actually used; the two-name rename below depends on exactly
# these two columns, in this order.
hr_location_lookup2 = hr_location_lookup2.dropna(subset=["Location Name"])
hr_location_lookup2 = hr_location_lookup2.drop_duplicates(subset=["Location Name"])
hr_location_lookup2.columns = ["HR Location", "HR Valid"]

In [None]:
wfpt_probation_sip3 = pd.merge(wfpt_probation_sip2, hr_location_lookup2, on="HR Location", how="left")

In [None]:
# First matching rule wins: no location recorded -> "Missing"; location recorded
# but no "HR Valid" value came back from the lookup -> "Invalid"; else "Valid".
# NOTE(review): only the presence of "HR Valid" is tested, not its content -
# confirm the lookup's "Valid" column never marks a location as not valid.
wfpt_probation_sip3["HR Location Error Flag"] = np.select(
    [wfpt_probation_sip3["HR Location"].isnull(),
     wfpt_probation_sip3["HR Valid"].isnull()],
    ["Missing", "Invalid"],
    default="Valid",
)

In [None]:
# Counts in the order: Invalid, Valid, Missing, total staff.
hr_location_flags = wfpt_probation_sip3["HR Location Error Flag"]
print(
    int((hr_location_flags == "Invalid").sum()),
    int((hr_location_flags == "Valid").sum()),
    int((hr_location_flags == "Missing").sum()),
    len(wfpt_probation_sip3)
)

In [None]:
## Step 3: Create "Internal Function Error Flag" column

# Merge main to lookup on Internal Function
# If matched, "Valid", 
# If NA, "Missing", 
# If not NA and not matched, "Invalid"

In [None]:
# One row per valid internal function, with a marker column that survives the
# left merge only where the staff value was found in the lookup.
unified_functions = (
    internal_function_lookup["Unified internal Function"]
    .dropna()
    .drop_duplicates()
)
# Entries containing "NPS" or "Internal Function" are excluded from the list of
# valid values. na=False keeps the mask strictly boolean if a cell is not text.
is_excluded = unified_functions.str.contains("NPS|Internal Function", na=False)
# Building a fresh frame avoids assigning a new column into a filtered slice
# (SettingWithCopyWarning).
internal_function_lookup2 = pd.DataFrame({
    "Internal Function": unified_functions[~is_excluded],
    "Internal Function Valid": "Valid",
})

In [None]:
wfpt_probation_sip4 = pd.merge(wfpt_probation_sip3, internal_function_lookup2, on="Internal Function", how="left")

In [None]:
# First matching rule wins: nothing recorded -> "Missing"; recorded but not found
# in the lookup (marker column empty after the merge) -> "Invalid"; else "Valid".
wfpt_probation_sip4["Internal Function Error Flag"] = np.select(
    [wfpt_probation_sip4["Internal Function"].isnull(),
     wfpt_probation_sip4["Internal Function Valid"].isnull()],
    ["Missing", "Invalid"],
    default="Valid",
)

In [None]:
# Summary of the Internal Function flags, one total per line.
internal_function_flags = wfpt_probation_sip4["Internal Function Error Flag"]
print(
    "Invalid total: ", int((internal_function_flags == "Invalid").sum()), "\n",
    "Valid total: ", int((internal_function_flags == "Valid").sum()), "\n",
    "Missing Total: ", int((internal_function_flags == "Missing").sum()), "\n",
    "Total Staff:", len(wfpt_probation_sip4)
)

In [None]:
## Step 4: Create "Job Error Flag" column

# Merge main to lookup on Job
# If matched, "Valid", 
# If NA, "Missing", 
# If not NA and not matched, "Invalid"

In [None]:
job_title_lookup2 = job_title_lookup[["Probation Jobs"]]
job_title_lookup2

In [None]:
# One row per valid job title, renamed to "Job" for the merge, plus a marker
# column that survives the left merge only where the staff value was found.
job_title_lookup2 = (
    job_title_lookup[["Probation Jobs"]]
    .dropna(subset=["Probation Jobs"])
    .drop_duplicates(subset=["Probation Jobs"])
)
job_title_lookup2.columns = ["Job"]
job_title_lookup2["Job Function Valid"] = "Valid"

In [None]:
wfpt_probation_sip5 = pd.merge(wfpt_probation_sip4, job_title_lookup2, on="Job", how="left")

In [None]:
wfpt_probation_sip5.head(2)

In [None]:
# First matching rule wins: no job recorded -> "Missing"; recorded but not found
# in the lookup (marker column empty after the merge) -> "Invalid"; else "Valid".
wfpt_probation_sip5["Job Error Flag"] = np.select(
    [wfpt_probation_sip5["Job"].isnull(),
     wfpt_probation_sip5["Job Function Valid"].isnull()],
    ["Missing", "Invalid"],
    default="Valid",
)

In [None]:
# Summary of the Job flags, one total per line.
job_flags = wfpt_probation_sip5["Job Error Flag"]
print(
    "Invalid total: ", int((job_flags == "Invalid").sum()), "\n",
    "Valid total: ", int((job_flags == "Valid").sum()), "\n",
    "Missing Total: ", int((job_flags == "Missing").sum()), "\n",
    "Total Staff:", len(wfpt_probation_sip5)
)

In [None]:
wfpt_probation_sip5.head(1)

In [None]:
wfpt_probation_sip5.columns

In [None]:
# Remove the helper columns brought in by the lookup merges; only the four
# "... Error Flag" columns derived from them are kept.
merge_helper_columns = ["Cost Centre Number", "HR Valid", "Internal Function Valid", "Job Function Valid"]
wfpt_probation_sip_final = wfpt_probation_sip5.drop(columns=merge_helper_columns)

In [None]:
wfpt_probation_sip_final.head(2)

In [None]:
len(wfpt_probation_sip_final)

In [None]:
####################################################################################################################
# Final Error Report
####################################################################################################################

In [None]:
#################################################################
# Step 1: WfPT data
#################################################################

In [None]:
# WORKFORCE PLANNING TOOL DATA
# Employee Number
# Cost Centre
# Cost Centre Description (Establishment)
# HR Location
# Internal Function
# Grade
# Job (title)
# FTE
# Location Postcode
# Area_Directorate
# Hours

# ERROR FLAGS
# Cost Centre Error Flag
# HR Location Error Flag
# Internal Function Error Flag
# Job Error Flag

In [None]:
# Columns of the error report: the workforce planning tool fields first, then
# the four error flags.
workforce_columns = ["Employee Number", "Cost Centre", "Establishment", "HR Location", "Internal Function",
                     "Grade", "Job", "FTE", "Location Post Code", "Area_Directorate", "Hours"]
error_flag_columns = ['Cost Centre Error Flag', 'HR Location Error Flag',
                      'Internal Function Error Flag', 'Job Error Flag']
wfpt_data = wfpt_probation_sip_final[workforce_columns + error_flag_columns]

In [None]:
wfpt_probation_sip_final.columns

In [None]:
wfpt_data.head(1)

In [None]:
#################################################################
# Step 2: Tables % Valid
#################################################################

In [None]:
#################################################################
# Rename columns
#################################################################

In [None]:
wfpt_data_report = wfpt_data.copy(deep=True)

In [None]:
# Use the report's wording for two of the column titles.
report_titles = {"Establishment": 'Cost Centre Description (Establishment)', "Job": 'Job (title)'}
wfpt_data_report = wfpt_data_report.rename(columns=report_titles)

In [None]:
wfpt_data_report.head(1)

In [None]:
#Columns: 
## Region (Area Directorate)
## Total Cases
### Number missing, Number invalid, Number valid
## Cost Centre 
### Number missing, Number invalid, Number valid
## HR Location
### Number missing, Number invalid, Number valid
## Internal Function
### Number missing, Number invalid, Number valid
## Job
### Number missing, Number invalid, Number valid
## Total Valid 
### Number missing, Number invalid, Number valid
## Percentage point difference from England and Wales
## Comparison with England and Wales
## Trend June 2021 to March 2022

In [None]:
#################################################################
#Table 1: Number of errors and percentage of valid cases by Region and variable 
# (excluding Approved Premises in England and some HQ functions), 
# 31 March 2022 with trends
################################################################## 

In [None]:
# Table 1 needs only the region and the four error flags.
table1_columns = ["Area_Directorate", "Cost Centre Error Flag", "HR Location Error Flag",
                  "Internal Function Error Flag", "Job Error Flag"]
table1 = wfpt_data_report[table1_columns]

In [None]:
#group by area directorate
#sum total cases 

In [None]:
table1.head(3)

In [None]:
#group by area directorate
#Cost Centre: sum missing, invalid, valid, % of valid/total cases

In [None]:
def _area_flag_counts(flag_col):
    """Count table1 rows per Area_Directorate and flag value for one flag column.

    The result's columns are a two-level index: (flag column name, flag value).
    """
    counts = pd.crosstab(table1['Area_Directorate'], table1[flag_col])
    counts.columns = pd.MultiIndex.from_product([[flag_col], counts.columns.tolist()])
    return counts


cols = ['Cost Centre Error Flag', 'HR Location Error Flag', 'Internal Function Error Flag', 'Job Error Flag']
table1_valid_raw = [_area_flag_counts(flag_col) for flag_col in cols]

# Side by side: one block of count columns per error flag.
table1_valid = pd.concat(table1_valid_raw, axis=1)

In [None]:
table1_valid

In [None]:
# Every (error flag, status) column the tables are expected to contain; used to
# find combinations that did not occur in this month's data.
complete_list = (
    [('Cost Centre Error Flag', status) for status in ('Invalid', 'Valid', 'Missing')]
    + [(flag, status)
       for flag in ('HR Location Error Flag', 'Internal Function Error Flag', 'Job Error Flag')
       for status in ('Invalid', 'Missing', 'Valid')]
)

In [None]:
missing_items_table1 = list(set(complete_list) - set(table1_valid.columns.to_list()))

In [None]:
# Add a zero-filled column for every (flag, status) pair that did not occur.
for item in missing_items_table1:
    table1_valid[item] = 0
# Sort the column index once, after all columns are in place; re-sorting inside
# the loop repeated the same work for every added column.
table1_valid = table1_valid.sort_index(axis=1)

In [None]:
table1_valid.head(1)

In [None]:
# Total cases per region = the three Cost Centre flag counts added together.
table1_valid[("Total Cases", "")] = sum(
    table1_valid[("Cost Centre Error Flag", status)]
    for status in ("Invalid", "Valid", "Missing")
)

In [None]:
table1_valid.head(3)

In [None]:
# Share of valid cases for each flag (a proportion between 0 and 1).
for flag in ("Cost Centre Error Flag", "HR Location Error Flag",
             "Internal Function Error Flag", "Job Error Flag"):
    table1_valid[(flag, "% valid")] = table1_valid[(flag, "Valid")] / table1_valid[("Total Cases", "")]

In [None]:
table1_valid = table1_valid.sort_index(axis=1)

In [None]:
table1_valid

In [None]:
# "Total Valid" block: for each status, add the counts of the four flags.
error_flags = ["Cost Centre Error Flag", "HR Location Error Flag",
               "Internal Function Error Flag", "Job Error Flag"]
for status in ("Missing", "Invalid", "Valid"):
    table1_valid[("Total Valid", status)] = sum(table1_valid[(flag, status)] for flag in error_flags)

In [None]:
# Overall proportion of valid checks across all rows of the table.
valid_checks = sum(table1_valid[("Total Valid", "Valid")])
missing_checks = sum(table1_valid[("Total Valid", "Missing")])
invalid_checks = sum(table1_valid[("Total Valid", "Invalid")])
england_and_wales = valid_checks / (missing_checks + invalid_checks + valid_checks)
england_and_wales

In [None]:
# Proportion of valid checks per region, over all four flags.
region_valid = table1_valid[("Total Valid", "Valid")]
region_checks = (region_valid
                 + table1_valid[("Total Valid", "Invalid")]
                 + table1_valid[("Total Valid", "Missing")])
table1_valid[("Total Valid", "% valid")] = region_valid / region_checks

In [None]:
table1_valid[("Percentage point difference from England and Wales", "")] = (table1_valid[("Total Valid", "% valid")] - england_and_wales)

In [None]:
table1_final = table1_valid.copy(deep = True)

In [None]:
table1_final.columns

In [None]:
# Report order: total cases, then Missing / Invalid / Valid / % valid for each
# flag and for the "Total Valid" block, then the difference column.
table1_groups = ['Cost Centre Error Flag', 'HR Location Error Flag',
                 'Internal Function Error Flag', 'Job Error Flag', 'Total Valid']
table1_status_order = ['Missing', 'Invalid', 'Valid', '% valid']
table1_column_order = (
    [('Total Cases', '')]
    + [(group, status) for group in table1_groups for status in table1_status_order]
    + [('Percentage point difference from England and Wales', '')]
)
table1_final = table1_final[table1_column_order]

In [None]:
table1_final.head(2)

In [None]:
###########################################################################
# Table 2: Number of errors and percentage of valid cases by Cost Centre and variable 
# (excluding Approved Premises in England and some HQ functions), 
# 31 March 2022 with trends
############################################################################

In [None]:
wfpt_data_report.columns

In [None]:
# Table 2 needs the region, the cost centre (number and description) and the
# four error flags.
table2_columns = ["Area_Directorate", "Cost Centre Description (Establishment)", "Cost Centre",
                  "Cost Centre Error Flag", "HR Location Error Flag",
                  "Internal Function Error Flag", "Job Error Flag"]
table2 = wfpt_data_report[table2_columns]

In [None]:
table2.head()

In [None]:
def _cost_centre_flag_counts(flag_col):
    """Count table2 rows per Cost Centre and flag value for one flag column.

    The result's columns are a two-level index: (flag column name, flag value).
    """
    counts = pd.crosstab(table2["Cost Centre"], table2[flag_col])
    counts.columns = pd.MultiIndex.from_product([[flag_col], counts.columns.tolist()])
    return counts


# NOTE(review): with crosstab's defaults, staff whose Cost Centre is blank appear
# to be left out of this table, so its "Missing" cost-centre count may always be
# 0 - confirm this is intended.
cols = ['Cost Centre Error Flag', 'HR Location Error Flag', 'Internal Function Error Flag', 'Job Error Flag']
table2_valid_raw = [_cost_centre_flag_counts(flag_col) for flag_col in cols]

# Side by side: one block of count columns per error flag.
table2_valid = pd.concat(table2_valid_raw, axis=1)

In [None]:
missing_items_table2 = list(set(complete_list) - set(table2_valid.columns.to_list()))

In [None]:
# Add a zero-filled column for every (flag, status) pair that did not occur.
for item in missing_items_table2:
    table2_valid[item] = 0
# Sort the column index once, after all columns are in place; re-sorting inside
# the loop repeated the same work for every added column.
table2_valid = table2_valid.sort_index(axis=1)

In [None]:
table2_valid

In [None]:
# Total cases per cost centre = the three Cost Centre flag counts added together.
table2_valid[("Total Cases", "")] = sum(
    table2_valid[("Cost Centre Error Flag", status)]
    for status in ("Invalid", "Valid", "Missing")
)

In [None]:
# Share of valid cases for each flag (a proportion between 0 and 1).
for flag in ("Cost Centre Error Flag", "HR Location Error Flag",
             "Internal Function Error Flag", "Job Error Flag"):
    table2_valid[(flag, "% valid")] = table2_valid[(flag, "Valid")] / table2_valid[("Total Cases", "")]

In [None]:
table2_valid = table2_valid.sort_index(axis=1)

In [None]:
# "Total Valid" block: for each status, add the counts of the four flags.
error_flags = ["Cost Centre Error Flag", "HR Location Error Flag",
               "Internal Function Error Flag", "Job Error Flag"]
for status in ("Missing", "Invalid", "Valid"):
    table2_valid[("Total Valid", status)] = sum(table2_valid[(flag, status)] for flag in error_flags)

In [None]:
# Overall proportion of valid checks across all rows of table 2.
# This re-assigns england_and_wales, replacing the value computed from table 1.
valid_checks = sum(table2_valid[("Total Valid", "Valid")])
missing_checks = sum(table2_valid[("Total Valid", "Missing")])
invalid_checks = sum(table2_valid[("Total Valid", "Invalid")])
england_and_wales = valid_checks / (missing_checks + invalid_checks + valid_checks)
england_and_wales

In [None]:
# Proportion of valid checks per cost centre, over all four flags.
cost_centre_valid = table2_valid[("Total Valid", "Valid")]
cost_centre_checks = (cost_centre_valid
                      + table2_valid[("Total Valid", "Invalid")]
                      + table2_valid[("Total Valid", "Missing")])
table2_valid[("Total Valid", "% valid")] = cost_centre_valid / cost_centre_checks

In [None]:
table2_valid

In [None]:
# Descriptive columns for each cost centre, indexed by cost centre so they can
# be joined onto the counts.
# NOTE(review): a cost centre that appears with more than one distinct region or
# description keeps several rows here, which would repeat its counts after the
# join - confirm each cost centre has a single region and description.
detail_columns = ["Area_Directorate", "Cost Centre", "Cost Centre Description (Establishment)"]
table2_details = (
    table2[detail_columns]
    .drop_duplicates(ignore_index=True)
    .set_index("Cost Centre")
)
# Give the columns a second level so they line up with the two-level columns of
# the counts table.
table2_details.columns = pd.MultiIndex.from_arrays([table2_details.columns, ["", "Establishment"]])
table2_details

In [None]:
table2_final = table2_valid.join(table2_details)
table2_final.reset_index()

In [None]:
table2_final.head(2)

In [None]:
table2_final_version = table2_final.reset_index()

In [None]:
table2_final_version = table2_final_version.set_index("Area_Directorate")

In [None]:
# Report order: identifying columns, total cases, then % valid / Invalid /
# Missing / Valid for each flag, then the "Total Valid" block.
table2_flag_groups = ['Cost Centre Error Flag', 'HR Location Error Flag',
                      'Internal Function Error Flag', 'Job Error Flag']
table2_column_order = (
    [("Cost Centre", ""),
     ('Cost Centre Description (Establishment)', 'Establishment'),
     ('Total Cases', '')]
    + [(group, status)
       for group in table2_flag_groups
       for status in ('% valid', 'Invalid', 'Missing', 'Valid')]
    + [('Total Valid', status) for status in ('Missing', 'Invalid', 'Valid', '% valid')]
)
table2_final_version = table2_final_version[table2_column_order]

In [None]:
# "% valid" is stored as a proportion between 0 and 1, so rounding it to one
# decimal place collapsed almost every value to 0.9 or 1.0 and almost every
# difference to 0.0. Three decimal places keep a tenth of a percentage point.
# The difference is taken before the "% valid" column itself is rounded.
table2_final_version[("Difference of % valid from England and Wales", "")] = (
    table2_final_version[("Total Valid", "% valid")] - england_and_wales
).round(3)
table2_final_version[("Total Valid", "% valid")] = table2_final_version[("Total Valid", "% valid")].round(3)

In [None]:
table2_final = table2_final_version.copy(deep = True).sort_index()

In [None]:
table2_final.head(2)

In [None]:
#################################################################
#Table 3: Number and percentage of complete cases by Region and variable 
#(excluding Approved Premises in England and some HQ functions), 31 March 2022
################################################################## 

In [None]:
# Peek at the first row of table1_valid, the frame Table 3 is derived from.
table1_valid.head(1)

In [None]:
# Table 3 reuses the Table 1 counts, but treats both "Valid" and "Invalid"
# entries as "Complete". Work on an independent (deep) copy so table1_valid
# is left untouched.
table3_complete = table1_valid.copy()

In [None]:
# For every error flag, add a "Complete" count = Valid + Invalid.
for _flag in (
    "Cost Centre Error Flag",
    "HR Location Error Flag",
    "Internal Function Error Flag",
    "Job Error Flag",
):
    table3_complete[(_flag, "Complete")] = (
        table3_complete[(_flag, "Valid")] + table3_complete[(_flag, "Invalid")]
    )

In [None]:
# Preview the first row to check the new "Complete" columns.
table3_complete.head(1)

In [None]:
# For every error flag, add "% complete" = Complete / Total Cases.
# The value is stored as a plain ratio; no x100 scaling or rounding is applied here.
_table3_total_cases = table3_complete[("Total Cases", "")]
for _flag in (
    "Cost Centre Error Flag",
    "HR Location Error Flag",
    "Internal Function Error Flag",
    "Job Error Flag",
):
    table3_complete[(_flag, "% complete")] = (
        table3_complete[(_flag, "Complete")] / _table3_total_cases
    )


In [None]:
# Display the whole of table3_complete, now including the per-flag "% complete" columns.
table3_complete

In [None]:
# Overall completeness across the four error flags:
#   "Missing" and "Complete" totals are the sums of the per-flag counts, and
#   "% complete" = Complete / (Missing + Complete), stored as a plain ratio.
_table3_flags = (
    "Cost Centre Error Flag",
    "HR Location Error Flag",
    "Internal Function Error Flag",
    "Job Error Flag",
)
for _measure in ("Missing", "Complete"):
    _cost_centre, _hr_location, _internal_function, _job = (
        table3_complete[(flag, _measure)] for flag in _table3_flags
    )
    table3_complete[("Total Complete", _measure)] = (
        _cost_centre + _hr_location + _internal_function + _job
    )

_table3_total_complete = table3_complete[("Total Complete", "Complete")]
table3_complete[("Total Complete", "% complete")] = _table3_total_complete / (
    table3_complete[("Total Complete", "Missing")] + _table3_total_complete
)

In [None]:
# Display the whole of table3_complete, now including the "Total Complete" columns.
table3_complete

In [None]:
# Final Table 3: keep "Total Cases" followed by Missing / Complete / % complete
# for each error flag and for the overall "Total Complete" group, in report order.
_table3_groups = (
    "Cost Centre Error Flag",
    "HR Location Error Flag",
    "Internal Function Error Flag",
    "Job Error Flag",
    "Total Complete",
)
_table3_columns = [("Total Cases", "")] + [
    (group, measure)
    for group in _table3_groups
    for measure in ("Missing", "Complete", "% complete")
]
table3_final = table3_complete[_table3_columns]

In [None]:
# Display the finished Table 3 (the frame later written to the "Table 3" sheet).
table3_final

In [None]:
#################################################################
#Table 4: Number and percentage of complete cases by Region, variable and Cost Centre 
#(excluding Approved Premises in England and some HQ functions), 31 March 2022
################################################################## 

In [None]:
# Peek at the first row of table2_final_version, the frame Table 4 is derived from.
table2_final_version.head(1)

In [None]:
# Table 4 starts from an independent (deep) copy of the Table 2 frame, so the
# columns added below do not leak back into table2_final_version.
table4_complete = table2_final_version.copy()

In [None]:
# Preview the first row of the Table 4 working copy.
table4_complete.head(1)

In [None]:
# For every error flag, add a "Complete" count = Valid + Invalid
# (same rule as Table 3, applied at cost-centre level).
for _flag in (
    "Cost Centre Error Flag",
    "HR Location Error Flag",
    "Internal Function Error Flag",
    "Job Error Flag",
):
    table4_complete[(_flag, "Complete")] = (
        table4_complete[(_flag, "Valid")] + table4_complete[(_flag, "Invalid")]
    )

In [None]:
# For every error flag, add "% complete" = Complete / Total Cases.
# The value is stored as a plain ratio; no x100 scaling or rounding is applied here.
_table4_total_cases = table4_complete[("Total Cases", "")]
for _flag in (
    "Cost Centre Error Flag",
    "HR Location Error Flag",
    "Internal Function Error Flag",
    "Job Error Flag",
):
    table4_complete[(_flag, "% complete")] = (
        table4_complete[(_flag, "Complete")] / _table4_total_cases
    )


In [None]:
# Overall completeness across the four error flags:
#   "Missing" and "Complete" totals are the sums of the per-flag counts, and
#   "% complete" = Complete / (Missing + Complete), stored as a plain ratio.
_table4_flags = (
    "Cost Centre Error Flag",
    "HR Location Error Flag",
    "Internal Function Error Flag",
    "Job Error Flag",
)
for _measure in ("Missing", "Complete"):
    _cost_centre, _hr_location, _internal_function, _job = (
        table4_complete[(flag, _measure)] for flag in _table4_flags
    )
    table4_complete[("Total Complete", _measure)] = (
        _cost_centre + _hr_location + _internal_function + _job
    )

_table4_total_complete = table4_complete[("Total Complete", "Complete")]
table4_complete[("Total Complete", "% complete")] = _table4_total_complete / (
    table4_complete[("Total Complete", "Missing")] + _table4_total_complete
)

In [None]:
# Preview the first row to check the new "Complete", "% complete" and "Total Complete" columns.
table4_complete.head(1)

In [None]:
# Final Table 4: identifying columns, then Missing / Complete / % complete for
# each error flag and for the overall "Total Complete" group, with rows sorted
# by the index.
_table4_groups = (
    "Cost Centre Error Flag",
    "HR Location Error Flag",
    "Internal Function Error Flag",
    "Job Error Flag",
    "Total Complete",
)
_table4_columns = [
    ("Cost Centre", ""),
    ("Cost Centre Description (Establishment)", "Establishment"),
    ("Total Cases", ""),
] + [
    (group, measure)
    for group in _table4_groups
    for measure in ("Missing", "Complete", "% complete")
]
table4_final = table4_complete[_table4_columns].sort_index()

In [None]:
# Preview the first row of the finished Table 4 (the frame later written to the "Table 4" sheet).
table4_final.head(1)

In [None]:
# Save all tables in a single Excel workbook and upload it to S3.
# The workbook is assembled in an in-memory buffer, so nothing is written to
# local disk.
with io.BytesIO() as output:
    # The ExcelWriter is deliberately NOT called `writer`: that name is the
    # arrow_pd_parser `writer` imported at the top of the notebook, and
    # rebinding it here would break every earlier `writer.write(...)` cell if
    # it were re-run after this one.
    with pd.ExcelWriter(output, engine='xlsxwriter') as excel_writer:
        table1_final.to_excel(excel_writer, sheet_name='Table 1')
        table2_final.to_excel(excel_writer, sheet_name='Table 2')
        table3_final.to_excel(excel_writer, sheet_name='Table 3')
        table4_final.to_excel(excel_writer, sheet_name='Table 4')
        wfpt_data.to_excel(excel_writer, sheet_name="WfPT data")
        #trend_data.to_excel(excel_writer, sheet_name="Trend Data")    ---> To be completed
    # Read the bytes only after the ExcelWriter context has closed (so the
    # workbook is fully written) but while the buffer is still open.
    data = output.getvalue()
# Upload the finished workbook to the project's output folder on S3.
s3 = boto3.resource('s3')
s3.Bucket('alpha-piatool').put_object(Key='sopdq_AV/outputs/WfPT Error Report.xlsx', Body=data)


In [None]:
####################################################################################################
# TODO:
# - Arrange columns to be identical to previous versions of the report
# - Sort large tables by area and then by cost centre
# - Add Approved Premises - check that the output is identical to previous versions
# - Test on other months
####################################################################################################