In [1]:
#import libraries
import pandas as pd
import numpy as np
import warnings
# Install a blanket "ignore" filter: from here on no warning of any category is shown.
# NOTE(review): this also hides pandas deprecation notices (FutureWarning); consider
# narrowing the filter to the specific warnings that are known to be harmless.
warnings.simplefilter("ignore")

In [2]:
# Base name (without the ".xlsx" extension) of the CDM Task History export to analyse,
# e.g. TaskHistory_20230402122833. Can alternatively be prompted for: input("filename: ")
filename = "TaskHistory"

In [3]:
# Load the inputs:
#   * the CDM task-history workbook, restricted to the columns used below
#   * the "Base" sheet of the dashboard workbook
task_history_columns = [
    "Process Type",
    "Task ID",
    "Case No",
    "Status",
    "Last Assigned To Team",
    "Rejection Reason",
]
task_history_path = f"../CDM Reports/{filename}.xlsx"
task_history_df = pd.read_excel(
    task_history_path,
    header=0,
    usecols=task_history_columns,
)

dashboard_df = pd.read_excel("Data.xlsx", sheet_name="Base")

***

In [4]:
# Cleaning: keep only the tasks whose status is exactly "REJECTED".
is_rejected = task_history_df["Status"].eq("REJECTED")
task_history_df = task_history_df.loc[is_rejected]

In [5]:
# Discard every row that is missing its case number or its process type.
required_columns = ["Case No", "Process Type"]
task_history_df = task_history_df.dropna(subset=required_columns, how="any")

In [6]:
# Keep only the rows whose process type contains the "SG - " marker.
is_sg_process = task_history_df["Process Type"].str.contains("SG - ")
task_history_df = task_history_df.loc[is_sg_process]

In [7]:
# Remove leading and trailing whitespace from every text cell.
def _strip_text_cells(column):
    """Return `column` with surrounding whitespace removed from its string cells.

    Non-text columns are returned unchanged. Inside a text column only real
    `str` values are stripped; numbers, NaN and other objects are passed
    through as-is. (`.str.strip()` would replace such non-string cells with
    NaN, silently losing e.g. numeric case numbers in a mixed column.)
    """
    is_text_column = (pd.api.types.is_object_dtype(column)
                      or pd.api.types.is_string_dtype(column))
    if not is_text_column:
        return column
    return column.map(lambda cell: cell.strip() if isinstance(cell, str) else cell)


task_history_df = task_history_df.apply(_strip_text_cells)

In [8]:
# Users may pick several rejection reasons, stored as one comma-separated string.
# Turn "reason1, reason2" into one row per reason:
#   1. split on "," with the vectorised .str.split (missing reasons stay NaN instead
#      of raising AttributeError as a plain x.split(",") would),
#   2. explode the resulting lists into rows,
#   3. strip each reason so "reason2" and " reason2" are tallied as the same reason,
#   4. drop rows left without a reason (missing value or blank fragment such as "a,,b"),
#   5. reset the index.
task_history_df["Rejection Reason"] = task_history_df["Rejection Reason"].str.split(",")
task_history_df = task_history_df.explode("Rejection Reason")
task_history_df["Rejection Reason"] = task_history_df["Rejection Reason"].str.strip()

has_reason = (task_history_df["Rejection Reason"].notna()
              & task_history_df["Rejection Reason"].ne(""))
task_history_df = task_history_df[has_reason].reset_index(drop=True)

In [9]:
# Lookup lists:
#   team_list / process_list - the "Team" and "Process Type" columns of the dashboard
#                              sheet, position-aligned (entry i of each comes from row i)
#   rej_list                 - every distinct rejection reason, in order of first appearance
team_list, process_list = (dashboard_df[column].tolist()
                           for column in ("Team", "Process Type"))
rej_list = list(pd.unique(task_history_df["Rejection Reason"]))

***

In [10]:
# For every (process, team) pair from the dashboard sheet, count how often each
# rejection reason occurs among that pair's rejected tasks.
# Result: one dict per pair, e.g. {reason1: 1, reason2: 3, reason4: 1, ...}
rejected_list = []
for process_name, team_name in zip(process_list, team_list):
    belongs_to_pair = ((task_history_df["Process Type"] == process_name)
                       & (task_history_df["Last Assigned To Team"] == team_name))
    pair_reasons = task_history_df.loc[belongs_to_pair, "Rejection Reason"]

    reason_counts = {reason: int((pair_reasons == reason).sum()) for reason in rej_list}
    rejected_list.append(reason_counts)

***

In [11]:
# Turn the list of per-pair tallies into a dataframe: one row per dictionary,
# labelled "<process>, <team>", one column per rejection reason.
# The frame is built in a single call: DataFrame.append was removed in pandas 2.0,
# and growing a frame row by row copies it on every iteration.
row_labels = [process_name + ", " + team_name
              for process_name, team_name in zip(process_list, team_list)]
rej_reasons_df = pd.DataFrame(rejected_list, index=row_labels)

In [12]:
# Final output: "Process" and "Team" columns followed by one count column per
# rejection reason, plus a closing "Total" row.
#
# Process/Team are taken straight from process_list/team_list (row i of
# rej_reasons_df was built from entry i of both lists) instead of splitting the
# combined "<process>, <team>" row label on ",": that split left a leading space
# on every team name and broke whenever a name itself contained a comma.
# rej_reasons_df is not modified, so this cell can be re-run safely.
reason_counts_df = rej_reasons_df.reset_index(drop=True)
reason_totals = reason_counts_df.sum()

output_df = reason_counts_df.copy()
output_df.insert(0, "Process", process_list)
output_df.insert(1, "Team", team_list)

# Total row: blank Process/Team cells, column sums for every reason.
total_row = pd.DataFrame(
    [{"Process": "", "Team": "", **reason_totals.to_dict()}],
    index=["Total"],
)
output_df = pd.concat([output_df, total_row])

In [14]:
# Write the breakdown (including its index column) to CSV and report completion.
output_path = "../Output/RejectedTasksBreakdown.csv"
output_df.to_csv(output_path)
print("Completed.")

Completed.
