In [None]:
#import libraries
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, date
import warnings
warnings.simplefilter("ignore")

In [None]:
# Base name (without extension) of the exported team-task workbook to read.
filename = "TeamTasks"
# Date the report was extracted, typed by the user, e.g. "02 Apr 2023".
report_date = input("report date (DD MMM YYYY): ")

In [None]:
# Load the raw team-task export (only the columns used below) and the three
# reference sheets kept in Data.xlsx.
TEAM_TASK_COLUMNS = [
    "Task ID",
    "Task Type",
    "Case No",
    "Process Type",
    "Status",
    "Last Assigned To Team",
    "Last Assigned Date",
    "Task Pending With",
    "Generic Volume",
]
team_tasks_path = "../CDM Reports/" + filename + ".xlsx"
team_tasks_pending_df = pd.read_excel(team_tasks_path, usecols=TEAM_TASK_COLUMNS)

# Base -> dashboard rows, Bulk Creation / Generic Volume -> exception lists.
dashboard_df, bc_list_df, gv_list_df = (
    pd.read_excel("Data.xlsx", sheet_name=sheet)
    for sheet in ("Base", "Bulk Creation", "Generic Volume")
)

***

In [None]:
# Keep only rows that have a Task ID, a Case No and a Process Type;
# a row missing any one of the three is discarded.
required_columns = ["Task ID", "Case No", "Process Type"]
team_tasks_pending_df = team_tasks_pending_df.dropna(subset=required_columns, how="any")

In [None]:
# Keep SG process types only. Note this is a substring match: any Process Type
# containing "SG - " is kept, not just those that begin with it.
is_sg_process = team_tasks_pending_df["Process Type"].str.contains("SG - ")
team_tasks_pending_df = team_tasks_pending_df[is_sg_process]

In [None]:
# Strip leading and trailing whitespace from every text cell.
# The strip is applied element by element and only to actual strings:
# `Series.str.strip()` would replace non-string cells of a mixed object column
# (e.g. a numeric cell next to text cells) with NaN, silently losing that data.
def _strip_if_text(value):
    """Return `value` stripped when it is a str, otherwise return it unchanged."""
    return value.strip() if isinstance(value, str) else value

team_tasks_pending_df = team_tasks_pending_df.apply(
    lambda column: column.map(_strip_if_text) if column.dtype == "object" else column
)

In [None]:
# Parse the assignment timestamps so date arithmetic can be done on them later.
assigned_dates = pd.to_datetime(team_tasks_pending_df["Last Assigned Date"])
team_tasks_pending_df = team_tasks_pending_df.assign(**{"Last Assigned Date": assigned_dates})

***

In [None]:
# Preparation for the new due date. Public holidays are not excluded, to stay
# in line with the CDM logic.
last_assigned_date = team_tasks_pending_df["Last Assigned Date"].to_list()

# Left join: attach the dashboard row (including its SLA) for each task's
# process type / team combination; tasks without a match keep NaN there.
team_tasks_pending_df = team_tasks_pending_df.merge(
    dashboard_df,
    how="left",
    left_on=["Process Type", "Last Assigned To Team"],
    right_on=["Process Type", "Team"],
)
team_SLA = team_tasks_pending_df["Defined SLA (days)"]

In [None]:
#calculate new due date
def date_by_adding_business_days(from_date, add_days):
    """Return `from_date` moved forward by `add_days` weekdays (Mon-Fri).

    Saturdays and Sundays are stepped over without being counted; public
    holidays are not considered. The time-of-day of `from_date` is kept.
    When `add_days` is not greater than zero (including NaN), `from_date`
    is returned unchanged.

    Adapted from https://stackoverflow.com/questions/12691551
    """
    one_day = timedelta(days=1)
    current = from_date
    remaining = add_days
    while remaining > 0:
        current = current + one_day
        is_weekend = current.weekday() >= 5  # 5: Saturday, 6: Sunday
        if not is_weekend:
            remaining -= 1
    return current

# One due date per row of the merged frame: assignment date + SLA business days.
# Both values are read from the merged frame itself (team_SLA is its
# "Defined SLA (days)" column) rather than pairing a pre-merge list with a
# post-merge series by position; that pairing misaligns, and the later column
# assignment fails, whenever the left join multiplies rows because the
# dashboard sheet holds a duplicate process-type/team combination.
# Rows without a matched SLA (NaN) simply keep their assignment date.
new_due_date = [
    date_by_adding_business_days(assigned_on, sla_days)
    for assigned_on, sla_days in zip(team_tasks_pending_df["Last Assigned Date"], team_SLA)
]

In [None]:
# Attach the computed due dates, then discard tasks whose process-type/team
# combination had no dashboard match (their "Team" is NaN after the left join).
team_tasks_pending_df = (
    team_tasks_pending_df
    .assign(**{"New Due Date": new_due_date})
    .dropna(subset=["Team"])
)

***

In [None]:
# Keep only tasks that exceeded their SLA: due date strictly before the report
# date. A task whose due date equals the report date is NOT overdue.
report_datetime = datetime.strptime(report_date, "%d %b %Y")
is_overdue = team_tasks_pending_df["New Due Date"] < report_datetime
team_tasks_pending_df = team_tasks_pending_df[is_overdue]

In [None]:
# Number of business days each task is overdue: weekdays counted from the due
# date (inclusive) up to the report date (exclusive).
report_day = report_datetime.date()
due_timestamps = pd.to_datetime(team_tasks_pending_df["New Due Date"])
team_tasks_pending_df["Days Overdue"] = [
    np.busday_count(due.date(), report_day) for due in due_timestamps
]

***

In [None]:
# Distinct case numbers (in order of first appearance) and, for each case,
# the distinct task IDs that belong to it.
case_numbers = team_tasks_pending_df["Case No"]
distinct_cases = pd.unique(case_numbers).tolist()
case_task_dict = {}
for case in distinct_cases:
    task_ids = team_tasks_pending_df[case_numbers == case]["Task ID"]
    case_task_dict[case] = pd.unique(task_ids).tolist()

In [None]:
# Containers for the split between cases sitting with exactly one team and
# cases sitting with several teams (lists of cases + their task frames).
pending_with_1, pending_with_many = [], []
pending_with_1_df, pending_with_many_df = pd.DataFrame(), pd.DataFrame()

In [None]:
# Sort every case into "one team's queue" or "any other number of teams".
# Multi-team entries are stored as [case, {teams}].
for case in case_task_dict:
    case_rows = team_tasks_pending_df[team_tasks_pending_df["Case No"] == case]
    set_of_teams = set(case_rows["Last Assigned To Team"].unique())
    if len(set_of_teams) != 1:
        pending_with_many.append([case, set_of_teams])
    else:
        pending_with_1.append(case)

In [None]:
# Collect all tasks belonging to each group of cases.
# pending_with_1_df feeds the rest of the analysis; pending_with_many_df is
# meant for the line managers.
#
# pending_with_many holds [case, teams] pairs, so the case numbers are pulled
# out first. (The loop this replaces iterated over the empty
# pending_with_many_df instead of the list, which left the multi-team frame
# permanently empty.)
# Rows are selected in one pass with isin() instead of concatenating inside a
# loop; they keep their original relative order, the index is renumbered from
# 0, and an empty case list still yields a frame that has all the columns.
multi_team_cases = [case for case, _teams in pending_with_many]
case_numbers = team_tasks_pending_df["Case No"]
has_case_number = case_numbers.notna()  # a missing case number never matched the old == test either

pending_with_1_df = team_tasks_pending_df[
    has_case_number & case_numbers.isin(pending_with_1)
].reset_index(drop=True)
pending_with_many_df = team_tasks_pending_df[
    has_case_number & case_numbers.isin(multi_team_cases)
].reset_index(drop=True)

***

In [None]:
#remove cases pending with other departments
#temp_df = pending_with_1_df.dropna(axis=0, subset=["Task Pending With"])
#task_pending_with = temp_df["Case No"].unique().tolist() #list of cases that are pending with other departments
#pending_with_1_df = pending_with_1_df[~pending_with_1_df["Case No"].isin(task_pending_with)] #remove rows containing cases that are in task_pending_with list

In [None]:
#get a list of cases (and respective teams) that are pending with other departments
#instead of case number only, we should do [case, team] instead
#that way we can remove cases based on associated teams


#if i separate the tasks into pending with 1/pending with many BEFORE i remove cases, this will remove this problem?
#because if the case is marked as Pending With by 1 team and not marked as Pending With by another team, 
#then it is in 2 team queues and that should not be the case already.

#therefore this is a non-issue. just need to re-order my code.

#in order of importance?:
#1. task pending with multiple ACM teams
#2. task pending with other departments
#unless it does not count as pending with multiple ACM teams if it is not Pending With other departments for only 1 team
#eg. does not count as pending with multiple ACM teams if STATIC has marked it as Pending With Front but CAM has not
#in this case it would count as pending with CAM only?

***

In [None]:
# Parallel lists, one entry per dashboard row: its process type and its team.
process_type_input = dashboard_df["Process Type"].to_list()
team_input = dashboard_df["Team"].to_list()

In [None]:
# Exception sheets turned into plain lists of rows (one inner list per sheet row).
bc_list = bc_list_df.values.tolist()  # bulk creation
gv_list = gv_list_df.values.tolist()  # generic volume

***

In [None]:
# Bulk creation: gather the single-team tasks that match any row of the bulk
# creation sheet. The first three values of each sheet row are compared with
# the task's team, process type and task type, in that order.
# The matches are concatenated once (not inside the loop) and keep their
# pending_with_1_df index labels. When the sheet has no rows the result is an
# empty frame that still carries every column, so later column lookups work.
bc_matches = [
    pending_with_1_df[(pending_with_1_df["Last Assigned To Team"] == bc_row[0]) &
                      (pending_with_1_df["Process Type"] == bc_row[1]) &
                      (pending_with_1_df["Task Type"] == bc_row[2])]
    for bc_row in bc_list
]
bulk_creation_df = pd.concat(bc_matches) if bc_matches else pending_with_1_df.iloc[0:0]

In [None]:
# Bulk creation: volume per case for every dashboard row (process type / team).
# bc_pending_data[i] -> {case: volume} for dashboard row i
# bc_pending_list[i] -> list of the cases counted for dashboard row i
bc_pending_data = []
bc_pending_list = []
for process_type, team in zip(process_type_input, team_input):
    # Tasks of this process/team that are not pending with another department.
    in_scope = ((bulk_creation_df["Process Type"] == process_type) &
                (bulk_creation_df["Last Assigned To Team"] == team) &
                (bulk_creation_df["Task Pending With"].isna()))
    bc_pending_df = bulk_creation_df[in_scope]
    cases_in_scope = list(pd.unique(bc_pending_df["Case No"]))

    volume_by_case = {}
    for case in cases_in_scope:
        case_rows = bc_pending_df[bc_pending_df["Case No"] == case]
        # Distinct task IDs per task type within the case ...
        tasks_per_type = [
            len(pd.unique(case_rows[case_rows["Task Type"] == task_type]["Task ID"]))
            for task_type in pd.unique(case_rows["Task Type"])
        ]
        # ... and the case's volume is the largest of those counts.
        volume_by_case[case] = max(tasks_per_type)

    bc_pending_data.append(volume_by_case)
    bc_pending_list.append(cases_in_scope)

***

In [None]:
# Generic volume: gather the single-team tasks that match any row of the
# generic volume sheet. The first three values of each sheet row are compared
# with the task's team, process type and task type, in that order.
# The team is matched on "Last Assigned To Team", the same column the bulk
# creation selection uses; "Team" is the dashboard copy of that value, brought
# in by the join on it.
# The matches are concatenated once (not inside the loop). When the sheet has
# no rows the result is an empty frame that still carries every column.
gv_matches = [
    pending_with_1_df[(pending_with_1_df["Last Assigned To Team"] == gv_row[0]) &
                      (pending_with_1_df["Process Type"] == gv_row[1]) &
                      (pending_with_1_df["Task Type"] == gv_row[2])]
    for gv_row in gv_list
]
generic_volume_df = pd.concat(gv_matches) if gv_matches else pending_with_1_df.iloc[0:0]

In [None]:
# Generic volume: preprocessing.
# A cell may hold several comma-separated entries; the last one is the latest
# input in the CDM task, so only that entry is kept. Blank cells count as 1.
# Parsing goes through pd.to_numeric so that values pandas read as floats
# ("3.0", which int() rejects), padded entries (" 4") and an empty last entry
# ("2,") are handled; genuinely non-numeric text still raises.
latest_entry = (
    generic_volume_df["Generic Volume"]
    .astype(object)   # lets blanks be filled with text whatever dtype the column was read as
    .fillna("1")
    .astype(str)
    .str.split(",").str[-1]
    .str.strip()
)
generic_volume_df["Generic Volume"] = (
    pd.to_numeric(latest_entry)
    .fillna(1)        # an empty last entry parses to NaN -> treat like a blank cell
    .astype(int)
)

In [None]:
# Generic volume: volume per case for every dashboard row (process type / team).
# gv_pending_data[i] -> {case: volume} for dashboard row i
# gv_pending_list[i] -> list of the cases counted for dashboard row i
gv_pending_data = []
gv_pending_list = []
for process_type, team in zip(process_type_input, team_input):
    # Tasks of this process/team that are not pending with another department.
    in_scope = ((generic_volume_df["Process Type"] == process_type) &
                (generic_volume_df["Last Assigned To Team"] == team) &
                (generic_volume_df["Task Pending With"].isna()))
    gv_pending_df = generic_volume_df[in_scope]
    cases_in_scope = list(pd.unique(gv_pending_df["Case No"]))

    volume_by_case = {}
    for case in cases_in_scope:
        # Largest generic volume recorded on the case; a maximum of 0 counts as 1.
        largest = gv_pending_df[gv_pending_df["Case No"] == case]["Generic Volume"].max()
        volume_by_case[case] = 1 if largest == 0 else largest

    gv_pending_data.append(volume_by_case)
    gv_pending_list.append(cases_in_scope)

***

In [None]:
# Case volume: every single-team task that was not picked up as bulk creation
# (rows are removed by their index label).
bulk_creation_rows = bulk_creation_df.index
case_volume_df = pending_with_1_df.drop(index=bulk_creation_rows)

In [None]:
# Case volume: every distinct case counts as exactly 1, per dashboard row.
# case_pending_data[i] -> {case: 1} for dashboard row i
# case_pending_list[i] -> list of the cases counted for dashboard row i
case_pending_data = []
case_pending_list = []
for idx in range(len(process_type_input)):
    # Tasks of this process/team that are not pending with another department.
    case_pending_df = case_volume_df[(case_volume_df["Process Type"]==process_type_input[idx]) &
                                     (case_volume_df["Last Assigned To Team"]==team_input[idx]) &
                                     (case_volume_df["Task Pending With"].isna())]

    pending_case_list = list(pd.unique(case_pending_df["Case No"]))
    # Same {case: 1} mapping as before, built in one pass instead of a
    # list.index() lookup per case (which made the step quadratic).
    case_pending_data.append(dict.fromkeys(pending_case_list, 1))
    case_pending_list.append(list(pending_case_list))

***

In [None]:
# Median days overdue per dashboard row (process type / team).
# Each case contributes its single worst (largest) "Days Overdue" value.
median_overdue_data = []
for process_type, team in zip(process_type_input, team_input):
    # Tasks of this process/team that are not pending with another department.
    in_scope = ((pending_with_1_df["Process Type"] == process_type) &
                (pending_with_1_df["Last Assigned To Team"] == team) &
                (pending_with_1_df["Task Pending With"].isna()))
    median_overdue_df = pending_with_1_df[in_scope]

    worst_per_case = median_overdue_df.groupby("Case No")["Days Overdue"].max()
    median_overdue_data.append(worst_per_case.median())

***

In [None]:
# Total backlog volume per dashboard row: for every case seen by any of the
# three counting methods, take the largest volume reported and add them up.
pending_data = []
for bc_volumes, gv_volumes, case_volumes in zip(bc_pending_data, gv_pending_data, case_pending_data):
    cases_seen = set(bc_volumes) | set(gv_volumes) | set(case_volumes)
    total_volume = 0
    for case in cases_seen:
        # A method that did not count the case yields None, which becomes NaN
        # in the float array and is ignored by nanmax.
        candidates = np.array(
            [bc_volumes.get(case), gv_volumes.get(case), case_volumes.get(case)],
            dtype=np.float64,
        )
        total_volume += np.nanmax(candidates)
    pending_data.append(total_volume)

In [None]:
# Combine the case lists of the three counting methods into one
# comma-separated string per dashboard row.
# Cases are de-duplicated, converted to text (so numeric case numbers do not
# break the join) and sorted, so the output is identical from run to run
# instead of following arbitrary set order.
pending_list = [
    ", ".join(sorted({str(case) for case in bc_cases + gv_cases + plain_cases}))
    for bc_cases, gv_cases, plain_cases in zip(bc_pending_list, gv_pending_list, case_pending_list)
]

***

In [None]:
# Assemble the report: one row per dashboard process type / team.
output_columns = [
    "Process Type",
    "Team",
    "Total Volume (backlog)",
    "Median Days Overdue",
    "Pending Cases",
]
data = list(zip(process_type_input, team_input, pending_data, median_overdue_data, pending_list))
# Missing values (e.g. no median because nothing is overdue) are shown as 'NA'.
output_df = pd.DataFrame(data, columns=output_columns).fillna('NA')

In [None]:
# Write the report without the index column.
output_path = "../Output/PendingCases.csv"
output_df.to_csv(output_path, index=False)
# TODO: pending_with_many should presumably also be exported, listing the teams involved.
print("Completed")