In [1]:
#import libraries
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, date
import warnings
warnings.simplefilter("ignore")

In [2]:
#base name (without the ".xlsx" extension) of the CDM raw data workbook to load,
#e.g. "TeamTasks_20230402113512"
#to prompt for the name at run time instead, use: filename = input("filename: ")
filename = "TeamTasks"

In [3]:
#load the two inputs:
#  - the CDM task export named by `filename` (only the columns used below)
#  - the "Base" sheet of Data.xlsx, which supplies the process type / team rows
task_columns = [
    "Task ID",
    "Case No",
    "Process Type",
    "Status",
    "Last Assigned To Team",
    "Task Pending With",
]
tasks_workbook_path = f"../CDM Reports/{filename}.xlsx"
team_tasks_pending_df = pd.read_excel(tasks_workbook_path, usecols=task_columns)
dashboard_df = pd.read_excel("Data.xlsx", sheet_name="Base")

***

In [4]:
#a row is kept only when both its case number and its process type are present
required_columns = ["Case No", "Process Type"]
team_tasks_pending_df = team_tasks_pending_df.dropna(subset=required_columns, how="any")

In [5]:
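#(currently disabled) keep only rows with SG process types, i.e. process types of the form "SG - ..."
#note: str.contains matches "SG - " anywhere in the value; use str.startswith("SG - ") to require it as a prefix
#team_tasks_pending_df = team_tasks_pending_df[team_tasks_pending_df["Process Type"].str.contains("SG - ")]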

In [6]:
#strip leading and trailing whitespace from every text cell.
#Series.str.strip() is deliberately not used: on an object column that mixes text with
#other values (e.g. numeric IDs) it replaces each non-string cell with NaN.
#Here only genuine str values are stripped; every other cell (numbers, NaN) passes
#through unchanged, and non-object columns are left untouched.
team_tasks_pending_df = team_tasks_pending_df.apply(
    lambda col: col.map(lambda cell: cell.strip() if isinstance(cell, str) else cell)
    if col.dtype == "object" else col
)

In [7]:
#build two parallel lists: process_list[i] and team_list[i] together identify one output row.
#both start from the dashboard sheet and are then extended by three extra teams,
#each paired with the same placeholder process type.
placeholder_process = "PLEASE SELECT THE CORRECT PROCESS TYPE"
extra_teams = [
    "SG CAM DOC REVIEW",
    "SG DATA MANAGEMENT - MAILING",
    "SG DATA MANAGEMENT - STATIC",
]

process_list = dashboard_df["Process Type"].tolist()
process_list.extend([placeholder_process] * len(extra_teams))

team_list = dashboard_df["Team"].tolist()
team_list.extend(extra_teams)

In [8]:
#partition the tasks by the "Task Pending With" field:
#  internal -> the field is blank (NaN) or the literal "NONE"
#  external -> every remaining row (pending with others)
pending_with = team_tasks_pending_df["Task Pending With"]
is_internal = pending_with.isna() | pending_with.eq("NONE")

pending_int_df = team_tasks_pending_df[is_internal]
#external rows are whatever is left once the internal row labels are removed
pending_ext_df = team_tasks_pending_df.drop(index=pending_int_df.index, errors="ignore")

***

In [9]:
#tasks that are pending within CAM teams
#one count per (process type, team) pair, in the same order as process_list / team_list:
#[count for process1-teamA, count for process1-teamB, count for process2-teamA, ...]
#a task is counted once per pair, however many rows carry its Task ID
pending_int_list = [
    len(
        pd.unique(
            pending_int_df.loc[
                (pending_int_df["Process Type"] == process_name)
                & (pending_int_df["Last Assigned To Team"] == team_name),
                "Task ID",
            ]
        )
    )
    for process_name, team_name in zip(process_list, team_list)
]

***

In [10]:
#tasks that are pending with teams outside CAM
#each distinct "Task Pending With" value gets its own list of counts (one future column),
#ordered like process_list / team_list
ext_team_list = list(pd.unique(pending_ext_df["Task Pending With"]))
pending_ext_dict = {}
for ext_team in ext_team_list:
    #narrow to this external team once, then count per (process type, team) pair
    ext_team_rows = pending_ext_df[pending_ext_df["Task Pending With"] == ext_team]
    pending_ext_dict[ext_team] = [
        len(
            pd.unique(
                ext_team_rows.loc[
                    (ext_team_rows["Process Type"] == process_name)
                    & (ext_team_rows["Last Assigned To Team"] == team_name),
                    "Task ID",
                ]
            )
        )
        for process_name, team_name in zip(process_list, team_list)
    ]

***

In [11]:
#so now we have our lists:
#internally pending tasks: pending_int_list
#externally pending tasks: one list per external team inside pending_ext_dict
#every list is positional (entry i belongs to process_list[i] / team_list[i]),
#so they can be placed side by side as the columns of one dataframe
df1 = pd.DataFrame({"Process Type": process_list,
                    "Team": team_list,
                    "Tasks Pending Within CAM": pending_int_list})

#add pending_ext_dict columns (one column per external team)
df2 = pd.DataFrame(pending_ext_dict)
output_df = pd.concat([df1, df2], axis=1)

#add total row.
#only the count columns are summed (numeric_only=True): summing the text columns would
#concatenate every process/team name, and raises TypeError on pandas >= 2.0 as soon as
#one of those columns contains a non-string value such as NaN.
#"Process Type" and "Team" are left blank on the total row.
total_row = output_df.sum(numeric_only=True).to_dict()
total_row.update({"Process Type": "", "Team": ""})
#a mixed text/int Series is object dtype, so the counts are written out as plain integers
output_df.loc["Total"] = pd.Series(total_row)

In [12]:
#write the breakdown (including its index column and the "Total" row) to the output folder
output_csv_path = "../Output/PendingTasksBreakdown.csv"
output_df.to_csv(output_csv_path)
print("Completed.")

Completed.
