#### Imports

In [1]:
import pandas as pd
import numpy as np

#### Read In ADP Data

In [2]:
# Load the ADP punch export and reduce it to one row per punch with the
# columns 'Last Name', 'First Name' and 'Date'.
raw_adp_df = (
    pd.read_excel('inputs/PunchSource2023.xlsx')
    .loc[:, ['Last Name', 'First Name', 'Time In', 'Pay Code [Timecard]']]
    # "REGSAL" pay-code rows are managers; they are excluded from the pay data
    .loc[lambda punches: punches['Pay Code [Timecard]'] != "REGSAL"]
    # keep only the calendar date of each punch (time of day is discarded)
    .assign(Date=lambda punches: punches['Time In'].dt.date)
    .drop(columns=['Time In', 'Pay Code [Timecard]'])
)

In [None]:
# Display raw_adp_df for a visual sanity check of the loaded punch data.
raw_adp_df

#### Read In Scheduler Data

In [3]:
# Load the scheduler shift export and discard the two columns that are not
# used below (presumably superseded by the *_est columns -- TODO confirm).
raw_scheduler_df = (
    pd.read_excel('inputs/SchedulerShifts2023.xlsx')
    .drop(columns=["epoch_start_time", "cover_time"])
)

In [4]:
# Drop every row that is either a manager shift (role == 2) or a 'full' cover:
#   * cover_type 'full' means the request is in the future or was never taken
#   * role 2 = manager, which is not relevant here
# Then reduce both timestamp columns to plain calendar dates.
#
# The columns are passed through pd.to_datetime first so this cell can be
# re-run safely: after one run they already hold `date` objects, and calling
# `.dt` on them directly would raise AttributeError.
raw_scheduler_df = (
    raw_scheduler_df
    .loc[~((raw_scheduler_df['role'] == 2.0) |
           (raw_scheduler_df['cover_type'] == 'full'))]
    # .assign returns a new frame, so nothing is written onto a filtered slice
    .assign(
        start_time_est=lambda shifts: pd.to_datetime(shifts['start_time_est']).dt.date,
        cover_time_est=lambda shifts: pd.to_datetime(shifts['cover_time_est']).dt.date,
    )
)

In [None]:
# Display raw_scheduler_df for a visual sanity check of the filtered shift data.
raw_scheduler_df

#### Extract Coverer Data, Add to Bottom of DF, Split Names into First+Last

In [5]:
# Build a frame of coverers: rows whose cover_type is 'before' or 'after',
# keeping only the coverer's name, role and cover date. The name and date
# columns are renamed to match the assignee columns so the frame can be
# stacked underneath the main shift table.
cover_sch_df = (
    raw_scheduler_df
    .loc[
        lambda shifts: shifts['cover_type'].isin(['before', 'after']),
        ['coverer_name', 'role', 'cover_time_est'],
    ]
    .rename(columns={'coverer_name': 'assignee_name', 'cover_time_est': 'start_time_est'})
)

In [15]:
# Stack the original shift rows and the coverer rows into one shift table,
# then discard the columns that are no longer needed.
all_sch_df = (
    pd.concat([raw_scheduler_df, cover_sch_df], ignore_index=True)
    .drop(columns=['role', 'coverer_name', 'cover_type', 'cover_time_est'])
)

# Split each name on whitespace: the first token becomes 'First Name' and the
# remaining tokens, re-joined with single spaces, become 'Last Name'
# (a one-word name therefore gets an empty 'Last Name').
split_names = all_sch_df['assignee_name'].str.split()
all_sch_df = all_sch_df.assign(**{
    'First Name': split_names.str[0],
    'Last Name': split_names.str[1:].str.join(' '),
})
all_sch_df

Unnamed: 0,assignee_name,start_time_est,First Name,Last Name
0,Andrew DeKuyper,2023-06-25,Andrew,DeKuyper
1,Osias Williams,2023-05-29,Osias,Williams
2,Jacob Silber,2023-05-27,Jacob,Silber
3,Kate Niccolai,2023-06-11,Kate,Niccolai
4,Zachary Liebman,2023-08-30,Zachary,Liebman
...,...,...,...,...
4691,Elle Cadigan,2023-06-30,Elle,Cadigan
4692,John Hickey,2023-07-08,John,Hickey
4693,Grace Pappolla,2023-07-22,Grace,Pappolla
4694,Michael Mattiassi,2023-08-11,Michael,Mattiassi


In [11]:
# Show the first 50 rows of the combined shift table.
all_sch_df.head(50)

Unnamed: 0,assignee_name,start_time_est,First Name,Last Name
0,Andrew DeKuyper,2023-06-25,Andrew,DeKuyper
1,Osias Williams,2023-05-29,Osias,Williams
2,Jacob Silber,2023-05-27,Jacob,Silber
3,Kate Niccolai,2023-06-11,Kate,Niccolai
4,Zachary Liebman,2023-08-30,Zachary,Liebman
5,Joseph Pierro,2023-07-04,Joseph,Pierro
6,Jacob Silber,2023-05-28,Jacob,Silber
7,Carter Pierson,2023-07-02,Carter,Pierson
8,John Lawler,2023-05-29,John,Lawler
9,Natalie Tango,2023-06-15,Natalie,Tango


In [7]:
# Display the ADP punch frame (Last Name, First Name, Date) for comparison
# against the scheduler shift table.
raw_adp_df

Unnamed: 0,Last Name,First Name,Date
0,Abam,Aria,2023-05-28
1,Abam,Aria,2023-05-29
2,Abam,Aria,2023-06-04
3,Abam,Aria,2023-06-20
4,Abam,Aria,2023-06-28
...,...,...,...
8024,von Brachel,Elinor,2023-07-12
8025,von Brachel,Elinor,2023-07-15
8026,von Brachel,Elinor,2023-07-18
8027,von Brachel,Elinor,2023-07-20


In [8]:
# Count the ADP rows per person, keyed on (Last Name, First Name); the count
# is returned as a regular 'Count' column next to the two name columns.
grouped_df = (
    raw_adp_df
    .groupby(['Last Name', 'First Name'])
    .size()
    .rename('Count')
    .reset_index()
)
grouped_df

Unnamed: 0,Last Name,First Name,Count
0,Abam,Aria,14
1,Acken,Katie,28
2,Advani,Andrew,10
3,Alexander,Lukas,21
4,Alexopoulos,Drew,26
...,...,...,...
490,Zebrauskas,Mia,16
491,Ziaee,Armin,6
492,Zinnie,Taylor,17
493,Zotos,Alexandra,20
