## PAIRING TABLE

#### Imports

In [1]:
import pandas as pd
import numpy as np

#### Read In ADP Data For Pairing

In [2]:
#load the ADP workbook into a DataFrame
raw_adp_df = pd.read_excel(io='inputs/PunchSourceEmails2023.xlsx')

In [3]:
#reduce the ADP data to last name, first name, email and the punch date
#note: the "REGSAL" (manager) rows are NOT removed here; that filter is disabled for the pairing data
raw_adp_df = (
    raw_adp_df[['Last Name', 'First Name', 'Personal Contact: Personal Email',
                'Time In', 'Pay Code [Timecard]']]  #columns needed below
    .rename(columns={'Personal Contact: Personal Email': 'Email'})
    .assign(Date=lambda frame: frame['Time In'].dt.date)  #date part of the punch time
    .drop(columns=['Time In', 'Pay Code [Timecard]'])  #no longer needed once Date exists
)

raw_adp_df

Unnamed: 0,Last Name,First Name,Email,Date
0,Abam,Aria,ariaebam2006@gmail.com,2023-05-28
1,Abam,Aria,ariaebam2006@gmail.com,2023-05-29
2,Abam,Aria,ariaebam2006@gmail.com,2023-06-04
3,Abam,Aria,ariaebam2006@gmail.com,2023-06-20
4,Abam,Aria,ariaebam2006@gmail.com,2023-06-28
...,...,...,...,...
8038,von Brachel,Elinor,evonbrachel@gmail.com,2023-07-12
8039,von Brachel,Elinor,evonbrachel@gmail.com,2023-07-15
8040,von Brachel,Elinor,evonbrachel@gmail.com,2023-07-18
8041,von Brachel,Elinor,evonbrachel@gmail.com,2023-07-20


#### Read In Scheduler Shift Data For Pairing

In [4]:
#load the scheduler shift workbook into a DataFrame
raw_scheduler_df = pd.read_excel(io='inputs/SchedulerShifts2023.xlsx')

In [5]:
#drop the unused time columns and reduce both *_est timestamps to plain dates
#note: rows with role == 2 (manager) or cover_type == 'full' (request in the future,
# or not taken) are NOT removed here; that filter is disabled for the pairing data
raw_scheduler_df = (
    raw_scheduler_df
    .drop(columns=["epoch_start_time", "cover_time"])
    .assign(
        start_time_est=lambda frame: frame['start_time_est'].dt.date,
        cover_time_est=lambda frame: frame['cover_time_est'].dt.date,
    )
)

raw_scheduler_df

Unnamed: 0,assignee_name,role,start_time_est,assignee_email,coverer_name,coverer_email,cover_type,cover_time_est
0,Andrew DeKuyper,3.0,2023-06-25,andrewhdekuyper@gmail.com,,,none,
1,Osias Williams,3.0,2023-05-29,osiasxwilli@icloud.com,,,none,
2,Jacob Silber,3.0,2023-05-27,njhype1@gmail.com,,,none,
3,Kate Niccolai,3.0,2023-06-11,katieniccolai@gmail.com,,,none,
4,Zachary Liebman,3.0,2023-08-30,zachsliebman@gmail.com,,,none,
...,...,...,...,...,...,...,...,...
4915,Clayton Thomas,3.0,2023-09-02,clay.thom8@gmail.com,,,none,
4916,Clayton Thomas,3.0,2023-09-01,clay.thom8@gmail.com,,,none,
4917,Clayton Thomas,3.0,2023-08-30,clay.thom8@gmail.com,,,none,
4918,Clayton Thomas,3.0,2023-09-03,clay.thom8@gmail.com,,,none,


In [6]:
#build one shift row per coverer: rows whose cover_type is 'before' or 'after',
#with the coverer's name/email moved into the assignee columns
#NOTE(review): the coverer's shift date is taken from cover_time_est, not start_time_est - confirm
cover_sch_df = (
    raw_scheduler_df
    .loc[lambda frame: frame['cover_type'].isin(['before', 'after']),
         ['coverer_name', 'coverer_email', 'role', 'cover_time_est']]
    .rename(columns={'coverer_name': 'assignee_name',
                     'coverer_email': 'assignee_email',
                     'cover_time_est': 'start_time_est'})
)

In [7]:
#stack the assignee rows and the coverer rows into one shift table,
#then keep only assignee name, assignee email and shift date
all_sch_df = (
    pd.concat([raw_scheduler_df, cover_sch_df], ignore_index=True)
    .drop(columns=['role', 'coverer_name', 'cover_type', 'cover_time_est', 'coverer_email'])
)

In [8]:
#first whitespace-separated token is the first name; the remaining tokens form the last name
split_names = all_sch_df['assignee_name'].str.split()
all_sch_df = all_sch_df.assign(**{
    'First Name': split_names.str.get(0),
    'Last Name': split_names.str.slice(start=1).str.join(' '),
})

all_sch_df

Unnamed: 0,assignee_name,start_time_est,assignee_email,First Name,Last Name
0,Andrew DeKuyper,2023-06-25,andrewhdekuyper@gmail.com,Andrew,DeKuyper
1,Osias Williams,2023-05-29,osiasxwilli@icloud.com,Osias,Williams
2,Jacob Silber,2023-05-27,njhype1@gmail.com,Jacob,Silber
3,Kate Niccolai,2023-06-11,katieniccolai@gmail.com,Kate,Niccolai
4,Zachary Liebman,2023-08-30,zachsliebman@gmail.com,Zachary,Liebman
...,...,...,...,...,...
4975,Elle Cadigan,2023-06-30,ellemcadigan@gmail.com,Elle,Cadigan
4976,John Hickey,2023-07-08,jhickey0512@gmail.com,John,Hickey
4977,Grace Pappolla,2023-07-22,gracepappolla0222@gmail.com,Grace,Pappolla
4978,Michael Mattiassi,2023-08-11,mmattiassi51@gmail.com,Michael,Mattiassi


#### Define Timeframe Set to All Data

In [9]:
#pairing window; both end dates are inclusive
start_date = pd.to_datetime('2023-04-01').date()
end_date = pd.to_datetime('2023-08-01').date()

#scheduler shifts inside the window
sch_timeframe_df = all_sch_df[all_sch_df['start_time_est'].between(start_date, end_date)]

#ADP punches inside the window
adp_timeframe_df = raw_adp_df[raw_adp_df['Date'].between(start_date, end_date)]

In [10]:
#preview the scheduler shifts that fall inside the pairing window
sch_timeframe_df

Unnamed: 0,assignee_name,start_time_est,assignee_email,First Name,Last Name
0,Andrew DeKuyper,2023-06-25,andrewhdekuyper@gmail.com,Andrew,DeKuyper
1,Osias Williams,2023-05-29,osiasxwilli@icloud.com,Osias,Williams
2,Jacob Silber,2023-05-27,njhype1@gmail.com,Jacob,Silber
3,Kate Niccolai,2023-06-11,katieniccolai@gmail.com,Kate,Niccolai
5,Joseph Pierro,2023-07-04,jvp4805@gmail.com,Joseph,Pierro
...,...,...,...,...,...
4969,Billie Sherratt,2023-07-20,billiesherratt@gmail.com,Billie,Sherratt
4972,Catherine Ritter,2023-06-17,catherine.ritter@icloud.com,Catherine,Ritter
4975,Elle Cadigan,2023-06-30,ellemcadigan@gmail.com,Elle,Cadigan
4976,John Hickey,2023-07-08,jhickey0512@gmail.com,John,Hickey


In [11]:
#preview the ADP punches that fall inside the pairing window
adp_timeframe_df

Unnamed: 0,Last Name,First Name,Email,Date
0,Abam,Aria,ariaebam2006@gmail.com,2023-05-28
1,Abam,Aria,ariaebam2006@gmail.com,2023-05-29
2,Abam,Aria,ariaebam2006@gmail.com,2023-06-04
3,Abam,Aria,ariaebam2006@gmail.com,2023-06-20
4,Abam,Aria,ariaebam2006@gmail.com,2023-06-28
...,...,...,...,...
8038,von Brachel,Elinor,evonbrachel@gmail.com,2023-07-12
8039,von Brachel,Elinor,evonbrachel@gmail.com,2023-07-15
8040,von Brachel,Elinor,evonbrachel@gmail.com,2023-07-18
8041,von Brachel,Elinor,evonbrachel@gmail.com,2023-07-20


#### Grouping - Count Shifts

In [12]:
#number of ADP rows per (email, first name, last name), ordered by last name
adp_grouped_df = (
    adp_timeframe_df
    .groupby(['Email', 'First Name', 'Last Name'])
    .size()
    .reset_index(name='ADP_Count')
    .sort_values(by='Last Name')
)
adp_grouped_df

Unnamed: 0,Email,First Name,Last Name,ADP_Count
55,ariaebam2006@gmail.com,Aria,Abam,12
288,katieacken915@gmail.com,Katie,Acken,26
49,andrewadvani1815@gmail.com,Andrew,Advani,10
13,Meow0821@gmail.com,Lukas,Alexander,21
139,dbalex8@gmail.com,Drew,Alexopoulos,25
...,...,...,...,...
377,mrzebrauskas@icloud.com,Mia,Zebrauskas,16
284,kamran.ziaee@googlemail.com,Armin,Ziaee,6
473,taylorzinnie@gmail.com,Taylor,Zinnie,16
72,azotos101@yahoo.com,Alexandra,Zotos,19


In [13]:
#number of scheduler rows per (email, first name, last name), ordered by last name
sch_grouped_df = (
    sch_timeframe_df
    .groupby(['assignee_email', 'First Name', 'Last Name'])
    .size()
    .reset_index(name='Sch_Count')
    .sort_values(by='Last Name')
)
sch_grouped_df

Unnamed: 0,assignee_email,First Name,Last Name,Sch_Count
117,giovanna.vilela2021@gmail.com,Giovanna,,34
92,elenalynott@gmail.com,Elena,,15
307,whitneylapper24@gsbschool.org,Whitney,,16
62,conormreilly9@icloud.com,Conor,,2
243,renerincon28@gmail.com,rene,,17
...,...,...,...,...
192,liampreilly824@gmail.com,Liam,reilly,3
257,samrinn2004@gmail.com,Samuel,rinn,29
160,jrod050506@gmail.com,Jayden,rodriguez,21
283,stevensconnor40@gmail.com,connor,stevens,4


#### Create Pairing Table

In [14]:
def normalize_name(name):
    """Return ``name`` stripped of surrounding whitespace and lower-cased.

    Non-string values (e.g. ``None`` or ``NaN``) normalize to an empty string
    instead of raising.
    """
    if not isinstance(name, str):
        return ''
    return name.strip().lower()

def normalize_email(email):
    """Return ``email`` stripped of surrounding whitespace and lower-cased.

    Non-string values (e.g. ``None`` or ``NaN``) normalize to an empty string
    instead of raising.
    """
    if not isinstance(email, str):
        return ''
    return email.strip().lower()

#Function to match ADP to Scheduler using a variety of techniques
def create_merged_df8(adp_grouped_df, sch_grouped_df):
    """Pair every scheduler user with ADP user(s) by email, then by name.

    Each row of ``sch_grouped_df`` is tried against ``adp_grouped_df`` in three
    stages and stops at the first stage that finds at least one ADP row:

    1. ``'email'``        - the emails are equal.
    2. ``'name_exact'``   - first name and last name are both equal.
    3. ``'name_portion'`` - first initial + full last name are equal, or full
       first name + last initial are equal. Skipped when the scheduler first
       or last name is empty.

    Every comparison is made on normalized values (see ``normalize_name`` /
    ``normalize_email``), i.e. ignoring case and surrounding whitespace. An
    empty scheduler email never matches in stage 1, and stage 2 is skipped when
    both scheduler names are empty.

    NOTE(review): stage 3 is loose (e.g. same first name plus same last
    initial), so 'name_portion' rows may deserve a manual check.

    Parameters
    ----------
    adp_grouped_df : pandas.DataFrame
        Needs the columns 'Email', 'First Name', 'Last Name', 'ADP_Count'.
    sch_grouped_df : pandas.DataFrame
        Needs the columns 'assignee_email', 'First Name', 'Last Name', 'Sch_Count'.

    Returns
    -------
    pandas.DataFrame
        Columns 'Email_ADP', 'First_Name_ADP', 'Last_Name_ADP', 'ADP_Count',
        'Email_Sch', 'First_Name_Sch', 'Last_Name_Sch', 'Sch_Count', 'Matched?'.
        One row per (scheduler row, matching ADP row) in scheduler order,
        followed by the unmatched scheduler rows, whose ADP fields are empty
        and whose 'Matched?' value is 'none'. Original (un-normalized) values
        are reported.
    """
    output_columns = [
        'Email_ADP', 'First_Name_ADP', 'Last_Name_ADP', 'ADP_Count',
        'Email_Sch', 'First_Name_Sch', 'Last_Name_Sch', 'Sch_Count', 'Matched?'
    ]

    # Normalize the ADP side once instead of once per scheduler row.
    adp_emails = adp_grouped_df['Email'].map(normalize_email)
    adp_firsts = adp_grouped_df['First Name'].map(normalize_name)
    adp_lasts = adp_grouped_df['Last Name'].map(normalize_name)
    adp_first_initials = adp_firsts.map(lambda text: text[:1])
    adp_last_initials = adp_lasts.map(lambda text: text[:1])
    no_rows = adp_grouped_df.iloc[0:0]

    merged_records = []
    unmatched_records = []

    sch_fields = sch_grouped_df[['assignee_email', 'First Name', 'Last Name', 'Sch_Count']]
    for sch_email, sch_first, sch_last, sch_count in sch_fields.itertuples(index=False, name=None):
        email_key = normalize_email(sch_email)
        first_key = normalize_name(sch_first)
        last_key = normalize_name(sch_last)

        # Stage 1: email (an empty email must not pair with an empty ADP email).
        match_kind = 'email'
        hits = adp_grouped_df[adp_emails == email_key] if email_key else no_rows

        # Stage 2: exact first + last name.
        if hits.empty and (first_key or last_key):
            match_kind = 'name_exact'
            hits = adp_grouped_df[(adp_firsts == first_key) & (adp_lasts == last_key)]

        # Stage 3: initial + last name, or first name + last initial.
        if hits.empty and first_key and last_key:
            match_kind = 'name_portion'
            initial_and_last = (adp_first_initials == first_key[:1]) & (adp_lasts == last_key)
            first_and_initial = (adp_firsts == first_key) & (adp_last_initials == last_key[:1])
            hits = adp_grouped_df[initial_and_last | first_and_initial]

        if hits.empty:
            unmatched_records.append(
                [None, None, None, None, sch_email, sch_first, sch_last, sch_count, 'none']
            )
            continue

        for adp_email, adp_first, adp_last, adp_count in zip(
            hits['Email'], hits['First Name'], hits['Last Name'], hits['ADP_Count']
        ):
            merged_records.append(
                [adp_email, adp_first, adp_last, adp_count,
                 sch_email, sch_first, sch_last, sch_count, match_kind]
            )

    # Matched rows first, unmatched rows last.
    return pd.DataFrame(merged_records + unmatched_records, columns=output_columns)

In [15]:
# Pair every scheduler user with their ADP record
pairing_table = create_merged_df8(adp_grouped_df=adp_grouped_df,
                                  sch_grouped_df=sch_grouped_df)

In [16]:
# Manual override: tie this scheduler email to the ADP email below and label the row 'manual'
email_to_change = "dalessionicholas6@gmail.com"
sch_email_to_match = "dalessnp@dukes.jmu.edu"

pairing_table.loc[pairing_table['Email_Sch'].eq(sch_email_to_match), 'Email_ADP'] = email_to_change
pairing_table.loc[pairing_table['Email_Sch'].eq(sch_email_to_match), 'Matched?'] = 'manual'

# Only the two emails and the match label are needed downstream
pairing_table = pairing_table.loc[:, ["Email_ADP", "Email_Sch", "Matched?"]]
pairing_table

Unnamed: 0,Email_ADP,Email_Sch,Matched?
0,giovanna.vilela2021@gmail.com,giovanna.vilela2021@gmail.com,email
1,whitneylapper24@gsbschool.org,whitneylapper24@gsbschool.org,email
2,conormreilly9@icloud.com,conormreilly9@icloud.com,email
3,jvalli@ithaca.edu,jvalli@ithaca.edu,email
4,rramos21.rr@gmail.com,rramos21.rr@gmail.com,email
...,...,...,...
308,,noodlespangler10@gmail.com,none
309,,ss9084134622@gmail.com,none
310,,jakeftoth@gmail.com,none
311,,christinatourtellot21@gmail.com,none


## ADOPTION CALC

#### Read in ADP Data For Adoption Calculation

In [17]:
#load the ADP workbook into a DataFrame for the adoption calculation
adp_adoption = pd.read_excel(io='inputs/PunchSourceEmails2023.xlsx')

In [18]:
#reduce the ADP data to last name, first name, email and punch date, without the managers
adp_adoption = (
    adp_adoption[['Last Name', 'First Name', 'Personal Contact: Personal Email',
                  'Time In', 'Pay Code [Timecard]']]  #columns needed below
    .rename(columns={'Personal Contact: Personal Email': 'Email'})
    .loc[lambda frame: frame['Pay Code [Timecard]'] != "REGSAL"]  #"REGSAL" rows are the managers
    .assign(Date=lambda frame: frame['Time In'].dt.date)  #date part of the punch time
    .drop(columns=['Time In', 'Pay Code [Timecard]'])  #no longer needed once Date exists
)

adp_adoption

Unnamed: 0,Last Name,First Name,Email,Date
0,Abam,Aria,ariaebam2006@gmail.com,2023-05-28
1,Abam,Aria,ariaebam2006@gmail.com,2023-05-29
2,Abam,Aria,ariaebam2006@gmail.com,2023-06-04
3,Abam,Aria,ariaebam2006@gmail.com,2023-06-20
4,Abam,Aria,ariaebam2006@gmail.com,2023-06-28
...,...,...,...,...
8038,von Brachel,Elinor,evonbrachel@gmail.com,2023-07-12
8039,von Brachel,Elinor,evonbrachel@gmail.com,2023-07-15
8040,von Brachel,Elinor,evonbrachel@gmail.com,2023-07-18
8041,von Brachel,Elinor,evonbrachel@gmail.com,2023-07-20


#### Read in Scheduler Data For Adoption Calculation

In [19]:
#load the scheduler shift workbook into a DataFrame for the adoption calculation
scheduler_adoption = pd.read_excel(io='inputs/SchedulerShifts2023.xlsx')
scheduler_adoption

Unnamed: 0,assignee_name,role,epoch_start_time,start_time_est,assignee_email,coverer_name,coverer_email,cover_type,cover_time,cover_time_est
0,Andrew DeKuyper,3.0,1.687703e+09,2023-06-25 10:30:00,andrewhdekuyper@gmail.com,,,none,,NaT
1,Osias Williams,3.0,1.685387e+09,2023-05-29 15:00:00,osiasxwilli@icloud.com,,,none,,NaT
2,Jacob Silber,3.0,1.685196e+09,2023-05-27 10:00:00,njhype1@gmail.com,,,none,,NaT
3,Kate Niccolai,3.0,1.686488e+09,2023-06-11 09:00:00,katieniccolai@gmail.com,,,none,,NaT
4,Zachary Liebman,3.0,1.693411e+09,2023-08-30 12:00:00,zachsliebman@gmail.com,,,none,,NaT
...,...,...,...,...,...,...,...,...,...,...
4915,Clayton Thomas,3.0,1.693663e+09,2023-09-02 10:00:00,clay.thom8@gmail.com,,,none,,NaT
4916,Clayton Thomas,3.0,1.693591e+09,2023-09-01 14:00:00,clay.thom8@gmail.com,,,none,,NaT
4917,Clayton Thomas,3.0,1.693418e+09,2023-08-30 14:00:00,clay.thom8@gmail.com,,,none,,NaT
4918,Clayton Thomas,3.0,1.693750e+09,2023-09-03 10:00:00,clay.thom8@gmail.com,,,none,,NaT


In [20]:
#drop the unused time columns, remove irrelevant rows, and reduce timestamps to dates
#a row is removed when role == 2 (manager, not relevant) OR cover_type == 'full'
# ('full' either means the request is in the future or it was not taken)
scheduler_adoption = (
    scheduler_adoption
    .drop(columns=["epoch_start_time", "cover_time"])
    .loc[lambda frame: (frame['role'] != 2.0) & (frame['cover_type'] != 'full')]
    .assign(
        start_time_est=lambda frame: frame['start_time_est'].dt.date,
        cover_time_est=lambda frame: frame['cover_time_est'].dt.date,
    )
)

scheduler_adoption

Unnamed: 0,assignee_name,role,start_time_est,assignee_email,coverer_name,coverer_email,cover_type,cover_time_est
0,Andrew DeKuyper,3.0,2023-06-25,andrewhdekuyper@gmail.com,,,none,
1,Osias Williams,3.0,2023-05-29,osiasxwilli@icloud.com,,,none,
2,Jacob Silber,3.0,2023-05-27,njhype1@gmail.com,,,none,
3,Kate Niccolai,3.0,2023-06-11,katieniccolai@gmail.com,,,none,
4,Zachary Liebman,3.0,2023-08-30,zachsliebman@gmail.com,,,none,
...,...,...,...,...,...,...,...,...
4915,Clayton Thomas,3.0,2023-09-02,clay.thom8@gmail.com,,,none,
4916,Clayton Thomas,3.0,2023-09-01,clay.thom8@gmail.com,,,none,
4917,Clayton Thomas,3.0,2023-08-30,clay.thom8@gmail.com,,,none,
4918,Clayton Thomas,3.0,2023-09-03,clay.thom8@gmail.com,,,none,


In [21]:
#build one shift row per coverer: rows whose cover_type is 'before' or 'after',
#with the coverer's name/email moved into the assignee columns
#NOTE(review): the coverer's shift date is taken from cover_time_est, not start_time_est - confirm
cover_scheduler_adoption = (
    scheduler_adoption
    .loc[lambda frame: frame['cover_type'].isin(['before', 'after']),
         ['coverer_name', 'coverer_email', 'role', 'cover_time_est']]
    .rename(columns={'coverer_name': 'assignee_name',
                     'coverer_email': 'assignee_email',
                     'cover_time_est': 'start_time_est'})
)

In [22]:
#stack the assignee rows and the coverer rows into one shift table,
#then keep only assignee name, assignee email and shift date
final_scheduler_adoption = (
    pd.concat([scheduler_adoption, cover_scheduler_adoption], ignore_index=True)
    .drop(columns=['role', 'coverer_name', 'cover_type', 'cover_time_est', 'coverer_email'])
)

In [23]:
#first whitespace-separated token is the first name; the remaining tokens form the last name
split_names = final_scheduler_adoption['assignee_name'].str.split()
final_scheduler_adoption = final_scheduler_adoption.assign(**{
    'First Name': split_names.str.get(0),
    'Last Name': split_names.str.slice(start=1).str.join(' '),
})

final_scheduler_adoption

Unnamed: 0,assignee_name,start_time_est,assignee_email,First Name,Last Name
0,Andrew DeKuyper,2023-06-25,andrewhdekuyper@gmail.com,Andrew,DeKuyper
1,Osias Williams,2023-05-29,osiasxwilli@icloud.com,Osias,Williams
2,Jacob Silber,2023-05-27,njhype1@gmail.com,Jacob,Silber
3,Kate Niccolai,2023-06-11,katieniccolai@gmail.com,Kate,Niccolai
4,Zachary Liebman,2023-08-30,zachsliebman@gmail.com,Zachary,Liebman
...,...,...,...,...,...
4691,Elle Cadigan,2023-06-30,ellemcadigan@gmail.com,Elle,Cadigan
4692,John Hickey,2023-07-08,jhickey0512@gmail.com,John,Hickey
4693,Grace Pappolla,2023-07-22,gracepappolla0222@gmail.com,Grace,Pappolla
4694,Michael Mattiassi,2023-08-11,mmattiassi51@gmail.com,Michael,Mattiassi


#### Define Timeframe For Adoption

In [24]:
#adoption window for the single-week walkthrough; both end dates are inclusive
start_date = pd.to_datetime('2023-06-01').date()
end_date = pd.to_datetime('2023-06-07').date()

#scheduler shifts inside the window
TF_SCH = final_scheduler_adoption[final_scheduler_adoption['start_time_est'].between(start_date, end_date)]

#ADP punches inside the window
TF_ADP = adp_adoption[adp_adoption['Date'].between(start_date, end_date)]

#### Grouping -- Count Shifts For Adoption Calc

In [25]:
#number of ADP rows per email; the key is renamed to line up with the pairing table
GR_ADP = (
    TF_ADP
    .groupby(['Email'])
    .size()
    .reset_index(name='ADP_Count')
    .rename(columns={"Email": "Email_ADP"})
)

In [26]:
#number of scheduler rows per email; the key is renamed to line up with the pairing table
GR_SCH = (
    TF_SCH
    .groupby(['assignee_email'])
    .size()
    .reset_index(name='Sch_Count')
    .rename(columns={"assignee_email": "Email_Sch"})
)

In [27]:
#look up each scheduler email in the pairing table (scheduler rows without a pairing are kept)
counting = pd.merge(GR_SCH, pairing_table, how="left", on="Email_Sch")

In [28]:
#attach the ADP counts through the paired ADP email.
#The table is rebuilt from GR_SCH rather than from `counting` itself so this cell
#can be re-run safely: merging GR_ADP into `counting` a second time would produce
#ADP_Count_x / ADP_Count_y columns and break the Diff calculation that follows.
counting = (
    GR_SCH
    .merge(pairing_table, on="Email_Sch", how="left")
    .merge(GR_ADP, on="Email_ADP", how="left")
)

counting

Unnamed: 0,Email_Sch,Sch_Count,Email_ADP,Matched?,ADP_Count
0,aidanmsheridan@gmail.com,1,aidanmsheridan@gmail.com,email,1.0
1,aileenwilson11208@gmail.com,1,aileenwilson11208@gmail.com,email,1.0
2,ajazzy206@gmail.com,2,ajazzy206@gmail.com,email,2.0
3,allyrose316@gmail.com,3,allyrose316@gmail.com,email,2.0
4,ameliamcgr@gmail.com,1,ameliamcgr@gmail.com,email,
...,...,...,...,...,...
109,warmfuzzymitts@gmail.com,1,warmfuzzymitts@gmail.com,email,
110,weedal24@students.spprep.org,4,weedal24@students.spprep.org,email,3.0
111,whitneylapper24@gsbschool.org,2,whitneylapper24@gsbschool.org,email,2.0
112,willsimmsjr12@gmail.com,2,willsimmsjr12@gmail.com,email,2.0


In [29]:
#Diff = Sch_Count - ADP_Count, left empty (NaN) when either count is missing
# positive -> more scheduler shifts (relatively good)
# negative -> more ADP shifts (relatively bad)

counting["Diff"] = np.where(
    counting[["Sch_Count", "ADP_Count"]].notna().all(axis=1),
    counting["Sch_Count"].sub(counting["ADP_Count"]),
    np.nan,
)

counting

Unnamed: 0,Email_Sch,Sch_Count,Email_ADP,Matched?,ADP_Count,Diff
0,aidanmsheridan@gmail.com,1,aidanmsheridan@gmail.com,email,1.0,0.0
1,aileenwilson11208@gmail.com,1,aileenwilson11208@gmail.com,email,1.0,0.0
2,ajazzy206@gmail.com,2,ajazzy206@gmail.com,email,2.0,0.0
3,allyrose316@gmail.com,3,allyrose316@gmail.com,email,2.0,1.0
4,ameliamcgr@gmail.com,1,ameliamcgr@gmail.com,email,,
...,...,...,...,...,...,...
109,warmfuzzymitts@gmail.com,1,warmfuzzymitts@gmail.com,email,,
110,weedal24@students.spprep.org,4,weedal24@students.spprep.org,email,3.0,1.0
111,whitneylapper24@gsbschool.org,2,whitneylapper24@gsbschool.org,email,2.0,0.0
112,willsimmsjr12@gmail.com,2,willsimmsjr12@gmail.com,email,2.0,0.0


In [30]:
#number of rows whose Diff lies between -2 and 2 inclusive (rows without a Diff are not counted)
count_within_range = counting["Diff"].abs().le(2).sum()
count_within_range

82

#### Calculate Adoption Rates Per Week

In [31]:
def adoption_rates_per_week(start_date, end_date, ADP_DF, SCH_DF, pairing_table):
    """Compute the yes / partial / no adoption shares for one date window.

    Steps: filter both frames to ``start_date``..``end_date`` (inclusive),
    count rows per email, link scheduler emails to ADP emails through
    ``pairing_table``, and compare the two counts per scheduler user.

    Categories (counts):
      * yes     - scheduler users with a linked ADP count where
                  ``-2 <= SCH_Count - ADP_Count <= 2``.
      * partial - every other scheduler user in the window, including those
                  without a linked ADP count.
      * no      - number of ADP emails in the window minus (yes + partial).
                  NOTE(review): this goes negative when the window has more
                  scheduler rows than ADP emails - confirm the intended metric.
    The reported total (yes + partial + no) therefore equals the number of
    distinct ADP emails in the window.

    Parameters
    ----------
    start_date, end_date : datetime.date
        Inclusive window bounds; must support ``strftime``.
    ADP_DF : pandas.DataFrame
        Needs the columns 'Email' and 'Date'.
    SCH_DF : pandas.DataFrame
        Needs the columns 'assignee_email' and 'start_time_est'.
    pairing_table : pandas.DataFrame
        Needs the columns 'Email_Sch' and 'Email_ADP'. Repeated
        (Email_Sch, Email_ADP) pairs are collapsed before use so a scheduler
        user is not counted once per repeated pairing row.

    Returns
    -------
    list
        ``[start 'YYYY-MM-DD', end 'YYYY-MM-DD', yes share, partial share,
        no share, total]`` with shares rounded to 4 decimals. The three shares
        are NaN when the total is 0.
    """
    #Filter the main ADP and SCH dataframes to the respective timeframes
    adp_in_window = ADP_DF[(ADP_DF['Date'] >= start_date) &
                           (ADP_DF['Date'] <= end_date)]

    sch_in_window = SCH_DF[(SCH_DF['start_time_est'] >= start_date) &
                           (SCH_DF['start_time_est'] <= end_date)]

    #Group the ADP and SCH DFs by email
    GR_ADP = (adp_in_window.groupby(['Email']).size()
              .reset_index(name='ADP_Count')
              .rename(columns={"Email": "Email_ADP"}))

    GR_SCH = (sch_in_window.groupby(['assignee_email']).size()
              .reset_index(name='SCH_Count')
              .rename(columns={"assignee_email": "Email_Sch"}))

    #Collapse repeated pairings; otherwise the left merge below would emit one
    #row per repeat and the same scheduler user would be counted several times
    pairing_pairs = pairing_table.drop_duplicates(subset=["Email_Sch", "Email_ADP"])

    #Match the emails with the pairing table
    matched_df = (GR_SCH
                  .merge(pairing_pairs, on="Email_Sch", how="left")
                  .merge(GR_ADP, on="Email_ADP", how="left"))

    #Difference in shifts (plus means more SCH shifts); NaN when the ADP count is missing
    matched_df["Diff"] = matched_df["SCH_Count"] - matched_df["ADP_Count"]

    #Calculate the yes/partial/no counts
    adoption_yes_count = int(matched_df["Diff"].between(-2, 2).sum())
    adoption_partial_count = len(matched_df) - adoption_yes_count
    adoption_no_count = len(GR_ADP) - adoption_yes_count - adoption_partial_count
    sum_count = adoption_yes_count + adoption_partial_count + adoption_no_count

    #Calculate the yes/partial/no percents (undefined for an empty window)
    if sum_count == 0:
        yes_perc = partial_perc = no_perc = float('nan')
    else:
        yes_perc = round(adoption_yes_count / sum_count, 4)
        partial_perc = round(adoption_partial_count / sum_count, 4)
        no_perc = round(adoption_no_count / sum_count, 4)

    #Format start/end date
    formatted_start_date = start_date.strftime("%Y-%m-%d")
    formatted_end_date = end_date.strftime("%Y-%m-%d")

    return [formatted_start_date, formatted_end_date, yes_perc, partial_perc, no_perc, sum_count]

In [32]:
#reporting windows as (first day, last day), both inclusive
#NOTE(review): the last window (2023-07-25 to 2023-08-01) spans 8 days, the others 7 - confirm this is intended
date_ranges = [
    (pd.Timestamp(first_day).date(), pd.Timestamp(last_day).date())
    for first_day, last_day in [
        ('2023-05-30', '2023-06-05'),
        ('2023-06-06', '2023-06-12'),
        ('2023-06-13', '2023-06-19'),
        ('2023-06-20', '2023-06-26'),
        ('2023-06-27', '2023-07-03'),
        ('2023-07-04', '2023-07-10'),
        ('2023-07-11', '2023-07-17'),
        ('2023-07-18', '2023-07-24'),
        ('2023-07-25', '2023-08-01'),
    ]
]

#one adoption summary per window
results = []
for start_date, end_date in date_ranges:
    week_result = adoption_rates_per_week(
        start_date=start_date,
        end_date=end_date,
        ADP_DF=adp_adoption,
        SCH_DF=final_scheduler_adoption,
        pairing_table=pairing_table,
    )
    results.append(week_result)

result_df = pd.DataFrame(
    data=results,
    columns=["Start Date", "End Date", "Yes %", "Partial %", "No %", "Sum Users"],
)

In [33]:
#show the per-window adoption shares (yes / partial / no) and the user total
result_df

Unnamed: 0,Start Date,End Date,Yes %,Partial %,No %,Sum Users
0,2023-05-30,2023-06-05,0.3762,0.1619,0.4619,210
1,2023-06-06,2023-06-12,0.4027,0.1131,0.4842,221
2,2023-06-13,2023-06-19,0.3923,0.1538,0.4538,260
3,2023-06-20,2023-06-26,0.4368,0.1119,0.4513,277
4,2023-06-27,2023-07-03,0.4516,0.0742,0.4742,310
5,2023-07-04,2023-07-10,0.396,0.0549,0.5491,346
6,2023-07-11,2023-07-17,0.3754,0.0587,0.566,341
7,2023-07-18,2023-07-24,0.3989,0.0598,0.5413,351
8,2023-07-25,2023-08-01,0.4012,0.064,0.5349,344


In [34]:
import plotly.express as px
import pandas as pd
