In [27]:
import pandas as pd
import numpy as np
from typing import List, Optional

In [28]:
mentor_df = pd.read_csv('../../data/Attendance_Template_v3 - Mentors.csv')
staff_df = pd.read_csv('../../data/Attendance_Template_v3 - Staff.csv')
meeting_1 = pd.read_csv('../../data/2025-07-29 17_45 wrt-nsga-feo Attendance Report - Attendees.csv')
meeting_2 = pd.read_csv('../../data/2025-07-30 17_50 kan-ptwt-ehz Attendance Report - Attendees.csv')

In [29]:
mentor_df

Unnamed: 0,First Name,Last Name,Alternate Name
0,Alex,D,
1,Annaleya,Hamilton,
2,Austin,Reading,
3,Barbara,Adkins,
4,Bill,Montgomery,
5,Blare,Robinson,
6,Chris,Metcalfe,Chris Metcalfe
7,Chris,Kaelin,
8,Christina,Porter,
9,Cody,Miller,


In [30]:
staff_df

Unnamed: 0,First Name,Last name,Alternate Name,Email,Region
0,Ailene,Johnston,,,
1,Alli,Rippy,,,
2,Amanda,Gearhart,,,
3,August,Mapp,,,
4,Blake,Herbert,,,
5,Brian,Luerman,,,
6,classroom,admin,,,
7,Dan,Collins,,,
8,Danny,Morton,,,
9,David,York,,,


In [31]:
def keep_name(df: pd.DataFrame) -> pd.DataFrame:
    """
    Titles columns in a DataFrame and drops all other columns that is not the 'First Name' and 'Last Name'.

    Args:
        df (pd.DataFrame): A data frame containing at least 'First Name' and 'Last Name' columns.

    Returns:
        pd.DataFrame: A Data Frame with only the 'First Name' and 'Last Name' Columns
    """
    df.columns = df.columns.str.title()
    col_to_keep = ['First Name', 'Last Name']
    return df[col_to_keep].copy()

In [32]:
def process_meeting_times(df: pd.DataFrame) -> pd.DataFrame:
    """
    Standardize meeting time columns and calculate duration.

    Steps performed:
    1. Converts 'Time Joined' and 'Time Exited' columns to datetime objects.
    2. Calculates the meeting duration in minutes and stores it in 'Duration in Minutes'.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing meeting data with 'Time Joined' and 'Time Exited' columns.

    Returns
    -------
    pd.DataFrame
        Updated DataFrame with time columns as datetime and a duration column.
    """
    df.columns = df.columns.str.title()
    df['Time Joined'] = pd.to_datetime(df['Time Joined'], format='%I:%M %p', errors='coerce')
    df['Time Exited'] = pd.to_datetime(df['Time Exited'], format='%I:%M %p', errors='coerce')
    df['Duration In Minutes'] = (
        df['Time Exited'] - df['Time Joined']
    ).dt.total_seconds() / 60
    return df


def add_full_name(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add a 'Full Name' column by combining 'First Name' and 'Last Name'.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing 'First Name' and 'Last Name' columns.

    Returns
    -------
    pd.DataFrame
        DataFrame with a new 'Full Name' column.
    """
    df['Full Name'] = df['First Name'] + ' ' + df['Last Name']
    return df

In [33]:
def split_names(df: pd.DataFrame) -> pd.DataFrame:
    """Function accounting for instance where first name column contains more than just the first name

    Args:
        df (pd.DataFrame): DataFrame that needs to be checked for more than one name in first name

    Returns:
        pd.DataFrame: DataFrame containing only the first name in the first name column and fills the last name column with the last name
    """
    if ' ' in df['First Name']:
        names = df['First Name'].split()
        if len(names) >= 2:
            df['First Name'] = names[0]
            df['Last Name'] = names[-1]
    return df

In [34]:
def clean_name_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean and standardize names for a DataFrame.

    Steps performed:
    1. Capitalizes column names for consistency.
    2. Drops all comlumns that are not 'Fist Name' or 'Last Name'
    3. Fill null values in name columns.
    4. Accounts for instance where 'First Name' contains 'Last Name'
    5. Creates a 'Full Name' column by combining 'First Name' and 'Last Name'.

    Parameters
    ----------
    df : pd.DataFrame
        Input DataFrame containing name data.

    Returns
    -------
    pd.DataFrame
        A cleaned DataFrame with standardized columns and a 'Full Name' field.
    """
    df = keep_name(df)
    if df.isna().any().any() == True:
        df = df.fillna('Unknown')
    df = df.apply(split_names, 1)    
    df = add_full_name(df)

    return df

Gives us info on the function from the doc string

In [35]:
clean_name_df?

[1;31mSignature:[0m [0mclean_name_df[0m[1;33m([0m[0mdf[0m[1;33m:[0m [0mpandas[0m[1;33m.[0m[0mcore[0m[1;33m.[0m[0mframe[0m[1;33m.[0m[0mDataFrame[0m[1;33m)[0m [1;33m->[0m [0mpandas[0m[1;33m.[0m[0mcore[0m[1;33m.[0m[0mframe[0m[1;33m.[0m[0mDataFrame[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
Clean and standardize names for a DataFrame.

Steps performed:
1. Capitalizes column names for consistency.
2. Drops all comlumns that are not 'Fist Name' or 'Last Name'
3. Fill null values in name columns.
4. Accounts for instance where 'First Name' contains 'Last Name'
5. Creates a 'Full Name' column by combining 'First Name' and 'Last Name'.

Parameters
----------
df : pd.DataFrame
    Input DataFrame containing name data.

Returns
-------
pd.DataFrame
    A cleaned DataFrame with standardized columns and a 'Full Name' field.
[1;31mFile:[0m      c:\users\leolion023\appdata\local\temp\ipykernel_27352\4077920601.py
[1;31mType:[0m      function

In [36]:
staff_list = clean_name_df(staff_df)
staff_list

Unnamed: 0,First Name,Last Name,Full Name
0,Ailene,Johnston,Ailene Johnston
1,Alli,Rippy,Alli Rippy
2,Amanda,Gearhart,Amanda Gearhart
3,August,Mapp,August Mapp
4,Blake,Herbert,Blake Herbert
5,Brian,Luerman,Brian Luerman
6,classroom,admin,classroom admin
7,Dan,Collins,Dan Collins
8,Danny,Morton,Danny Morton
9,David,York,David York


In [11]:
##clean_mentor_df?

In [28]:
mentor_list = clean_name_df(mentor_df)
mentor_list

Missing name in Last Name filled with Unknown


Unnamed: 0,First Name,Last Name,Full Name
0,Alex,D,Alex D
1,Annaleya,Hamilton,Annaleya Hamilton
2,Austin,Reading,Austin Reading
3,Barbara,Adkins,Barbara Adkins
4,Bill,Montgomery,Bill Montgomery
5,Blare,Robinson,Blare Robinson
6,Chris,Metcalfe,Chris Metcalfe
7,Chris,Kaelin,Chris Kaelin
8,Christina,Porter,Christina Porter
9,Cody,Miller,Cody Miller


In [13]:
meeting_1

Unnamed: 0,First name,Last name,Email,Duration,Time joined,Time exited
0,Alex,D,awda*****@***.com,2 hr 25 min,6:01 PM,8:26 PM
1,Angela,Moore,ange*************@***.com,2 hr 29 min,5:59 PM,8:27 PM
2,Bill,Montgomery,kyho***@***.com,2 hr 27 min,5:59 PM,8:26 PM
3,Charles,Norman,ario****@***.com,2 hr 42 min,5:45 PM,8:27 PM
4,Christina,Porter,chri**************@***.com,2 hr 4 min,6:15 PM,8:19 PM
5,Cindy,Wedding,cwed*******@***.com,2 hr 22 min,6:03 PM,8:25 PM
6,Classroom,Admin,classroom@codeyou.org,1 min,5:59 PM,6:00 PM
7,Joel,Anderson,joel*************@***.com,2 hr 27 min,6:00 PM,8:27 PM
8,Jonathan,Chadwell,jona*******************@***.com,2 hr 24 min,6:02 PM,8:26 PM
9,Mendell,M,menm****@***.com,1 hr 58 min,6:00 PM,8:00 PM


In [14]:
test = process_meeting_times(meeting_1)
test = add_full_name(test)
test

Unnamed: 0,First Name,Last Name,Email,Duration,Time Joined,Time Exited,Duration In Minutes,Full Name
0,Alex,D,awda*****@***.com,2 hr 25 min,1900-01-01 18:01:00,1900-01-01 20:26:00,145.0,Alex D
1,Angela,Moore,ange*************@***.com,2 hr 29 min,1900-01-01 17:59:00,1900-01-01 20:27:00,148.0,Angela Moore
2,Bill,Montgomery,kyho***@***.com,2 hr 27 min,1900-01-01 17:59:00,1900-01-01 20:26:00,147.0,Bill Montgomery
3,Charles,Norman,ario****@***.com,2 hr 42 min,1900-01-01 17:45:00,1900-01-01 20:27:00,162.0,Charles Norman
4,Christina,Porter,chri**************@***.com,2 hr 4 min,1900-01-01 18:15:00,1900-01-01 20:19:00,124.0,Christina Porter
5,Cindy,Wedding,cwed*******@***.com,2 hr 22 min,1900-01-01 18:03:00,1900-01-01 20:25:00,142.0,Cindy Wedding
6,Classroom,Admin,classroom@codeyou.org,1 min,1900-01-01 17:59:00,1900-01-01 18:00:00,1.0,Classroom Admin
7,Joel,Anderson,joel*************@***.com,2 hr 27 min,1900-01-01 18:00:00,1900-01-01 20:27:00,147.0,Joel Anderson
8,Jonathan,Chadwell,jona*******************@***.com,2 hr 24 min,1900-01-01 18:02:00,1900-01-01 20:26:00,144.0,Jonathan Chadwell
9,Mendell,M,menm****@***.com,1 hr 58 min,1900-01-01 18:00:00,1900-01-01 20:00:00,120.0,Mendell M


In [15]:
def clean_meeting(df: pd.DataFrame) -> pd.DataFrame:
    df = process_meeting_times(df)
    df = add_full_name(df)
    return df

In [16]:
meeting_1_cleaned = clean_meeting(meeting_1)
meeting_1_cleaned

Unnamed: 0,First Name,Last Name,Email,Duration,Time Joined,Time Exited,Duration In Minutes,Full Name
0,Alex,D,awda*****@***.com,2 hr 25 min,1900-01-01 18:01:00,1900-01-01 20:26:00,145.0,Alex D
1,Angela,Moore,ange*************@***.com,2 hr 29 min,1900-01-01 17:59:00,1900-01-01 20:27:00,148.0,Angela Moore
2,Bill,Montgomery,kyho***@***.com,2 hr 27 min,1900-01-01 17:59:00,1900-01-01 20:26:00,147.0,Bill Montgomery
3,Charles,Norman,ario****@***.com,2 hr 42 min,1900-01-01 17:45:00,1900-01-01 20:27:00,162.0,Charles Norman
4,Christina,Porter,chri**************@***.com,2 hr 4 min,1900-01-01 18:15:00,1900-01-01 20:19:00,124.0,Christina Porter
5,Cindy,Wedding,cwed*******@***.com,2 hr 22 min,1900-01-01 18:03:00,1900-01-01 20:25:00,142.0,Cindy Wedding
6,Classroom,Admin,classroom@codeyou.org,1 min,1900-01-01 17:59:00,1900-01-01 18:00:00,1.0,Classroom Admin
7,Joel,Anderson,joel*************@***.com,2 hr 27 min,1900-01-01 18:00:00,1900-01-01 20:27:00,147.0,Joel Anderson
8,Jonathan,Chadwell,jona*******************@***.com,2 hr 24 min,1900-01-01 18:02:00,1900-01-01 20:26:00,144.0,Jonathan Chadwell
9,Mendell,M,menm****@***.com,1 hr 58 min,1900-01-01 18:00:00,1900-01-01 20:00:00,120.0,Mendell M


In [17]:
def match_meeting_times(clean_list: pd.DataFrame,clean_meeting_time: pd.DataFrame) -> pd.DataFrame:
    """
    Merges two DataFrame containing the name and duration of members that were present for a meeting.

    Args:
        clean_list (pd.DataFrame): A DataFrame containing the 'Full Name' column.
        clean_meeting_time (pd.DataFrame): A DataFrame containing 'Duration in Minutes' and 'Full Name' columns.

    Returns:
        pd.DataFrame: A DataFrame containing the 'Full Name' and 'Duration in Minutes' columns.
    """
    df = pd.merge(clean_list['Full Name'],clean_meeting_time[['Duration In Minutes','Full Name']], how = 'inner',on='Full Name')
    return df

In [18]:
meeting_1_cleaned.columns

Index(['First Name', 'Last Name', 'Email', 'Duration', 'Time Joined',
       'Time Exited', 'Duration In Minutes', 'Full Name'],
      dtype='object')

In [19]:
#merge meeting 1 with mentor list to consolidate into list of mentors that where present and how long they where present
mentor_time = match_meeting_times(mentor_list,meeting_1_cleaned)
mentor_time

Unnamed: 0,Full Name,Duration In Minutes
0,Alex D,145.0
1,Bill Montgomery,147.0
2,Christina Porter,124.0


In [20]:
#merge staff 1 with mentor list to consolidate into list of staff that where present and how long they where present
staff_time = match_meeting_times(staff_list,meeting_1_cleaned)
staff_time

Unnamed: 0,Full Name,Duration In Minutes


In [21]:
meeting_2_cleaned = clean_meeting(meeting_2)
meeting_2_cleaned

Unnamed: 0,First Name,Last Name,Email,Duration,Time Joined,Time Exited,Duration In Minutes,Full Name
0,Sheyla,Diaz,dshe*****@***.com,1 hr 43 min,1900-01-01 17:58:00,1900-01-01 19:41:00,103.0,Sheyla Diaz
1,James,Glosser,demo**********@***.com,1 hr 40 min,1900-01-01 18:01:00,1900-01-01 19:41:00,100.0,James Glosser
2,Reed,Haddix,shir***********@***.com,1 hr 46 min,1900-01-01 17:55:00,1900-01-01 19:41:00,106.0,Reed Haddix
3,John,Hankins,john**********@***.com,1 hr 38 min,1900-01-01 18:03:00,1900-01-01 19:41:00,98.0,John Hankins
4,Stephanie,Jones,step*******************@***.com,1 hr 48 min,1900-01-01 17:53:00,1900-01-01 19:41:00,108.0,Stephanie Jones
5,Aaron,LaLiberty,daei**@***.com,1 hr 39 min,1900-01-01 18:02:00,1900-01-01 19:41:00,99.0,Aaron LaLiberty
6,Dakota,McMullin,kota****@***.com,1 hr 11 min,1900-01-01 17:59:00,1900-01-01 19:11:00,72.0,Dakota McMullin
7,Toni-Ivy,Ownn.,toni*********@***.com,1 hr 40 min,1900-01-01 18:01:00,1900-01-01 19:41:00,100.0,Toni-Ivy Ownn.
8,Michael,Puckett,mich***********@***.com,1 hr 37 min,1900-01-01 18:04:00,1900-01-01 19:41:00,97.0,Michael Puckett
9,Leighton,Pulliam,l8on**@***.com,1 hr 40 min,1900-01-01 18:00:00,1900-01-01 19:41:00,101.0,Leighton Pulliam


In [22]:
#merge meeting 2 with mentor list to consolidate into list of mentors that where present and how long they where present
mentor_time_2 = match_meeting_times(mentor_list,meeting_2_cleaned)
mentor_time_2

Unnamed: 0,Full Name,Duration In Minutes
0,Michael Puckett,97.0


In [23]:
#merge meeting 2 with staff list to consolidate into list of staff that where present and how long they where present
staff_time_2 = match_meeting_times(staff_list,meeting_2_cleaned)
staff_time_2

Unnamed: 0,Full Name,Duration In Minutes
