In [1]:
# -*- coding: utf-8 -*-
"""
Interview Slot Assignment Script (Version 2)

This script assigns interviewers from 'Calendly Assignment - People.csv'
to interview slots booked via 'Calendly Assignment - Calendly.csv' based on:
- Interviewer availability (time slots)
- Interviewer skills (AIGF/Generalist vs. Engineering)
- Interviewer preferred domains
- Interviewer preferred candidate Years of Experience (YOE)
- Interviewer priority (lower number = higher priority)

It aims to assign each call to the highest-priority suitable interviewer
available at that time, ensuring no interviewer is double-booked in the same slot.
"""

import csv
from datetime import datetime, timedelta
import re  # Import regex for robust time parsing
import os  # To check if files exist

# --- Configuration ---
# CSV listing the interviewers; read by load_data() via main().
PEOPLE_FILE = 'Calendly Assignment - People.csv'
# CSV listing the booked calls; read by load_data() via main().
CALENDLY_FILE = 'Calendly Assignment - Calendly.csv'
# Length of every interview slot, in minutes; the default duration used by
# calculate_end_time() to derive a call's end time from its start time.
INTERVIEW_DURATION_MINUTES = 40

# --- Helper Functions ---

def parse_time(time_str):
    """
    Convert a 24-hour 'HH:MM' (or 'H:MM') string into a datetime.time.

    Surrounding whitespace is ignored. If the whole string is not a clean
    time, the first 'H:MM'/'HH:MM'-looking fragment inside it is used
    instead (so '112:40' is read as '12:40') and a warning is printed.

    Args:
        time_str (str): The text to interpret.

    Returns:
        datetime.time: The parsed time, or None when the input is empty,
        contains no recognisable time, or has an hour/minute out of range.
    """
    if not time_str:
        return None

    def _in_range(hh, mm):
        # Valid 24-hour clock values only.
        return 0 <= hh <= 23 and 0 <= mm <= 59

    try:
        text = time_str.strip()

        # Case 1: the entire string is already H:MM / HH:MM.
        exact = re.fullmatch(r'(\d{1,2}):(\d{2})', text)
        if exact:
            hh = int(exact.group(1))
            mm = int(exact.group(2))
            if not _in_range(hh, mm):
                print(f"Warning: Invalid time value {text}. Hour or minute out of range.")
                return None
            return datetime.strptime(text, '%H:%M').time()

        # Case 2: salvage a time-like fragment from a malformed string.
        embedded = re.search(r'(\d{1,2}:\d{2})', text)
        if not embedded:
            print(f"Warning: Could not parse time string: '{time_str}'")
            return None

        candidate = embedded.group(1)
        print(f"Warning: Parsed potentially malformed time '{text}' as '{candidate}'.")
        hh, mm = (int(piece) for piece in candidate.split(':'))
        if not _in_range(hh, mm):
            print(f"Warning: Invalid time value {candidate} extracted from {text}.")
            return None
        return datetime.strptime(candidate, '%H:%M').time()
    except ValueError:
        print(f"Warning: Invalid time format encountered: '{time_str}'")
        return None

def parse_availability(availability_str):
    """
    Turn a comma-separated availability string into (start, end) time pairs.

    Each entry is expected to look like 'HH:MM - HH:MM'. Entries that are
    malformed, unparseable, or whose start is not strictly before their end
    are reported with a printed warning and left out of the result.

    Args:
        availability_str (str): Text such as "09:00 - 11:00, 14:00 - 16:00".

    Returns:
        list: (datetime.time, datetime.time) tuples, in input order.
    """
    windows = []
    if not availability_str:
        return windows

    for entry in (piece.strip() for piece in availability_str.split(',')):
        if not entry:
            continue  # tolerate stray or trailing commas

        bounds = entry.split('-')
        if len(bounds) != 2:
            print(f"Warning: Skipping malformed availability range (expected 'HH:MM - HH:MM'): '{entry}'")
            continue

        # Both ends are always parsed so each bad value gets its own warning.
        begin = parse_time(bounds[0].strip())
        finish = parse_time(bounds[1].strip())
        if not (begin and finish):
            print(f"Warning: Could not parse start or end time in availability range: '{entry}'")
            continue

        if begin < finish:
            windows.append((begin, finish))
        else:
            print(f"Warning: Skipping availability range where start time is not before end time: '{entry}'")

    return windows

def calculate_end_time(start_time, duration_minutes=INTERVIEW_DURATION_MINUTES):
    """
    Return the clock time reached `duration_minutes` after `start_time`.

    Only the time-of-day is returned, so a slot that runs past midnight
    wraps around (e.g. 23:30 + 40 minutes gives 00:10).

    Args:
        start_time (datetime.time): When the slot begins.
        duration_minutes (int): Slot length in minutes.

    Returns:
        datetime.time: The end of the slot. None if `start_time` is falsy or
        an unexpected error occurs; 23:59 if the arithmetic overflows.
    """
    if not start_time:
        return None

    try:
        # time objects do not support arithmetic, so anchor to a dummy date,
        # add the duration, and keep only the time-of-day of the result.
        length = timedelta(minutes=duration_minutes)
        anchored = datetime.combine(datetime.min, start_time)
        return (anchored + length).time()
    except OverflowError:
        # The sum fell outside the range datetime can represent.
        print(f"Warning: Time calculation overflow for start time {start_time}. Check logic near midnight if relevant.")
        return datetime.strptime("23:59", '%H:%M').time()
    except Exception as exc:
        print(f"Error calculating end time for {start_time}: {exc}")
        return None

# --- Data Loading ---

def _cell(row, column):
    """Return the stripped text of row[column], or '' when the cell is absent.

    csv.DictReader fills the missing cells of a short row with None, so a
    plain ``row.get(column, '').strip()`` would raise on ragged rows.
    """
    value = row.get(column)
    return value.strip() if value is not None else ''


def _missing_columns(fieldnames, required):
    """List the required column names not present in a CSV header.

    `fieldnames` is None for a completely empty file; that is treated as a
    header with no columns so every required column is reported missing.
    """
    present = set(fieldnames or ())
    return [col for col in required if col not in present]


def _load_people(people_file):
    """Read the interviewer CSV into a dict keyed by interviewer name.

    Returns None (after printing the reason) when the file is missing,
    unreadable, or lacks a required column.
    """
    required_person_cols = ['Person', 'Availability', 'AIGF Interviewer',
                            'Engineering Interviewer', 'YOE', 'Priority', 'Domains']
    people = {}
    try:
        # utf-8-sig transparently drops the BOM that spreadsheet exports often
        # prepend (plain utf-8 would turn the first header into '\ufeffPerson');
        # newline='' lets the csv module handle line endings inside quoted cells.
        with open(people_file, 'r', encoding='utf-8-sig', newline='') as f:
            reader = csv.DictReader(f)
            missing_cols = _missing_columns(reader.fieldnames, required_person_cols)
            if missing_cols:
                print(f"Error: Missing required columns in people file ('{people_file}'): {', '.join(missing_cols)}")
                return None

            # Data starts on line 2 of the file (line 1 is the header).
            for row_number, row in enumerate(reader, start=2):
                person_name = _cell(row, 'Person')
                if not person_name:
                    print(f"Warning: Skipping row {row_number} in '{people_file}' with empty Person name.")
                    continue

                availability_ranges = parse_availability(_cell(row, 'Availability'))

                # Sets give O(1) membership tests during assignment.
                yoe_set = {yoe.strip() for yoe in _cell(row, 'YOE').split(',') if yoe.strip()}

                # A blank or non-numeric priority falls back to 999 (lowest).
                priority_str = _cell(row, 'Priority')
                try:
                    priority = int(priority_str) if priority_str else 999
                except ValueError:
                    print(f"Warning: Invalid priority '{row.get('Priority')}' for {person_name} in row {row_number}. Defaulting to 999.")
                    priority = 999

                # Domains may be separated by commas and/or line breaks.
                domains_set = {
                    domain.strip()
                    for domain in re.split(r'[,\r\n]+', _cell(row, 'Domains'))
                    if domain.strip()
                }

                people[person_name] = {
                    'Name': person_name,
                    'Availability': availability_ranges,
                    'AIGF': _cell(row, 'AIGF Interviewer').lower() == 'yes',
                    'Engineering': _cell(row, 'Engineering Interviewer').lower() == 'yes',
                    'YOE': yoe_set,
                    'Priority': priority,
                    'Domains': domains_set,
                }
    except FileNotFoundError:
        print(f"Error: People file not found at '{people_file}'")
        return None
    except Exception as e:
        # Loader boundary: report any other failure (permissions, decoding,
        # csv errors, ...) and signal it to the caller instead of crashing.
        print(f"An unexpected error occurred loading people file '{people_file}': {e}")
        return None
    return people


def _load_calls(calendly_file):
    """Read the calendly CSV into a list of call dicts sorted by start time.

    Rows lacking a valid start time, field, or years of experience are
    skipped with a message. Returns None (after printing the reason) when
    the file is missing, unreadable, or lacks a required column.
    """
    required_calendly_cols = ['StartTime', 'Cx Email', 'Field', 'Years of Experience', 'Coding']
    calendly_calls = []
    try:
        with open(calendly_file, 'r', encoding='utf-8-sig', newline='') as f:
            reader = csv.DictReader(f)
            missing_cols = _missing_columns(reader.fieldnames, required_calendly_cols)
            if missing_cols:
                print(f"Error: Missing required columns in calendly file ('{calendly_file}'): {', '.join(missing_cols)}")
                return None

            for row_number, row in enumerate(reader, start=2):
                raw_start = row.get('StartTime') or ''
                start_time = parse_time(raw_start)
                field = _cell(row, 'Field')
                yoe = _cell(row, 'Years of Experience')
                is_coding = _cell(row, 'Coding').lower() == 'yes'
                email = _cell(row, 'Cx Email')

                # Email is not needed for assignment, so a bad one only warns.
                if not email or '@' not in email:
                    print(f"Warning: Missing or potentially invalid email '{row.get('Cx Email') or ''}' in calendly row {row_number}.")
                if not email:
                    # Placeholder keeps the call identifiable in the reports.
                    email = f'Row_{row_number}_NoEmail'

                missing_fields = []
                if start_time is None:
                    missing_fields.append(f"Invalid/Missing StartTime '{raw_start}'")
                if not field:
                    missing_fields.append("Missing Field")
                if not yoe:
                    missing_fields.append("Missing Years of Experience")
                if missing_fields:
                    print(f"Skipping calendly call in row {row_number} ({email}) due to: {'; '.join(missing_fields)}.")
                    continue

                calendly_calls.append({
                    'Start Time': start_time,
                    'Cx Email': email,
                    'Field': field,
                    'Linkedin': row.get('Linkedin') or '',  # optional column
                    'Phone': row.get('Phone') or '',        # optional column
                    'Years of Experience': yoe,
                    'Coding': is_coding,
                    'Fellowship Type': 'Engineering' if is_coding else 'Generalist',
                    'Assigned': False,             # assignment status
                    'Assigned Interviewer': None,  # filled in on assignment
                })

        # Earliest calls first.
        calendly_calls.sort(key=lambda call: call['Start Time'])
    except FileNotFoundError:
        print(f"Error: Calendly file not found at '{calendly_file}'")
        return None
    except Exception as e:
        # Loader boundary: report and signal failure instead of crashing.
        print(f"An unexpected error occurred loading calendly file '{calendly_file}': {e}")
        return None
    return calendly_calls


def load_data(people_file, calendly_file):
    """
    Loads interviewer and calendly call data from CSV files.

    Both files are read as UTF-8 with an optional byte-order mark. Rows
    that are shorter than the header are tolerated: their missing cells are
    treated as empty. The people file is loaded first; the calendly file is
    only opened if that succeeds.

    Args:
        people_file (str): Path to the people (interviewers) CSV file.
        calendly_file (str): Path to the calendly (calls) CSV file.

    Returns:
        tuple: (people, calendly_calls).
               'people' is a dict mapping interviewer name to a dict with the
               keys 'Name', 'Availability', 'AIGF', 'Engineering', 'YOE',
               'Priority' and 'Domains'.
               'calendly_calls' is a list of call dictionaries sorted by
               'Start Time'.
               Returns (None, None) if any critical error occurs during
               loading (missing file, missing required column, read error);
               the reason is printed.
    """
    people = _load_people(people_file)
    if people is None:
        return None, None

    calendly_calls = _load_calls(calendly_file)
    if calendly_calls is None:
        return None, None

    return people, calendly_calls

# --- Assignment Logic ---

def _covers(availability, start, end):
    """True if any (range_start, range_end) in `availability` contains [start, end)."""
    return any(p_start <= start and end <= p_end for p_start, p_end in availability)


def _overlaps_any(booked, start, end):
    """True if [start, end) intersects any already-booked (start, end) interval."""
    return any(b_start < end and start < b_end for b_start, b_end in booked)


def _matches_call(person_details, call):
    """True if the interviewer's fellowship type, domains and YOE fit the call."""
    fellowship_type = call['Fellowship Type']
    fellowship_match = (
        (fellowship_type == 'Engineering' and person_details['Engineering']) or
        (fellowship_type == 'Generalist' and person_details['AIGF'])
    )
    if not fellowship_match:
        return False
    # Strict matching: the call's exact domain and YOE label must be listed.
    return (call['Field'] in person_details['Domains']
            and call['Years of Experience'] in person_details['YOE'])


def assign_interviews(people, calendly_calls):
    """
    Assigns interviewers to calendly calls based on availability, expertise,
    fellowship type, YOE, Domain, and Priority.

    Calls are processed in the given order. For each call, every interviewer
    who is available for the whole slot, handles the call's fellowship type,
    lists its domain and YOE, and has no already-assigned interview that
    overlaps the slot is a candidate; the candidate with the lowest
    'Priority' number wins (ties go to the earliest entry in `people`).

    A call whose slot would run past midnight is left unassigned: interviewer
    availability ranges never span midnight, so none can cover it.

    Each assigned call dict is updated in place ('Assigned' and
    'Assigned Interviewer').

    Args:
        people (dict): Dictionary of people (interviewers) with their details.
        calendly_calls (list): List of calendly calls, assumed sorted by time.

    Returns:
        tuple: (assigned_calls_details, unassigned_calls)
               'assigned_calls_details' is a list of dicts with assignment info.
               'unassigned_calls' is a list of call dicts that couldn't be assigned.
    """
    assigned_calls_details = []
    unassigned_calls = []
    # Interviewer name -> list of (start, end) slots already given to them.
    # Tracking intervals (rather than exact slot keys) also catches calls
    # that start at different times but still overlap.
    bookings = {}

    for call in calendly_calls:
        if call['Assigned']:
            continue  # defensive: never reassign an already-assigned call

        call_start_time = call['Start Time']
        call_end_time = calculate_end_time(call_start_time)

        if not call_end_time:
            print(f"Skipping call for {call['Cx Email']} starting at {call_start_time.strftime('%H:%M')} due to end time calculation error.")
            unassigned_calls.append(call)
            continue

        if call_end_time <= call_start_time:
            # The end time wrapped past midnight. Comparing the wrapped value
            # against availability ranges would produce false matches.
            print(f"Warning: Call for {call['Cx Email']} starting at {call_start_time.strftime('%H:%M')} runs past midnight and cannot be matched to same-day availability.")
            unassigned_calls.append(call)
            continue

        candidates = [
            (person_name, person_details)
            for person_name, person_details in people.items()
            if _covers(person_details['Availability'], call_start_time, call_end_time)
            and _matches_call(person_details, call)
            and not _overlaps_any(bookings.get(person_name, ()), call_start_time, call_end_time)
        ]

        if not candidates:
            unassigned_calls.append(call)
            continue

        # min() returns the first of equally-ranked candidates, i.e. the same
        # choice a stable sort by priority would put first.
        chosen_key, chosen = min(candidates, key=lambda item: item[1]['Priority'])
        chosen_name = chosen['Name']

        call['Assigned'] = True
        call['Assigned Interviewer'] = chosen_name

        assigned_calls_details.append({
            'Time Slot': call_start_time.strftime('%H:%M') + '-' + call_end_time.strftime('%H:%M'),
            'Interviewer': chosen_name,
            'Callee Email': call['Cx Email'],
            'Interview Type': call['Field'],
            'Fellowship Type': call['Fellowship Type'],
            'YOE Match': call['Years of Experience'],
            'Interviewer Priority': chosen['Priority'],
        })

        # Block this interval for the chosen interviewer.
        bookings.setdefault(chosen_key, []).append((call_start_time, call_end_time))

    return assigned_calls_details, unassigned_calls

# --- Output ---

def print_assignments(assigned_calls, unassigned_calls):
    """
    Write a human-readable report of assigned and unassigned calls to stdout.

    Both lists are sorted in place before printing: assignments by slot start
    time, then interviewer priority, then interviewer name; unassigned calls
    by start time.

    Args:
        assigned_calls (list): Assignment detail dictionaries.
        unassigned_calls (list): Call dictionaries that received no interviewer.
    """
    print("\n--- Assignment Results ---")

    print("\nAssigned Interviews:")
    if assigned_calls:
        def _report_order(entry):
            # Order by the real clock time at which the slot begins.
            slot_begin = entry['Time Slot'].split('-')[0]
            return (
                datetime.strptime(slot_begin, '%H:%M'),
                entry['Interviewer Priority'],
                entry['Interviewer'],
            )

        assigned_calls.sort(key=_report_order)
        print(f"  Total assigned: {len(assigned_calls)}")
        for entry in assigned_calls:
            who = f"Interviewer: {entry['Interviewer']} (Prio:{entry['Interviewer Priority']})"
            print(
                f"  - Slot: {entry['Time Slot']}, {who}, "
                f"Callee: {entry['Callee Email']}, Type: {entry['Interview Type']}, "
                f"FType: {entry['Fellowship Type']}, YOE: {entry['YOE Match']}"
            )
    else:
        print("  No interviews were assigned.")

    print("\nUnassigned Interviews:")
    if unassigned_calls:
        unassigned_calls.sort(key=lambda pending: pending['Start Time'])
        print(f"  Total unassigned: {len(unassigned_calls)}")
        for pending in unassigned_calls:
            begins = pending['Start Time']
            begins_text = begins.strftime('%H:%M') if begins else "InvalidTime"
            print(
                f"  - Start Time: {begins_text}, Callee: {pending['Cx Email']}, "
                f"Type: {pending['Field']}, FType: {pending['Fellowship Type']}, "
                f"YOE: {pending['Years of Experience']}"
            )
            # TODO: Optionally add reasons for non-assignment (would require more complex tracking in assign_interviews)
    else:
        print("  All interviews were assigned successfully.")

# --- Main Execution ---

def main():
    """
    Run the full pipeline: load both CSV files, assign interviewers, report.

    Stops early, with an explanatory message, when loading fails or when
    there are no interviewers or no valid calls to work with.
    """
    print("Starting interview assignment...")
    print(f"Reading interviewer data from: '{PEOPLE_FILE}'")
    print(f"Reading calendly call data from: '{CALENDLY_FILE}'")

    interviewers, calls = load_data(PEOPLE_FILE, CALENDLY_FILE)

    # load_data signals a critical failure with None and has already
    # printed the specific reason.
    if any(loaded is None for loaded in (interviewers, calls)):
        print("\nExiting due to critical errors during data loading.")
        return

    if not interviewers:
        print("\nWarning: No interviewers loaded. Cannot assign interviews.")
        return

    if not calls:
        print("\nWarning: No valid calendly calls loaded. Nothing to assign.")
        return

    print(f"\nLoaded {len(interviewers)} interviewers and {len(calls)} calls successfully.")
    print("Assigning interviews based on availability, skills, domain, YOE, and priority...")

    matched, unmatched = assign_interviews(interviewers, calls)
    print_assignments(matched, unmatched)

    print("\nAssignment process finished.")

# Run the assignment only when this file is executed as a script (or as a
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Starting interview assignment...
Reading interviewer data from: 'Calendly Assignment - People.csv'
Reading calendly call data from: 'Calendly Assignment - Calendly.csv'

Loaded 6 interviewers and 22 calls successfully.
Assigning interviews based on availability, skills, domain, YOE, and priority...

--- Assignment Results ---

Assigned Interviews:
  Total assigned: 20
  - Slot: 07:20-08:00, Interviewer: Dileep (Prio:2), Callee: itsnarain7@gmail.com, Type: Product & Design, FType: Generalist, YOE: 0-5 years
  - Slot: 09:20-10:00, Interviewer: Saranesh (Prio:1), Callee: sudip.karnavat@gmail.com, Type: Finance & Accounting, FType: Generalist, YOE: 10-20 years
  - Slot: 09:20-10:00, Interviewer: Dileep (Prio:2), Callee: sagar.lotiya@gmail.com, Type: Product & Design, FType: Generalist, YOE: 10-20 years
  - Slot: 10:00-10:40, Interviewer: Saranesh (Prio:1), Callee: marketing@growthschool.io, Type: HR & People, FType: Generalist, YOE: Student
  - Slot: 10:40-11:20, Interviewer: Dileep (Prio: