In [39]:
import pandas as pd
import datetime as dt
import holidays
import re

# Define sets of people
# set1: only offered the weekday Shift A (see suitable_people).
set1 = ["jesca", "priscilla"]
# set2: offered weekday Shifts B/C and every weekend/holiday shift.
set2 = ["Joseph", "sam", "shelton"]
# set3: offered every shift on every kind of day.
# NOTE: capitalisation is inconsistent here; the lookup code in this file
# lowercases names before comparing them.
set3 = ["Norbat", "Geoffrey", "antony", "James G", "james M", "Edwin", "Alphonse"]

# Function to get suitable people for a given shift
def suitable_people(day_type, shift):
    """
    Return the candidate pool for one shift on one kind of day.

    day_type: "weekday", "weekend", or "holiday"
    shift: "A", "B", or "C"
    returns: a new list of names; empty when either argument is unrecognised
    """
    # Guard clauses: anything outside the known vocabulary has no candidates.
    if day_type not in ("weekday", "weekend", "holiday"):
        return []
    if shift not in ("A", "B", "C"):
        return []

    # set1 is only ever paired with the weekday A shift; every other valid
    # combination draws from set2. set3 is part of every pool.
    if day_type == "weekday" and shift == "A":
        return set1 + set3
    return set2 + set3


# Function to categorize today
def get_today_category(today_date):
    """
    Classify a date as "holiday", "weekend" or "weekday".

    today_date: a date/datetime-like object (must support ``weekday()``)
    returns: "holiday" when the date is in the Kenyan holiday calendar,
             otherwise "weekend" for Saturday/Sunday, otherwise "weekday"

    The holiday check runs first, so a holiday that falls on a weekend is
    reported as "holiday".
    """
    kenya_holidays = holidays.Kenya()

    if today_date in kenya_holidays:
        return "holiday"

    # weekday() is 5 for Saturday and 6 for Sunday. Unlike comparing
    # strftime("%A") against English names, this does not depend on the
    # process locale.
    if today_date.weekday() >= 5:
        return "weekend"

    return "weekday"


# Function to get suggested people for a shift
def assign_shift(shift, today_date):
    """
    Return the candidate pool for `shift` on the day `today_date` falls on.

    shift: "A", "B", or "C"
    today_date: date/datetime used to decide weekday / weekend / holiday
    returns: list of names from suitable_people for that day type and shift
    """
    return suitable_people(get_today_category(today_date), shift)


# ---------------- Example usage ----------------
# input_date must be a string in "YYYY-MM-DD" format.
input_date = "2025-08-08"   # <-- change date here if needed
# The reference moment is pinned to 16:00 on the chosen day.
today = dt.datetime.strptime(input_date, "%Y-%m-%d").replace(hour=16, minute=0)
print("Today is:", today.strftime("%A, %d-%m-%Y"))

# Optional debug listing of the raw candidate pool per shift:
# for s in ["A", "B", "C"]:
#     print(f"Suggested for shift {s}:", assign_shift(s, today))
# group1 = suggested for shift A on weekdays
# group2 = suggested for shift B/C on weekdays
# group3 = suggested for shift A on weekends/holidays

# Load the historical schedule exactly once. It must sit at top level, not
# inside the (disabled) per-shift loop, otherwise the file is re-read once
# per shift for no benefit.
learningData = pd.read_csv("C:/Users/samy/Downloads/shift_schedule_august_2025.csv")
# print(learningData.head())

# Convenience handles on the three shift columns.
shiftA = learningData['Shift 1 (08:00–16:00)']
shiftB = learningData['Shift 2 (16:00–00:00)']
shiftC = learningData['Shift 3 (00:00–08:00)']


# Function to get last shift info for set2 and set3
def last_shift_info(learningData, input_date, set2, set3):
    """
    Find, for every member of set2 + set3, the most recent shift that ended
    at or before 16:00 on `input_date`.

    learningData: schedule DataFrame with a 'Date' column and the three
                  shift columns holding comma-separated names
    input_date: cutoff day as a "DD-MM-YYYY" string
    set2, set3: lists of names to track
    returns: DataFrame with columns "Name", "Last Shift End" and
             "Hours Since Last Shift", sorted by hours ascending with
             people who have no qualifying shift last
    """
    cutoff = dt.datetime.strptime(input_date, "%d-%m-%Y").replace(hour=16, minute=0)

    def _canon(raw):
        # Same key for "Sam", " sam " and "sam."
        return raw.strip().lower().replace(".", "")

    tracked = set(_canon(member) for member in set2 + set3)
    latest_end = dict.fromkeys(tracked)

    parsed_dates = pd.to_datetime(learningData['Date'], dayfirst=True, errors="coerce")

    # (column, clock time the shift ends, days to add to the row date)
    # Shift 2 is pinned to 23:59 of the row's own day; Shift 3 is counted as
    # ending at 08:00 on the day after the row date.
    shift_ends = (
        ('Shift 1 (08:00–16:00)', dt.time(16, 0), 0),
        ('Shift 2 (16:00–00:00)', dt.time(23, 59), 0),
        ('Shift 3 (00:00–08:00)', dt.time(8, 0), 1),
    )

    for label, record in learningData.iterrows():
        day = parsed_dates[label]
        if pd.isna(day):
            continue  # unparseable date

        for column, clock, day_offset in shift_ends:
            cell = record.get(column)
            if pd.isna(cell):
                continue

            ended_at = dt.datetime.combine(day.date(), clock) + dt.timedelta(days=day_offset)

            for token in str(cell).split(','):
                person = _canon(token)
                if person not in tracked:
                    continue
                if ended_at > cutoff:
                    continue  # shift had not finished by the cutoff
                previous = latest_end[person]
                if previous is None or ended_at > previous:
                    latest_end[person] = ended_at

    rows = [
        {
            "Name": person.capitalize(),
            "Last Shift End": ended_at,
            "Hours Since Last Shift": (
                None if ended_at is None
                else (cutoff - ended_at).total_seconds() / 3600
            ),
        }
        for person, ended_at in latest_end.items()
    ]

    return pd.DataFrame(rows).sort_values(by="Hours Since Last Shift", na_position="last")

# print("Last shift info for set2 and set3 up to", today.strftime("%d-%m-%Y %H:%M"))
# print(last_shift_info(learningData, today.strftime("%d-%m-%Y"), set2, set3))


def last_shift_infoA(learningData, today, set2, set3):
    """
    Find, for every member of set2 + set3, the most recent shift that ended
    at or before `today` minus 8 hours.

    learningData: schedule DataFrame with a 'Date' column and the three
                  shift columns holding comma-separated names
    today: datetime reference moment; the cutoff is 8 hours earlier
    set2, set3: lists of names to track
    returns: DataFrame with columns "Name", "Last Shift End" and
             "Hours Since Last Shift", sorted by hours ascending with
             people who have no qualifying shift last
    """
    cutoff = today - dt.timedelta(hours=8)

    def _canon(raw):
        # Same key for "Sam", " sam " and "sam."
        return raw.strip().lower().replace(".", "")

    tracked = set(_canon(member) for member in set2 + set3)
    endings_by_person = {person: [] for person in tracked}

    parsed_dates = pd.to_datetime(learningData['Date'], dayfirst=True, errors="coerce")

    # (column, clock time the shift ends, days to add to the row date)
    # Shift 2 always counts as ending 23:59 on the row's day; Shift 3 is
    # counted as ending at 08:00 on the day after the row date.
    shift_ends = (
        ('Shift 1 (08:00–16:00)', dt.time(16, 0), 0),
        ('Shift 2 (16:00–00:00)', dt.time(23, 59), 0),
        ('Shift 3 (00:00–08:00)', dt.time(8, 0), 1),
    )

    for label, record in learningData.iterrows():
        day = parsed_dates[label]
        if pd.isna(day):
            continue  # unparseable date

        for column, clock, day_offset in shift_ends:
            cell = record.get(column)
            if pd.isna(cell):
                continue

            ended_at = dt.datetime.combine(day.date(), clock) + dt.timedelta(days=day_offset)

            for token in str(cell).split(','):
                person = _canon(token)
                if person in tracked and ended_at <= cutoff:
                    endings_by_person[person].append(ended_at)

    rows = []
    for person, endings in endings_by_person.items():
        # Latest qualifying shift end, or None when there is none.
        latest = max(endings, default=None)
        rows.append({
            "Name": person.capitalize(),
            "Last Shift End": latest,
            "Hours Since Last Shift": (
                None if latest is None
                else (cutoff - latest).total_seconds() / 3600
            ),
        })

    return pd.DataFrame(rows).sort_values(by="Hours Since Last Shift", na_position="last")
# Rest table as of `today` minus 8 hours (the cutoff last_shift_infoA applies).
# Kept as a module-level name because predict_shiftA_weekend_holiday reads
# last_info_df directly.
last_info_df = last_shift_infoA(learningData, today, set2, set3)

# Print to console
print("Final DataFrame InfoA:")
print(last_info_df.to_string(index=False))

# prediction for shift A on weekdays


def predict_shiftA_weekday(learningData, today, last_shift_infoA):
    """
    Grade every member of set3 for a weekday Shift A by hours of rest.

    learningData: schedule DataFrame handed through to `last_shift_infoA`
    today: datetime reference moment
    last_shift_infoA: callable invoked as
        last_shift_infoA(learningData, today, set2, set3); it must return a
        DataFrame with "Name" and "Hours Since Last Shift" columns
    returns: dict with keys
        "message"        - "Success", or an explanation when `today` is not
                           a weekday
        "graded_set3"    - names from set3 whose grade is above 0
        "set3_rest_info" - {name: {"hours_rest": ..., "grade": ...}}
        "df_result"      - the same information as a DataFrame (None when
                           `today` is not a weekday)

    Grades: >= 32 h rest -> 100, >= 24 h -> 75, >= 16 h -> 50, less -> 0.
    Someone with no recorded shift is graded 100.
    """
    day_type = get_today_category(today)
    if day_type != "weekday":
        return {
            "message": "Shift A weekday prediction is only for weekdays",
            "graded_set3": [],
            "set3_rest_info": {},
            "df_result": None
        }

    last_info_df = last_shift_infoA(learningData, today, set2, set3)

    # Lookup table keyed by the lowercased name: name -> hours since last shift
    last_shift_dict = (
        last_info_df
        .assign(Name=lambda df: df["Name"].str.strip().str.lower())
        .set_index("Name")["Hours Since Last Shift"]
        .to_dict()
    )

    rest_info = {}
    graded_people = set()

    for person in set3:
        # Keys in last_shift_dict are lowercased, so normalise the same way.
        lookup_key = person.strip().lower().replace(".", "")
        hours_rest = last_shift_dict.get(lookup_key)

        # pandas stores a missing "hours" value as NaN in a float column, so
        # an `is None` test alone would miss people who have never worked.
        if pd.isna(hours_rest):
            hours_rest = None

        print(f"Evaluating {person}: hours_rest = {hours_rest}")

        if hours_rest is None:
            grade = 100
        elif hours_rest >= 32:
            grade = 100
        elif hours_rest >= 24:
            grade = 75
        elif hours_rest >= 16:
            grade = 50
        else:
            grade = 0

        rest_info[person] = {"hours_rest": hours_rest, "grade": grade}
        if grade > 0:
            graded_people.add(person)

    df_result = pd.DataFrame.from_dict(rest_info, orient="index").reset_index()
    df_result.rename(columns={"index": "Name"}, inplace=True)

    return {
        "message": "Success",
        "graded_set3": list(graded_people),
        "set3_rest_info": rest_info,
        "df_result": df_result
    }

def most_suitable_people(learningData, today, graded_people, rest_info):
    """
    Shortlist well-rested people who have not worked Shift 1 this week.

    learningData: schedule DataFrame with a 'Date' column; every other
                  column is treated as a shift holding comma-separated names
    today: datetime reference moment
    graded_people: accepted for call compatibility; not used by the filter
    rest_info: {name: {"hours_rest": ..., "grade": ...}}
    returns: DataFrame with "Name" plus the rest_info fields, limited to
             people with grade >= 75 who do not appear in a "Shift 1" column
             between Monday 00:00 of this week and 07:00 today
    """
    # Week window: Monday 00:00 up to 07:00 today.
    # NOTE(review): dates parse to midnight, so today's own row is inside
    # this window - confirm that today's Shift 1 is meant to count.
    start_of_week = (today - dt.timedelta(days=today.weekday())).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    end_of_lookback = today.replace(hour=7, minute=0, second=0, microsecond=0)

    # Long format: one row per (Date, Shift, NameList)
    long_df = (
        learningData
        .melt(id_vars=["Date"], var_name="Shift", value_name="NameList")
        .dropna(subset=["NameList"])
    )

    # dayfirst=True matches how the other functions in this file read the
    # same 'Date' column; without it a DD-MM-YYYY date is read month-first
    # and falls outside the week window.
    long_df["Date"] = pd.to_datetime(long_df["Date"], dayfirst=True, errors="coerce")

    # One row per individual name
    long_df["Name"] = long_df["NameList"].str.split(",")
    long_df = long_df.explode("Name")
    long_df["Name"] = long_df["Name"].str.strip().str.lower()

    # Candidates: grade of 75 or better
    candidates = {p for p, info in rest_info.items() if info["grade"] >= 75}

    # Only Shift A (08:00-16:00, the "Shift 1" column) inside the window
    mask = (
        (long_df["Shift"].str.contains("Shift 1")) &
        (long_df["Date"] >= start_of_week) &
        (long_df["Date"] < end_of_lookback)
    )
    worked_shiftA = set(long_df.loc[mask, "Name"])
    print("worked_shiftA in week:", worked_shiftA  )

    # Drop candidates who already worked Shift A this week
    suitable_lower = {p.lower() for p in candidates if p.lower() not in worked_shiftA}

    df = pd.DataFrame.from_dict(rest_info, orient="index").reset_index()
    df.rename(columns={"index": "Name"}, inplace=True)

    df = df[df["Name"].str.lower().isin(suitable_lower)].reset_index(drop=True)

    return df

# Weekday Shift A pipeline: grade set3 by rest, then shortlist.
result = predict_shiftA_weekday(learningData, today, last_shift_infoA)

if result["message"] == "Success":
    print("\nGraded Shift A (Weekday) Results:")
    print(result["df_result"])

    if not result["graded_set3"]:
        # Nobody scored above 0, so there is nothing to shortlist.
        print("\nNo suitable people available for Shift A today.")
    else:
        most_suitable_people_df = most_suitable_people(
            learningData,
            today,
            result["graded_set3"],
            result["set3_rest_info"],
        )
        print("\nMost Suitable People for Shift A (Weekday):")
        print(most_suitable_people_df)
else:
    # Not a weekday: report why no prediction was made.
    print(result["message"])


# prediction for shift B 
# shift b is for set2 and set3 only
# use last_shift_info function to get rest hours
# must have rested for at least 16 hours and give percentage of rest hours 


def predict_shiftB(learningData, today, last_shift_df):
    """
    Grade set2 + set3 members for Shift B by hours of rest.

    learningData: accepted for call compatibility; not used here
    today: accepted for call compatibility; not used here
    last_shift_df: DataFrame with "Name", "Last Shift End" and
                   "Hours Since Last Shift" columns (the shape returned by
                   last_shift_info)
    returns: dict with keys
        "most_suitable ShiftB"         - list of eligible names
        "set2_set3_rest_grades ShiftB" - {name: {"hours_rest", "grade",
                                          "eligible"}}
        "result_df"                    - one row per person, best first

    Grades: >= 32 h rest -> 100, >= 24 h -> 75, >= 16 h -> 50, less -> 0.
    A person is eligible with at least 16 h of rest or no recorded shift.
    """
    # Eligible pool, lowercased for comparison
    set2_set3_lower = {n.lower() for n in set2 + set3}

    # {Name: {column: value}}
    last_shift_information = last_shift_df.set_index("Name").to_dict(orient="index")

    most_suitable = set()
    rest_grades = {}
    rows = []

    for name, info in last_shift_information.items():
        if name.lower() not in set2_set3_lower:
            continue  # not part of set2 + set3

        hours_rest = info.get("Hours Since Last Shift", 0)
        # pandas stores a missing "hours" value as NaN in a float column, so
        # an `is None` test alone would grade never-worked people as 0.
        if pd.isna(hours_rest):
            hours_rest = None

        if hours_rest is None or hours_rest >= 32:
            grade = 100
        elif hours_rest >= 24:
            grade = 75
        elif hours_rest >= 16:
            grade = 50
        else:
            grade = 0

        eligible = (hours_rest is None) or (hours_rest >= 16)
        if eligible:
            most_suitable.add(name)

        rest_grades[name] = {
            "hours_rest": hours_rest,
            "grade": grade,
            "eligible": eligible
        }

        rows.append({
            "Name": name,
            "Last Shift End": info.get("Last Shift End"),
            "Hours Rested": hours_rest,
            "Grade %": grade,
            "Eligible": eligible
        })

    # People in set2 + set3 who are absent from last_shift_df altogether.
    # The set of known names is built once, outside the loop.
    names_in_df = {n.lower() for n in last_shift_information}
    for person in set2_set3_lower:
        if person in names_in_df:
            continue
        rest_grades[person] = {"hours_rest": None, "grade": 100, "eligible": True}
        most_suitable.add(person)
        rows.append({
            "Name": person,
            "Last Shift End": None,
            "Hours Rested": None,
            "Grade %": 100,
            "Eligible": True
        })
        print(f" - {person}: never worked → grade 100%, ELIGIBLE")

    # Naming the columns keeps sort_values working when there are no rows.
    result_df = pd.DataFrame(
        rows,
        columns=["Name", "Last Shift End", "Hours Rested", "Grade %", "Eligible"],
    ).sort_values(
        by=["Eligible", "Grade %", "Hours Rested"],
        ascending=[False, False, False],
    )

    return {
        "most_suitable ShiftB": list(most_suitable),
        "set2_set3_rest_grades ShiftB": rest_grades,
        "result_df": result_df
    }
# Shift B grading uses the table from last_shift_info, which parses its date
# argument as "DD-MM-YYYY" and applies a 16:00 cutoff on that day.
results = predict_shiftB(learningData, today, last_shift_info(learningData, today.strftime("%d-%m-%Y"), set2, set3))
print("\nFinal DataFrame:\n", results["result_df"])




def predict_shiftC(learningData, input_date, set2, set3):
    """
    Find, for every member of set2 + set3, the most recent shift that ended
    at or before midnight at the end of `input_date`.

    learningData: schedule DataFrame with a 'Date' column and the three
                  shift columns holding comma-separated names
    input_date: day as a "DD-MM-YYYY" string; the cutoff is 00:00 of the
                following day
    set2, set3: lists of names to track
    returns: DataFrame with columns "Name", "Last Shift End" and
             "Hours Since Last ShiftC", sorted by hours ascending with
             people who have no qualifying shift last
    """
    next_day = dt.datetime.strptime(input_date, "%d-%m-%Y") + dt.timedelta(days=1)
    cutoff = next_day.replace(hour=0, minute=0)

    def _canon(raw):
        # Same key for "Sam", " sam " and "sam."
        return raw.strip().lower().replace(".", "")

    tracked = set(_canon(member) for member in set2 + set3)
    latest_end = dict.fromkeys(tracked)

    parsed_dates = pd.to_datetime(learningData['Date'], dayfirst=True, errors="coerce")

    # (column, clock time the shift ends, days to add to the row date)
    # Shift 2 is pinned to 23:59 of the row's own day; Shift 3 is counted as
    # ending at 08:00 on the day after the row date.
    shift_ends = (
        ('Shift 1 (08:00–16:00)', dt.time(16, 0), 0),
        ('Shift 2 (16:00–00:00)', dt.time(23, 59), 0),
        ('Shift 3 (00:00–08:00)', dt.time(8, 0), 1),
    )

    for label, record in learningData.iterrows():
        day = parsed_dates[label]
        if pd.isna(day):
            continue  # unparseable date

        for column, clock, day_offset in shift_ends:
            cell = record.get(column)
            if pd.isna(cell):
                continue

            ended_at = dt.datetime.combine(day.date(), clock) + dt.timedelta(days=day_offset)

            for token in str(cell).split(','):
                person = _canon(token)
                if person not in tracked:
                    continue
                if ended_at > cutoff:
                    continue  # shift had not finished by the cutoff
                previous = latest_end[person]
                if previous is None or ended_at > previous:
                    latest_end[person] = ended_at

    rows = [
        {
            "Name": person.capitalize(),
            "Last Shift End": ended_at,
            "Hours Since Last ShiftC": (
                None if ended_at is None
                else (cutoff - ended_at).total_seconds() / 3600
            ),
        }
        for person, ended_at in latest_end.items()
    ]

    return pd.DataFrame(rows).sort_values(by="Hours Since Last ShiftC", na_position="last")
# Rest table for Shift C. predict_shiftC parses its date argument as
# "DD-MM-YYYY" and measures rest up to 00:00 of the following day.
shiftC_df = predict_shiftC(
    learningData, 
    today.strftime("%d-%m-%Y"), 
    set2, 
    set3
)
print(shiftC_df)


# prediction for shift A of weekends and holidays
# must have not worked for the last weekend or holiday shiftA within the last 14 days
# only people from set2 and set3 are considered
# must have rested for at least 16 hours
def predict_shiftA_weekend_holiday(learningData, today):
    """
    Suggest people for Shift A on a weekend or holiday.

    learningData: schedule DataFrame with a 'Date' column and a
                  'Shift 1 (08:00–16:00)' column of comma-separated names
    today: datetime reference moment
    returns: a plain message string when `today` is neither a weekend nor a
             holiday; otherwise a dict with
        "most_suitable"     - {name: {"hours_rest", "grade"}} for available
                              people whose grade is above 0
        "fallback_suitable" - the same mapping for every available person

    "Available" means in the weekend/holiday Shift A pool and not on a
    weekend/holiday Shift 1 inside the lookback window. Rest is measured
    from the module-level `last_info_df` table up to 08:00 on `today`.
    Grades: >= 32 h rest -> 100, >= 24 h -> 75, >= 16 h -> 50, less -> 0;
    someone with no recorded shift is graded 100.
    """
    day_type = get_today_category(today)
    if day_type not in ["weekend", "holiday"]:
        return "Shift A weekend/holiday prediction is only for weekends and holidays"

    # Lookback window.
    # NOTE(review): the printed messages below speak of 14 days / two
    # weekends, but this window spans today-8d .. today-1d - confirm which
    # is intended.
    start_of_lookback = today - dt.timedelta(days=8)
    end_of_lookback = today - dt.timedelta(days=1)

    learningData_dates = pd.to_datetime(learningData['Date'], dayfirst=True, errors="coerce")
    in_window = (learningData_dates >= start_of_lookback) & (learningData_dates <= end_of_lookback)

    # Build the holiday calendar once rather than once per row.
    kenya_holidays = holidays.Kenya()

    set2_set3_lower = {n.lower() for n in set2 + set3}
    worked_shiftA_weekend_holiday = set()
    last_shift_time = {}

    rows_in_window = zip(learningData_dates[in_window], learningData[in_window].iterrows())
    for row_date, (_, row) in rows_in_window:
        # weekday() >= 5 means Saturday/Sunday, independent of locale.
        is_weekend = row_date.weekday() >= 5
        if not (is_weekend or row_date in kenya_holidays):
            continue

        names_cell = row.get('Shift 1 (08:00–16:00)')
        if pd.isna(names_cell):
            continue

        for name in str(names_cell).split(','):
            name_clean = name.strip().lower()
            if name_clean not in set2_set3_lower:
                continue
            worked_shiftA_weekend_holiday.add(name_clean)
            # Track the most recent Shift A end (16:00) for the debug print
            shift_datetime = dt.datetime.combine(row_date.date(), dt.time(16, 0))
            if name_clean not in last_shift_time or shift_datetime > last_shift_time[name_clean]:
                last_shift_time[name_clean] = shift_datetime
                print("lastshift infor c: ", name_clean, shift_datetime)

    # Pool for this day type, ignoring history
    suitable_for_shiftA_wh = {n.lower() for n in suitable_people(day_type, "A")}

    # Exclude those who worked a weekend/holiday Shift A inside the window
    available_people = suitable_for_shiftA_wh - worked_shiftA_weekend_holiday

    print("Worked last two weekends", worked_shiftA_weekend_holiday)

    last_shift_dict = last_info_df.set_index(last_info_df["Name"].str.lower())["Last Shift End"].to_dict()
    print("last info: ", last_shift_dict)

    # Rest is measured up to the start of Shift A (08:00) on `today`.
    reference_time = dt.datetime.combine(today.date(), dt.time(8, 0))

    most_suitable = set()
    rest_info = {}

    for person in available_people:
        last_time = last_shift_dict.get(person.lower())

        # A missing shift end comes back from the DataFrame as NaT rather
        # than None, so test with pd.isna to catch both.
        if pd.isna(last_time):
            hours_rest = None
            grade = 100
        else:
            hours_rest = (reference_time - last_time).total_seconds() / 3600
            if hours_rest >= 32:
                grade = 100
            elif hours_rest >= 24:
                grade = 75
            elif hours_rest >= 16:
                grade = 50
            else:
                grade = 0

        if grade > 0:
            most_suitable.add(person)

        rest_info[person] = {"hours_rest": hours_rest, "grade": grade}

    result_most_suitable = {person: rest_info[person] for person in most_suitable}
    resulting_available_people = {person: rest_info[person] for person in available_people}

    print("Lookback range:", start_of_lookback.date(), "to", end_of_lookback.date())
    print("Worked Shift A on weekends/holidays in last 14 days:", worked_shiftA_weekend_holiday)
    print("All suitable for Shift A on weekends/holidays:", suitable_for_shiftA_wh)
    print("Available (excluded recent workers):", resulting_available_people)
    # Grade > 0 corresponds to at least 16 hours of rest.
    print("Most suitable (rested ≥16h):", result_most_suitable)

    return {
        "most_suitable": (result_most_suitable),
        "fallback_suitable": (resulting_available_people)
    }

# On a weekday predict_shiftA_weekend_holiday returns an explanatory string
# instead of a dict, so this prints that message.
print("Predicted people for Shift A on weekends/holidays:", 
      predict_shiftA_weekend_holiday(learningData, today))




# output in the form of an array for all shifts








Today is: Friday, 08-08-2025
Final DataFrame InfoA:
    Name      Last Shift End  Hours Since Last Shift
 Shelton 2025-08-08 08:00:00                0.000000
Geoffrey 2025-08-08 08:00:00                0.000000
     Sam 2025-08-07 23:59:00                8.016667
  Norbat 2025-08-07 23:59:00                8.016667
Alphonse 2025-08-07 16:00:00               16.000000
  Joseph 2025-08-07 08:00:00               24.000000
  Antony 2025-08-07 08:00:00               24.000000
 James m 2025-08-06 23:59:00               32.016667
 James g 2025-08-06 23:59:00               32.016667
   Edwin 2025-08-01 23:59:00              152.016667
Evaluating Norbat: hours_rest = 8.016666666666667
Evaluating Geoffrey: hours_rest = 0.0
Evaluating antony: hours_rest = 24.0
Evaluating James G: hours_rest = 32.016666666666666
Evaluating james M: hours_rest = 32.016666666666666
Evaluating Edwin: hours_rest = 152.01666666666668
Evaluating Alphonse: hours_rest = 16.0

Graded Shift A (Weekday) Results:
       Name 