In [1]:
import pandas

In [2]:
# NOTE(review): absolute, machine-specific path; pandas.read_pickle unpickles the file,
# so only point this at pickles from a trusted source.
VVZ_MODEL_PATH = r"C:\Users\Philipp\Documents\WU\bachelorarbeit\data\raw\vvz_model.pkl"
df = pandas.read_pickle(VVZ_MODEL_PATH)

In [6]:
# Show five random rows that have a groupId set.
df.loc[df["groupId"].notna()].sample(n=5)

Unnamed: 0,courseId,dates,lvLeiter,planpunkte,planpunktIds,ects,groupId
1677,6054,"[{'start': 2025-03-14 08:00:00, 'end': 2025-03...","Mag.Dr. Karl Stückler, BSc (WU), LL.B. (WU), D...",[{'text': 'Jahresabschluss und Unternehmensber...,[5105],8.0,jub
238,4272,"[{'start': 2025-03-14 09:00:00, 'end': 2025-03...",Mag. Nicole Rychly-Resetar,[{'text': 'Standards wissenschaftlichen Arbeit...,"[5136, 6911]",3.0,swa
1488,5755,"[{'start': 2025-03-10 16:30:00, 'end': 2025-03...","Univ.Prof. Jonas Bunte, Ph.D., Steffen Bettin...",[{'text': 'Zukunftsfähiges Wirtschaften: Verti...,[5117],4.0,zuwi
1497,5765,"[{'start': 2025-03-14 16:00:00, 'end': 2025-03...",Mag. Michael Zwick-Pevny,[{'text': 'Jahresabschluss und Unternehmensber...,[5105],8.0,jub
9,4010,"[{'start': 2025-05-05 08:00:00, 'end': 2025-05...",Dipl.-Ing. David Hirnschall,"[{'text': 'Statistik', 'href': '/cgi-bin/vvz.p...",[6024],4.0,s


In [105]:
import pickle
from loguru import logger
from typing import TypeVar, Literal
from itertools import combinations
from collections import defaultdict
from datetime import datetime, time
from functools import lru_cache

from bachelorarbeit.dtypes import Offering
import bachelorarbeit.constraints as C

# Generic element type used in the type annotations of `flatten`.
T = TypeVar("T")


# === Implicit constraints ===


# Permissive defaults, written onto the imported constraints module `C` so that every
# function below reads them from there.
C.HOUR_LOAD_CONSTRAINT = [0, 100]
C.COURSE_COUNT_CONSTRAINT = [0, 100]
C.FIXED_TIME_CONSTRAINTS = {}
C.COURSE_PRIORITY_CONSTRAINTS = {}


def _course_ids_with_priority(wanted_priority: int) -> list[int]:
    """Return the (int-cast) course ids whose configured priority equals `wanted_priority`."""
    return [
        int(course_id)
        for course_id, course_priority in C.COURSE_PRIORITY_CONSTRAINTS.items()
        if course_priority == wanted_priority
    ]


# Priority -100 marks a course that must not be scheduled, +100 one that must be.
C.COURSE_MUST_NOT_SCHEDULE = _course_ids_with_priority(-100)
C.COURSE_MUST_SCHEDULE = _course_ids_with_priority(100)



# NOTE(review): absolute, machine-specific path; pickle.load can execute arbitrary code,
# so only load this file if it comes from a trusted source.
OFFERINGS_PATH = r"C:\Users\Philipp\Documents\WU\bachelorarbeit\data\raw\offerings.pkl"

with open(OFFERINGS_PATH, "rb") as f:
    offerings: list[Offering] = pickle.load(f)


def find_offering(courseId: int) -> Offering | None:
    """
    Return the first entry of the module-level `offerings` list whose `courseId`
    equals `courseId`, or None when there is no such entry.

    Only the "not found" case yields None. Genuine errors (for example `offerings`
    not being loaded yet) propagate instead of being silently turned into None.
    """
    # next() stops at the first match instead of materialising every match.
    return next((o for o in offerings if o.courseId == courseId), None)



def is_valid_schedule(schedule: list[Offering], ignore_length: bool = False, verbose=False):
    """
    Check a schedule against all configured constraints.

    A schedule is rejected when
      * it is None, or contains a None entry (e.g. an id `find_offering` could not resolve),
      * any two sessions overlap (`schedule_overlaps`),
      * the number of offerings is outside `C.COURSE_COUNT_CONSTRAINT` (skipped with `ignore_length`),
      * a course id from `C.COURSE_MUST_SCHEDULE` is missing,
      * the weekly hour range leaves `C.HOUR_LOAD_CONSTRAINT` (skipped with `ignore_length`),
      * any offering violates a hard constraint (`violates_hard_constraints`).

    Args:
        schedule: offerings to validate; None is treated as invalid.
        ignore_length: skip the course-count and hour-load checks (used for partial schedules).
        verbose: emit a debug log line naming the first failed check.

    Returns:
        True if every check passes, otherwise False.
    """
    if schedule is None:
        return False

    # An unresolved offering would otherwise crash with AttributeError further down.
    if any(offering is None for offering in schedule):
        if verbose:
            logger.debug("schedule contains an unresolved offering")
        return False

    if schedule_overlaps(schedule):
        if verbose:
            logger.debug("schedule overlaps")
        return False

    if not ignore_length:
        min_courses, max_courses = C.COURSE_COUNT_CONSTRAINT[0], C.COURSE_COUNT_CONSTRAINT[1]
        if len(schedule) < min_courses or len(schedule) > max_courses:
            if verbose:
                logger.debug("schedule does not satisfy course count constraint")
            return False

    # Build the id set once instead of once per mandatory course.
    scheduled_ids = {o.courseId for o in schedule}
    if not all(cId in scheduled_ids for cId in C.COURSE_MUST_SCHEDULE):
        if verbose:
            logger.debug("mandatory course not scheduled")
        return False

    if not ignore_length:
        # The lightest week must reach the lower bound, the heaviest must stay below the upper one.
        min_hrs, max_hrs = weekly_schedule_hours(schedule)
        if min_hrs < C.HOUR_LOAD_CONSTRAINT[0] or max_hrs > C.HOUR_LOAD_CONSTRAINT[1]:
            if verbose:
                logger.debug("schedule does not satisfy hour load constraint")
            return False

    for offering in schedule:
        if violates_hard_constraints(offering, verbose=verbose):
            if verbose:
                logger.debug("schedule violates hard constraints")
            return False

    return True


def schedule_overlaps(schedule: list[Offering]):
    """
    Return True if any two sessions in the schedule overlap in time.

    All sessions of all offerings are pooled, so two sessions of the same offering
    are compared with each other as well.
    """
    sessions = flatten([entry.dates for entry in schedule])
    return any(
        dates_overlap(first["start"], first["end"], second["start"], second["end"])
        for first, second in combinations(sessions, 2)
    )


# === Validate constraints ===


def flatten(xss: list[list[T]]) -> list[T]:
    """Concatenate the inner lists of `xss` into one flat list, preserving order."""
    flat = []
    for inner in xss:
        flat.extend(inner)
    return flat


def dates_overlap(start1: datetime, end1: datetime, start2: datetime, end2: datetime):
    """
    Return True if the ranges [start1, end1] and [start2, end2] share more than a boundary.

    Ranges that merely touch (one ends exactly when the other starts) do not overlap.
    """
    if not (start1 < end2):
        # the first range starts only at or after the end of the second
        return False
    return end1 > start2


def times_overlap(start1: datetime, end1: datetime, start2: int, end2: int):
    """
    Return True if the time-of-day span of a session overlaps an hour window.

    Only the clock times of `start1` / `end1` are compared; their dates are ignored.
    Spans that merely touch the window boundary do not overlap.

    Args:
        start1: session start.
        end1: session end.
        start2: first hour of the window, 0..24.
        end2: end hour of the window, 0..24. 24 means "end of day", which
            `datetime.time(24, 0)` cannot represent.

    Raises:
        ValueError: if `start2` or `end2` lies outside 0..24.
    """
    for hour in (start2, end2):
        if not 0 <= hour <= 24:
            raise ValueError(f"hour must be in 0..24, got {hour!r}")

    def _micros_since_midnight(moment: datetime) -> int:
        # Same resolution as datetime.time, so results match the time-object comparison.
        return ((moment.hour * 60 + moment.minute) * 60 + moment.second) * 1_000_000 + moment.microsecond

    micros_per_hour = 3_600_000_000
    session_start = _micros_since_midnight(start1)
    session_end = _micros_since_midnight(end1)

    return session_start < end2 * micros_per_hour and session_end > start2 * micros_per_hour



def violates_fixed_time(start: datetime, end: datetime):
    """
    Return True if the session [start, end] collides with any entry of `C.FIXED_TIME_CONSTRAINTS`.

    Each entry is read as (day name, start hour, end hour). A session collides when it
    starts on that day and its clock times overlap the hour window.
    """
    return any(
        is_on_day(start, blocked[0]) and times_overlap(start, end, blocked[1], blocked[2])
        for blocked in C.FIXED_TIME_CONSTRAINTS
    )


def is_on_day(
    dt: datetime, day: Literal["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]
) -> bool:
    """
    Return True if `dt` falls on the weekday named `day` (English name, case-insensitive).

    The name is derived from `dt.weekday()` rather than `strftime("%A")`, so the
    result does not depend on the process locale. Unknown day names yield False.
    """
    # Indexed by datetime.weekday(): Monday is 0, Sunday is 6.
    weekday_names = ("monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday")
    return weekday_names[dt.weekday()] == day.lower()


@lru_cache(maxsize=500)
def violates_hard_constraints(offering: Offering, verbose: bool = False, ignore_must_schedule: bool = True):
    """
    Return True if a single offering breaks a hard constraint.

    Checks, in order:
      1. unless `ignore_must_schedule`, the course id must be in `C.COURSE_MUST_SCHEDULE`,
      2. the course id must not be in `C.COURSE_MUST_NOT_SCHEDULE`,
      3. no session may collide with a fixed time window (`violates_fixed_time`).

    Args:
        offering: the offering to check; it must be hashable because results are cached.
        verbose: emit a debug log line naming the violated constraint.
        ignore_must_schedule: skip check 1.

    Note:
        Results are memoised per (offering, verbose, ignore_must_schedule). The `C.*`
        values are not part of the cache key, so call
        `violates_hard_constraints.cache_clear()` after changing them. Because of the
        cache, a debug line is only emitted the first time a given argument
        combination is evaluated.
    """
    if not ignore_must_schedule and offering.courseId not in C.COURSE_MUST_SCHEDULE:
        if verbose:
            logger.debug("mandatory course id not scheduled")
        return True

    if offering.courseId in C.COURSE_MUST_NOT_SCHEDULE:
        if verbose:
            logger.debug("course id not allowed")
        return True

    for date in offering.dates:
        if violates_fixed_time(date["start"], date["end"]):
            if verbose:
                logger.debug("violates fixed time")
            return True

    return False


def weekly_schedule_hours(schedule: list[Offering]) -> tuple[float, float]:
    """
    Compute the lightest and heaviest weekly hour load of a schedule.

    Sessions are grouped by the ISO (year, week) of their start. Sessions that start
    or end on Saturday/Sunday are ignored. Within a week, overlapping sessions are
    merged first so that parallel time is counted once.

    Returns:
        (min_hours, max_hours) over all weeks that contain at least one counted
        session, or (0.0, 0.0) when there is none.
    """
    sessions_by_week = {}  # (iso year, iso week) -> [(start, end), ...]

    for entry in schedule:
        for session in entry.dates:
            begins, finishes = session["start"], session["end"]
            # weekday() is 0..4 for Monday..Friday
            if begins.weekday() <= 4 and finishes.weekday() <= 4:
                iso = begins.isocalendar()
                sessions_by_week.setdefault((iso[0], iso[1]), []).append((begins, finishes))

    if not sessions_by_week:
        return (0.0, 0.0)

    hours_per_week = []
    for week_sessions in sessions_by_week.values():
        disjoint = merge_intervals(week_sessions)
        hours_per_week.append(sum((stop - go).total_seconds() / 3600 for go, stop in disjoint))

    return (min(hours_per_week), max(hours_per_week))


def merge_intervals(intervals: list[tuple[datetime, datetime]]) -> list[tuple[datetime, datetime]]:
    """
    Merge overlapping or touching intervals into a sorted list of disjoint intervals.

    Used for the hour-load constraint so that courses running at the same time are
    not counted twice. The input list is left untouched.

    Args:
        intervals: (start, end) pairs in any order.

    Returns:
        Disjoint (start, end) pairs ordered by start; an empty list for empty input.
    """
    if not intervals:
        return []

    # sorted() works on a copy; list.sort() would reorder the caller's list.
    ordered = sorted(intervals, key=lambda interval: interval[0])
    merged = [ordered[0]]

    for current_start, current_end in ordered[1:]:
        last_start, last_end = merged[-1]
        if current_start <= last_end:  # overlaps or touches the previous interval
            merged[-1] = (last_start, max(last_end, current_end))
        else:
            merged.append((current_start, current_end))
    return merged


# === Calculate mark ===


@lru_cache(maxsize=500)
def get_offering_mark(offering: Offering):
    """
    Score a single offering.

    The mark is the course's entry in `C.COURSE_PRIORITY_CONSTRAINTS` (0 if absent) plus
    the mark of every soft fixed-time entry that one of the offering's sessions falls into
    (added once per matching session).

    Note:
        Results are memoised per offering; the `C.*` values are not part of the cache key,
        so call `get_offering_mark.cache_clear()` after changing them.
    """
    priorities = C.COURSE_PRIORITY_CONSTRAINTS
    course_id = offering.courseId
    # The priority mapping appears to be keyed by strings (the must / must-not lists cast
    # its keys with int()), while courseId is compared against ints elsewhere. Look the id
    # up as-is first and as a string second so either key type is honoured.
    mark = priorities.get(course_id, priorities.get(str(course_id), 0))

    for constraint in C.FIXED_TIME_CONSTRAINTS:
        # NOTE(review): assumes soft entries have the shape [day, start_hour, end_hour, mark],
        # matching the [day, start_hour, end_hour] prefix read by violates_fixed_time - confirm.
        # Entries without a fourth element carry no mark and are skipped here.
        if len(constraint) < 4:
            continue
        day, start_hour, end_hour, mark_change = constraint[:4]
        if abs(mark_change) == C.P:
            # |mark| == C.P is excluded from scoring (presumably a hard constraint - confirm).
            continue
        for date in offering.dates:
            if is_on_day(date["start"], day) and times_overlap(date["start"], date["end"], start_hour, end_hour):
                mark += mark_change
    return mark


def get_schedule_mark(schedule: list[Offering]):
    """
    Sum the marks of all offerings in a schedule.

    Returns None when the schedule is None or as soon as an offering violates a hard
    constraint; otherwise the total mark (0 for an empty schedule).
    """
    if schedule is None:
        return None

    total = 0
    for entry in schedule:
        # one hard violation invalidates the whole schedule
        if violates_hard_constraints(entry):
            return None
        total = total + get_offering_mark(entry)
    return total


def rebuild_available_offerings(
    schedule: list[Offering], available_offerings: list[Offering], v3: bool = False
) -> list[Offering]:
    """
    Narrow `available_offerings` down to those that can still be added to `schedule`.

    An offering is dropped when its course id or its group id is already present in the
    schedule, or when appending it makes the schedule invalid (the length-dependent
    checks are ignored). With `v3`, only the tail of the remaining list, starting at
    index (len - 1) // 2, is returned.
    """
    used_course_ids = [taken.courseId for taken in schedule]
    used_group_ids = list({taken.groupId for taken in schedule})

    still_possible = [
        candidate
        for candidate in available_offerings
        if candidate.courseId not in used_course_ids
        and candidate.groupId not in used_group_ids
        and is_valid_schedule([*schedule, candidate], ignore_length=True)
    ]

    if not v3:
        return still_possible

    first_kept_index = (len(still_possible) - 1) // 2
    return still_possible[first_kept_index:]


def preprocess(offerings: list[Offering]) -> list[Offering]:
    """
    Prepare the raw offerings for the search.

    Steps:
      1. log the active constraints,
      2. drop offerings without a group id and offerings that violate a hard constraint
         (the original note refers to this as filtering by "variable inconsistency", p357
         of an unnamed reference),
      3. store each kept offering's mark on the offering itself (`offering.mark`),
      4. sanity-check that the must-schedule courses exist and do not overlap each other.

    Returns:
        The kept offerings, ordered by mark with the highest first.

    Raises:
        Exception: if the must-schedule sanity check fails.
    """
    logger.info(f"preprocessing {len(offerings)} offerings")
    logger.info(f"{C.COURSE_COUNT_CONSTRAINT=}")
    logger.info(f"{C.COURSE_PRIORITY_CONSTRAINTS=}")
    logger.info(f"{C.COURSE_MUST_SCHEDULE=}")
    logger.info(f"{C.COURSE_MUST_NOT_SCHEDULE=}")
    logger.info(f"{C.FIXED_TIME_CONSTRAINTS=}")
    logger.info(f"{C.HOUR_LOAD_CONSTRAINT=}")

    keep_offerings = []
    for candidate in offerings:
        if candidate.groupId is None:
            continue
        if violates_hard_constraints(candidate):
            continue
        keep_offerings.append(candidate)

    # Marks are attached to the offering objects themselves, not to copies.
    for kept in keep_offerings:
        kept.mark = get_offering_mark(kept)

    must_schedule = get_must_schedule_courses(keep_offerings)
    if schedule_overlaps(must_schedule):
        logger.error(f"sanitfy check failed: must schedule courses {[o.courseId for o in must_schedule]} overlap")
        raise Exception("insane")

    logger.success(f"preprocessed offerings, keep {len(keep_offerings)}")

    def _descending_mark(kept_offering):
        return -kept_offering.mark

    return sorted(keep_offerings, key=_descending_mark)


def get_must_schedule_courses(offerings: list[Offering]) -> list[Offering]:
    """
    Look up the offering for every course id in `C.COURSE_MUST_SCHEDULE`.

    For each id the first offering in `offerings` with that course id is taken.

    Raises:
        Exception: if a mandatory course id has no offering in `offerings`.
    """
    found: list[Offering] = []
    for offerId in C.COURSE_MUST_SCHEDULE:
        first_match = next((candidate for candidate in offerings if candidate.courseId == offerId), None)
        if first_match is None:
            logger.error(f"sanitfy check failed: must schedule course {offerId} violates hard constraints")
            raise Exception("insane")
        found.append(first_match)
    return found


In [None]:
"""
Szenario 1
"""

# Course ids under test; the trailing notes show the matching priority entry and course name.
scenario_1_course_ids = [
    5576, # "5576": 100, # ADP
    5033, # "5033": 100, # Makro
    # 6133, # "6133": 100, # ZUWI
    # 6054, # "6054": 100, # JUB
    4010, # "4010": 100, # Statistik
]

scenario_1_schedule = [find_offering(cId) for cId in scenario_1_course_ids]

is_valid_schedule(scenario_1_schedule, ignore_length=True, verbose=True)

True

In [106]:
"""
Szenario 2
"""

import json
import random

# Number of distinct courses to mark as "must not schedule" (priority -100).
N_PICKS = 60

preprocessed_offerings = preprocess(offerings)

# Shuffle the candidates once and take the first offering seen for each course id.
# This yields the same kind of random selection as repeatedly drawing until an unseen
# course id turns up, but it always terminates: if fewer than N_PICKS distinct course
# ids exist, every one of them is picked.
shuffled_candidates = [o for o in preprocessed_offerings if o.groupId]
random.shuffle(shuffled_candidates)

picks = []
picked_course_ids = set()
for candidate in shuffled_candidates:
    if len(picks) == N_PICKS:
        break
    if candidate.courseId in picked_course_ids:
        continue
    picked_course_ids.add(candidate.courseId)
    picks.append(candidate)


print(json.dumps({
    str(p.courseId): -100 for p in picks
}, indent=4))


print(len([o for o in offerings if o.groupId]))

[32m2025-10-31 15:26:52.433[0m | [1mINFO    [0m | [36m__main__[0m:[36mpreprocess[0m:[36m246[0m - [1mpreprocessing 2154 offerings[0m
[32m2025-10-31 15:26:52.433[0m | [1mINFO    [0m | [36m__main__[0m:[36mpreprocess[0m:[36m247[0m - [1mC.COURSE_COUNT_CONSTRAINT=[0, 100][0m
[32m2025-10-31 15:26:52.433[0m | [1mINFO    [0m | [36m__main__[0m:[36mpreprocess[0m:[36m248[0m - [1mC.COURSE_PRIORITY_CONSTRAINTS={}[0m
[32m2025-10-31 15:26:52.433[0m | [1mINFO    [0m | [36m__main__[0m:[36mpreprocess[0m:[36m249[0m - [1mC.COURSE_MUST_SCHEDULE=[][0m
[32m2025-10-31 15:26:52.433[0m | [1mINFO    [0m | [36m__main__[0m:[36mpreprocess[0m:[36m250[0m - [1mC.COURSE_MUST_NOT_SCHEDULE=[][0m
[32m2025-10-31 15:26:52.433[0m | [1mINFO    [0m | [36m__main__[0m:[36mpreprocess[0m:[36m251[0m - [1mC.FIXED_TIME_CONSTRAINTS={}[0m
[32m2025-10-31 15:26:52.434[0m | [1mINFO    [0m | [36m__main__[0m:[36mpreprocess[0m:[36m252[0m - [1mC.HOUR_LOAD_CONSTR

In [104]:


def print_dates(dd: list[dict[Literal["start", "end"], datetime]]):
    """Print one line per session: weekday name, date, start time and end time."""
    for session in dd:
        begins = session["start"].strftime("%A %Y-%m-%d %H:%M")
        ends = session["end"].strftime("%H:%M")
        print(begins, "-", ends)

offering = random.choice(offerings)


# Block Mondays from 10:00 to 13:00.
C.FIXED_TIME_CONSTRAINTS = [
    ["monday", 10, 13]
]
# Both functions are lru_cache-decorated and read C.FIXED_TIME_CONSTRAINTS; drop results
# computed under the previous constraints so the check below reflects the new value.
violates_hard_constraints.cache_clear()
get_offering_mark.cache_clear()

print_dates(offering.dates)
violates_hard_constraints(offering, verbose=True, ignore_must_schedule=True)

Monday 2025-03-10 16:30 - 18:00
Wednesday 2025-03-12 16:30 - 18:00
Monday 2025-03-17 16:30 - 18:00
Wednesday 2025-03-19 10:30 - 13:00
Monday 2025-03-24 10:30 - 13:00
Wednesday 2025-03-26 10:30 - 13:00
Monday 2025-03-31 10:30 - 13:00
Wednesday 2025-04-02 10:30 - 13:00
Monday 2025-04-07 10:30 - 13:00
Wednesday 2025-04-09 10:30 - 13:00
Monday 2025-04-28 08:00 - 09:30
[32m2025-10-31 15:12:16.533[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mviolates_hard_constraints[0m:[36m137[0m - [34m[1mviolates fixed time[0m


True