In [1]:
import pandas as pd

In [2]:
df = pd.read_pickle(r"C:\Users\Philipp\Documents\WU\bachelorarbeit\data\raw\vvz_model.pkl")

In [4]:
df_winf = df[~pd.isna(df["groupId"])]

### Helper functions

```
print_schedule(schedule)
print_offering(offering)
schedule_overlaps(schedule)
```

In [52]:
import datetime
from loguru import logger

def print_schedule(schedule):
    logger.debug(
        f"Schedule:"
    )
    for offering in schedule:
        print_offering(offering)


def print_offering(offering):
    logger.debug(
        f"  Offering {offering.groupId} (LV-ID {offering.courseId}, {offering.ects} ECTS)"
    )
    for date in offering.dates:
        logger.debug(f"    {_format(date['start'])} - {date['end'].strftime('%H:%M')}")


def _format(d: datetime.datetime):
    day = d.strftime("%A")

    return f"{day}{' ' * (10 - len(day))}{d.strftime('%d.%m %H:%M')}"


def schedule_overlaps(schedule) -> bool:
    all_sessions = []
    for offering in schedule:
        all_sessions.extend(offering.dates)

    if not all_sessions:
        return False

    all_sessions.sort(key=lambda x: x["start"])

    for i in range(1, len(all_sessions)):
        prev_session = all_sessions[i - 1]
        curr_session = all_sessions[i]

        if curr_session["start"] < prev_session["end"]:
            logger.debug(f"overlap: {curr_session}, {prev_session}")
            return True

    return False

In [22]:
rounds = 3
objectives = [0.25, 0.33, 0.5, 0.667, 0.75]

import math, json

for o in objectives:
    pct = math.floor(o * 100)
    for i in range(rounds):
        r = i + 1
        obj = {
        "title": f"{pct}% aller Kurse können nicht mehr belegt werden (Zufällige Auswahl).",
            "COURSE_PRIORITY_CONSTRAINTS": { str(cid): -100 for cid in df_winf.sample(frac=o)["courseId"].to_list() },

        "FIXED_TIME_CONSTRAINTS": [
            ["monday", 1, 23, 7],
            ["tuesday", 1, 23, 15],
            ["wednesday", 1, 23, 15],
            ["thursday", 1, 23, 7],
            ["friday", 1, 23, 7],
            ["saturday", 1, 23, -7]
        ],

            }

        with open(fr"C:\Users\Philipp\Documents\WU\bachelorarbeit\models\config\constraint_4442_{pct}_pct_blocked_{r}.json", "w") as f:
            json.dump(obj, f, indent=4)

## Create constraint cases with fixed courses

In [58]:
rounds = 5
objectives = [1, 2, 3, 4, 5, 7, 10]

import math, json, random


def pick_n_courses_from_random_groups(n):
    used = []
    courses = []
    while len(courses) < n:
        groupId = pick_group(used)
        logger.debug(f"scheduled: {len(courses)}")

        for i in range(1000):
            c = pick_course(groupId)
            if not schedule_overlaps([*courses, c]):
                used.append(groupId)
                courses.append(pick_course(groupId))
                break
    return courses

def pick_group(picked_groups):
    groupIds = set(df_winf["groupId"].unique())
    remainingGroupIds = groupIds.difference(set(picked_groups))
    return random.choice(list(remainingGroupIds))

def pick_course(groupId):
    return df_winf[df_winf["groupId"] == groupId].sample(n=1).iloc[0]


for o in objectives:
    for i in range(rounds):
        r = i + 1
        courses = pick_n_courses_from_random_groups(o)
        obj = {
        "title": f"Zahl der fix gesetzten Kurse: {o} (Zufällige Auswahl).",
            "COURSE_PRIORITY_CONSTRAINTS": { str(cid): 100 for cid in courses },

        "FIXED_TIME_CONSTRAINTS": [
            ["monday", 1, 23, 7],
            ["tuesday", 1, 23, 15],
            ["wednesday", 1, 23, 15],
            ["thursday", 1, 23, 7],
            ["friday", 1, 23, 7],
            ["saturday", 1, 23, -7]
        ],

        }

        with open(fr"C:\Users\Philipp\Documents\WU\bachelorarbeit\models\config\constraint_4443_{o}_scheduled_{r}.json", "w") as f:
            json.dump(obj, f, indent=4)

# cs = pick_n_courses_from_random_groups(3)
# print_schedule(cs)
# print(schedule_overlaps(cs))

[32m2026-01-11 11:04:37.560[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mpick_n_courses_from_random_groups[0m:[36m12[0m - [34m[1mscheduled: 0[0m
[32m2026-01-11 11:04:37.569[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mpick_n_courses_from_random_groups[0m:[36m12[0m - [34m[1mscheduled: 0[0m
[32m2026-01-11 11:04:37.578[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mpick_n_courses_from_random_groups[0m:[36m12[0m - [34m[1mscheduled: 0[0m
[32m2026-01-11 11:04:37.591[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mpick_n_courses_from_random_groups[0m:[36m12[0m - [34m[1mscheduled: 0[0m
[32m2026-01-11 11:04:37.602[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mpick_n_courses_from_random_groups[0m:[36m12[0m - [34m[1mscheduled: 0[0m
[32m2026-01-11 11:04:37.609[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mpick_n_courses_from_random_groups[0m:[36m12[0m - [34m[1mscheduled: 0[0m
[32m2026-01-11 11:04:37.611[0m | [34m

KeyboardInterrupt: 

In [66]:
len(df_winf) / 18

21.166666666666668

In [67]:
len(df_winf["groupId"].unique())

28

In [68]:
381/28

13.607142857142858

In [64]:
len(df_winf) / len(df_winf["groupId"].unique())

13.607142857142858

In [79]:
all_dates = []
for _, row in df_winf.iterrows():
    all_dates.extend(list(map(lambda x: x["start"], row.dates)))

df = pd.to_datetime(all_dates)

# Get the day of the week names
days = df.day_name()

# Calculate percentages
counts = days.value_counts(normalize=True) * 100

# Ensure all days are represented (even if 0%)
all_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
counts = counts.reindex(all_days, fill_value=0)

print(counts)

Monday       21.138452
Tuesday      25.437296
Wednesday    21.849985
Thursday     19.656092
Friday       11.265935
Saturday      0.652238
Sunday        0.000000
Name: proportion, dtype: float64


In [80]:
# 2. Convert to datetime format
df = pd.to_datetime(all_dates)

# 3. Extract the full hour (this ignores minutes/seconds)
hours = df.hour

# 4. Calculate the percentage for each hour
hour_percentages = hours.value_counts(normalize=True) * 100

# 5. Sort by hour (0 to 23) and display
print(hour_percentages.sort_index())

7      0.592944
8     16.869256
9      8.271568
10    10.109695
11     6.107323
12     7.975096
13    11.443819
14     8.064038
15     5.899792
16    10.287578
17     5.573673
18     8.775571
19     0.029647
Name: proportion, dtype: float64


In [83]:
import numpy as np

all_dates = []
for _, row in df_winf.iterrows():
    all_dates.extend(list(map(lambda x: (x["start"], x["end"]), row.dates)))

# 2. Create DataFrame and convert to datetime
df = pd.DataFrame(all_dates, columns=['start', 'end'])
df['start'] = pd.to_datetime(df['start'])
df['end'] = pd.to_datetime(df['end'])

# 3. Calculate duration in decimal hours
# (e.g., 1 hour 30 minutes = 1.5)
df['duration_decimal'] = (df['end'] - df['start']).dt.total_seconds() / 3600

# 4. Round to the nearest hour (0.5 rounds up)
df['rounded_hours'] = df['duration_decimal'].apply(lambda x: int(np.floor(x + 0.5)))

# 5. Calculate the percentage distribution
distribution = df['rounded_hours'].value_counts(normalize=True).sort_index() * 100

print(distribution)

rounded_hours
1     0.681886
2    27.127186
3    41.891491
4    27.216128
5     1.808479
6     0.830122
7     0.118589
8     0.326119
Name: proportion, dtype: float64
