In [6]:
import pandas as pd
import numpy as np
import random

In [60]:
# Load the class schedule and the students' group preferences.
plan = pd.read_csv(
    'data/schedules/schedule_1.csv',
    sep=None,         # let the parser detect the delimiter ...
    engine='python',  # ... which needs the Python parsing engine
)
pref = pd.read_csv('data/preferences/preferences_1.csv')

In [61]:
# Replace each class type with the capacity of a group of that type.
capacities = {
    'L': 15,
    'P': 15,
    'C': 30,
}

# Drop rows of type 'W', map the remaining types to capacities and rename the
# column. Chaining on the filtered frame (rather than assigning a column onto
# the slice and renaming in place) avoids pandas' SettingWithCopyWarning.
# NOTE(review): a type missing from `capacities` becomes NaN here — confirm
# that 'L', 'P', 'C' and 'W' are the only values present in the data.
plan = (
    plan.loc[plan['type'] != 'W']
    .assign(type=lambda frame: frame['type'].map(capacities))
    .rename(columns={'type': 'capacity'})
)
plan.head(5)

Unnamed: 0,subject,specialization,capacity,Unnamed: 3,group_id,teacher,classroom,week,day,start_time
0,Przedmiot-1,G,30,30,1,Prowadzący-15,3.27a,,Cz,16:45
1,Przedmiot-1,G,30,30,2,Prowadzący-15,3.27b,,Cz,16:45
2,Przedmiot-1,G,30,30,3,Prowadzący-15,3.27a,,Cz,18:30
3,Przedmiot-1,G,30,30,4,Prowadzący-15,3.27c,,Cz,16:45
4,Przedmiot-1,G,30,30,5,Prowadzący-15,3.27b,,Cz,16:45


In [62]:
# Helper lookup structures
students = pref['student_id'].unique()
subjects = plan['subject'].unique()

# Number of distinct group ids listed in the schedule for each subject.
num_groups = {
    subject: plan.loc[plan['subject'] == subject, 'group_id'].nunique()
    for subject in subjects
}
print(num_groups)

# (student_id, subject, group_id) -> preference value from the preferences file.
pref_dict = dict(
    zip(
        zip(pref['student_id'], pref['subject'], pref['group_id']),
        pref['preference'],
    )
)

{'Przedmiot-1': 8, 'Przedmiot-2': 8, 'Przedmiot-3': 8, 'Przedmiot-4': 8, 'Przedmiot-6': 17, 'Przedmiot-7': 16}


In [63]:
# Example of a single individual: one random group per (student, subject) cell.
individual_df = pd.DataFrame(index=students, columns=subjects)
for student in students:
    for subject in subjects:
        # random.randint includes BOTH endpoints, so the upper bound must be the
        # group count itself; "+ 1" would produce a group that does not exist.
        individual_df.loc[student, subject] = random.randint(1, num_groups[subject])

In [64]:
def fitness(individual, pref_dict):
    """Score an assignment as a fraction of the best achievable preference total.

    Args:
        individual: DataFrame indexed by student with one column per subject;
            each cell holds the group assigned to that student for that subject.
        pref_dict: Mapping ``(student, subject, group) -> preference points``.
            Combinations missing from the mapping score 0 points.

    Returns:
        ``total_points / max_points`` rounded to two decimals, where
        ``max_points`` is the sum, over every (student, subject) pair present
        in ``pref_dict``, of that pair's highest preference. Returns 0.0 when
        ``max_points`` is zero (e.g. an empty ``pref_dict``).
    """
    # Derive the normaliser from the mapping that was passed in, so the
    # function does not depend on a module-level DataFrame.
    best_per_pair = {}
    for (student, subject, _group), value in pref_dict.items():
        if value != value:  # NaN preference: skipped, as pandas' max() would do
            continue
        pair = (student, subject)
        if pair not in best_per_pair or value > best_per_pair[pair]:
            best_per_pair[pair] = value
    max_points = sum(best_per_pair.values())

    if not max_points:
        # Nothing can be scored; avoid dividing by zero.
        return 0.0

    total_points = 0
    for student in individual.index:
        for subject in individual.columns:
            group = individual.loc[student, subject]
            total_points += pref_dict.get((student, subject, group), 0)

    # Two-decimal rounding is kept for compatibility with existing callers.
    return round(total_points / max_points, 2)

In [65]:
def generate_population(size, students, subjects, num_groups):
    """Create a list of random individuals (student x subject assignment tables).

    Args:
        size: Number of individuals to generate.
        students: Sequence of student ids; used as each frame's index.
        subjects: Sequence of subject names; used as each frame's columns.
        num_groups: Mapping ``subject -> number of groups``. Group ids are
            assumed to run from 1 to that number inclusive.

    Returns:
        A list of ``size`` DataFrames whose cells hold a group number drawn
        uniformly from ``1..num_groups[subject]``.
    """
    population = []

    for _ in range(size):
        individual = pd.DataFrame(index=students, columns=subjects)

        for student in students:
            for subject in subjects:
                # random.randint includes BOTH endpoints, so the upper bound is
                # the group count itself; "+ 1" would emit a non-existent group.
                individual.loc[student, subject] = random.randint(1, num_groups[subject])

        population.append(individual)
    return population


In [66]:
def mutate(individual, num_groups, mutation_rate=0.1):
    """Return a copy of ``individual`` with some cells re-drawn at random.

    Args:
        individual: DataFrame of group assignments (students x subjects).
            It is not modified.
        num_groups: Mapping ``subject -> number of groups``. Group ids are
            assumed to run from 1 to that number inclusive.
        mutation_rate: Probability that any single cell is replaced.

    Returns:
        A new DataFrame in which each cell was, with probability
        ``mutation_rate``, replaced by a group drawn uniformly from
        ``1..num_groups[subject]``.
    """
    mutated = individual.copy()

    for student in mutated.index:
        for subject in mutated.columns:
            if random.random() < mutation_rate:
                # random.randint includes BOTH endpoints, so the upper bound is
                # the group count itself; "+ 1" would emit a non-existent group.
                mutated.loc[student, subject] = random.randint(1, num_groups[subject])

    return mutated

In [67]:
def crossover(parent1, parent2):
    """Single-point crossover along the student axis.

    Students in the first half of ``parent1``'s index (by position, using
    floor division) take their row from ``parent1``; the remaining students
    take it from ``parent2``. Values are looked up by label in both parents.

    Returns:
        A new DataFrame with ``parent1``'s index and columns.
    """
    row_labels = parent1.index.tolist()
    column_labels = parent1.columns.tolist()
    split_at = len(row_labels) // 2

    offspring = pd.DataFrame(index=row_labels, columns=column_labels)

    # Each donor contributes one contiguous segment of students.
    segments = (
        (parent1, row_labels[:split_at]),
        (parent2, row_labels[split_at:]),
    )
    for donor, labels in segments:
        for label in labels:
            for column in column_labels:
                offspring.loc[label, column] = donor.loc[label, column]

    return offspring

In [68]:
# Sanity check: score a small random population.
population = generate_population(20, students, subjects, num_groups)
scores = [float(fitness(individual, pref_dict)) for individual in population]
print(scores)

[0.4, 0.4, 0.44, 0.42, 0.4, 0.4, 0.42, 0.42, 0.39, 0.43, 0.42, 0.4, 0.39, 0.42, 0.42, 0.41, 0.4, 0.42, 0.42, 0.4]


In [69]:
def evolve(
    students, subjects, num_groups, pref_dict,
    population_size=100, generations=100,
    mutation_rate=0.1, elite_size=2
):
    """Run an elitist genetic algorithm over group assignments.

    Each generation scores every individual with ``fitness``, copies the
    ``elite_size`` best individuals unchanged into the next generation, and
    fills the remaining slots with mutated crossovers of two parents. Parents
    are drawn uniformly at random from the current population (the draw is
    not weighted by fitness).

    Args:
        students, subjects, num_groups: Passed to ``generate_population``.
        pref_dict: Preference mapping passed to ``fitness``.
        population_size: Number of individuals per generation.
        generations: Number of generations to run.
        mutation_rate: Per-cell mutation probability passed to ``mutate``.
        elite_size: Number of top individuals carried over unchanged.

    Returns:
        ``(best_individual, best_fitness, history)`` where ``history`` holds
        the best score of each generation. With ``generations == 0`` the
        result is ``(None, -1e9, [])``.
    """
    history = []
    best_individual, best_fitness = None, -1e9
    population = generate_population(population_size, students, subjects, num_groups)

    final_gen = generations - 1
    for gen in range(generations):
        scores = [float(fitness(candidate, pref_dict)) for candidate in population]
        max_f = max(scores)
        history.append(max_f)

        # Track the best individual seen in any generation so far.
        if max_f > best_fitness:
            best_fitness = max_f
            best_individual = population[scores.index(max_f)]

        # Elitism: positions of individuals ordered from best to worst score.
        ranking = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
        new_population = [population[position] for position in ranking[:elite_size]]

        while len(new_population) < population_size:
            p1 = random.choice(population)
            p2 = random.choice(population)
            offspring = mutate(crossover(p1, p2), num_groups, mutation_rate)
            new_population.append(offspring)

        population = new_population

        # Progress report every 10th generation and on the last one.
        if gen % 10 == 0 or gen == final_gen:
            print(f"Pokolenie {gen}: najlepszy fitness = {max_f}")

    return best_individual, best_fitness, history

In [70]:
# Run the genetic algorithm and report the best score it found.
best, score, history = evolve(
    students, subjects, num_groups, pref_dict,
    population_size=50, generations=100,
    mutation_rate=0.1, elite_size=2,
)

print("Najlepszy fitness:", score)

Pokolenie 0: najlepszy fitness = 0.45
Pokolenie 10: najlepszy fitness = 0.46
Pokolenie 20: najlepszy fitness = 0.49
Pokolenie 30: najlepszy fitness = 0.49
Pokolenie 40: najlepszy fitness = 0.49
Pokolenie 50: najlepszy fitness = 0.49
Pokolenie 60: najlepszy fitness = 0.49
Pokolenie 70: najlepszy fitness = 0.5
Pokolenie 80: najlepszy fitness = 0.5
Pokolenie 90: najlepszy fitness = 0.5
Pokolenie 99: najlepszy fitness = 0.5
Najlepszy fitness: 0.5
Najlepszy przydział (fragment):
