In [17]:
import itertools
import os
import random

import numpy as np

In [28]:
'''
This function generates data for the exam scheduling problem with constraints.
The generated data is valid, which means that it is possible to find a solution for it.
If you want to generate data with more conflicts, you can set most_strict = True
If you want to generate data that is not solvable, you can set valid_solution = False
@params num_subjects                number of subjects
@params num_rooms                   number of rooms
@params num_conflits                (optional) number of conflicts
@params file_name                   (optional) file name to save input and solution data
@params num_sections_per_day        (optional) number of sections per day
@params num_days                    (optional) number of days
@params min_students_per_subject    (optional) min number of students per subject
@params max_students_per_subject    (optional) max number of students per subject
@params random_rate                 (optional) random rate to generate data
@params most_strict                 (optional) generate more conflicting data
@params valid_solution              (optional) whether the generated data is solvable
@params debug                       (optional) debug mode to print debug information
'''
def generate_valid_data(num_subjects: int, num_rooms: int, num_conflits: int = None, file_name: str = None,
                  num_sections_per_day: int = None, num_days: int = None, min_students_per_subject: int = None, 
                  max_students_per_subject: int = None, random_rate: float = None, most_strict: bool = None,
                  valid_solution: bool = None, debug: bool = None):
    """Generate a random exam-scheduling instance together with a known solution.

    Subjects get a random number of students, rooms get a random capacity, and
    every subject is placed into a (room, day, section) slot. Conflicts are
    then drawn only between subjects that ended up in different time slots, so
    the recorded placement satisfies every generated conflict.

    Args:
        num_subjects: Number of subjects (must be at least 1).
        num_rooms: Number of rooms (must be at least 1).
        num_conflits: Upper bound on the number of conflict pairs. When None,
            it is the maximum possible number of pairs if ``most_strict`` is
            true, otherwise a random number between 0 and that maximum.
        file_name: Base name of the output files. When None, nothing is
            written to disk.
        num_sections_per_day: Sections per day (defaults to 4).
        num_days: Accepted for interface compatibility; not used by the
            generator.
        min_students_per_subject: Lower bound for students per subject
            (defaults to 20).
        max_students_per_subject: Upper bound for students per subject
            (defaults to 40).
        random_rate: Initial probability of leaving a room empty in a section
            (defaults to 0.5); 0.0 means rooms are never skipped.
        most_strict: Request the maximum number of conflicts when
            ``num_conflits`` is not given.
        valid_solution: Defaults to True. When False, the maximum number of
            conflicts is requested and no files are written.
        debug: Print the intermediate state of the assignment.

    Returns:
        None. When ``valid_solution`` is true and ``file_name`` is given, the
        instance is written to ``data/<file_name>.txt`` and the placement to
        ``data/<file_name>_solution.csv``.

    Raises:
        ValueError: If ``num_subjects`` or ``num_rooms`` is smaller than 1, or
            if the student bounds are inverted (raised by ``random.randint``).
    """
    if num_subjects < 1 or num_rooms < 1:
        raise ValueError("num_subjects and num_rooms must both be at least 1")

    # Only fall back to a default when the argument was really omitted, so
    # that explicit values such as random_rate=0.0 are honoured.
    if min_students_per_subject is None:
        min_students_per_subject = 20
    if max_students_per_subject is None:
        max_students_per_subject = 40
    if not num_sections_per_day:
        # Zero sections per day is meaningless, so it also gets the default.
        num_sections_per_day = 4
    if random_rate is None:
        random_rate = 0.5
    if valid_solution is None:
        valid_solution = True

    max_conflicts = num_subjects * (num_subjects - 1) // 2
    if not valid_solution:
        num_conflits = max_conflicts
    elif num_conflits is None:
        # An explicit num_conflits from the caller is kept as-is.
        num_conflits = max_conflicts if most_strict else random.randint(0, max_conflicts)

    students = [random.randint(min_students_per_subject, max_students_per_subject)
                for _ in range(num_subjects)]
    largest_class = max(students)
    min_capacity_per_room = min(students)
    max_capacity_per_room = largest_class + 10

    # Redraw the capacities until at least one room can host the largest
    # subject; otherwise that subject could never be placed.
    capacities = [random.randint(min_capacity_per_room, max_capacity_per_room)
                  for _ in range(num_rooms)]
    while max(capacities) < largest_class:
        capacities = [random.randint(min_capacity_per_room, max_capacity_per_room)
                      for _ in range(num_rooms)]

    # Assign subjects to rooms, section by section and day by day.
    # Rooms are visited from the smallest to the largest capacity (the room
    # list never changes, so the order is computed once).
    rooms_by_capacity = sorted(enumerate(capacities), key=lambda pair: pair[1])
    skip_decay = random_rate / num_rooms
    skip_rate = random_rate - skip_decay

    assigned = []        # (subject, room, day, section) in placement order
    section_mapper = {}  # subject -> global time-slot index
    day_count = 0
    num_sections = 0

    while len(assigned) < num_subjects:
        if debug:
            print("subject_count = {}".format(len(assigned)))

        for room, capacity in rooms_by_capacity:
            candidates = [s for s, size in enumerate(students)
                          if size <= capacity and s not in section_mapper]

            if debug:
                print("================================")
                print("room = {}, capacity = {}".format(room, capacity))
                print("assigned_subjects = {}".format([entry[0] for entry in assigned]))
                print(capacities)
                print(students)
                print([size if s in candidates else 0 for s, size in enumerate(students)])

            if not candidates:
                continue

            # Largest unassigned subject that still fits into this room;
            # ties go to the lowest subject index.
            subject = max(candidates, key=lambda s: students[s])

            if debug:
                print("random_rate = {}".format(skip_rate))

            # Randomly leave the room empty; each skip lowers the chance of
            # the next one so progress is always made.
            if random.random() < skip_rate:
                skip_rate -= skip_decay
                continue

            assigned.append((subject, room, day_count, num_sections))
            section_mapper[subject] = day_count * num_sections_per_day + num_sections
            if len(assigned) == num_subjects:
                break

        num_sections += 1
        if num_sections == num_sections_per_day:
            # A new day starts: restore the initial skip probability.
            skip_rate = random_rate
            num_sections = 0
            day_count += 1

    if debug:
        print("assigned = {}".format(assigned))
        print(section_mapper)

    # Generate conflicts: only pairs placed in different time slots may
    # conflict, which keeps the recorded placement a valid solution.
    shuffled_subjects = list(range(num_subjects))
    random.shuffle(shuffled_subjects)

    conflict_list = []
    if num_conflits > 0:
        # combinations() yields every unordered pair exactly once, so no
        # duplicate check against the growing list is needed.
        for first, second in itertools.combinations(shuffled_subjects, 2):
            if section_mapper[first] != section_mapper[second]:
                conflict_list.append((first, second))
                if len(conflict_list) >= num_conflits:
                    break

    # Nothing is persisted for "invalid" requests or when no name was given.
    if not valid_solution or file_name is None:
        return

    # Write each pair in a random orientation and shuffle the lines so the
    # file does not leak the generation order. The module-level generator is
    # used so that random.seed() makes the output reproducible.
    conflict_lines = []
    for pair in conflict_list:
        ordered = pair if random.random() < 0.5 else tuple(reversed(pair))
        conflict_lines.append(" ".join(str(value) for value in ordered))
    random.shuffle(conflict_lines)

    input_str = "\n".join([
        str(num_subjects),
        " ".join(map(str, students)),
        str(num_rooms),
        " ".join(map(str, capacities)),
        str(len(conflict_list)),
        "\n".join(conflict_lines),
    ])

    solution_rows = ["subjects,number_student,rooms,num_seat,day,section"]
    for subject, room, day, section in assigned:
        solution_rows.append(",".join(map(str, (
            subject, students[subject], room, capacities[room], day, section))))
    solution_str = "\n".join(solution_rows) + "\n"

    os.makedirs("data", exist_ok=True)
    with open(os.path.join("data", file_name + ".txt"), "w") as f:
        f.write(input_str)
    with open(os.path.join("data", file_name + "_solution.csv"), "w") as f:
        f.write(solution_str)

# Generate valid data with the function above

In [30]:
# Dataset sizes to generate, as (num_subjects, num_rooms) pairs.
configs = [
    (10, 2), (16, 3), (20, 4),
    (30, 6), (40, 8), (50, 10),
    (60, 12), (70, 16), (80, 20),
    (200, 20),
]

# Produce four independent datasets per size; the repetition index is part
# of the file name so that the runs do not overwrite each other.
for num_subjects, num_rooms in configs:
    for i in range(4):
        dataset_name = "data_{}_{}_({})".format(num_subjects, num_rooms, i)
        generate_valid_data(num_subjects, num_rooms, file_name=dataset_name,
                            random_rate=0.5, debug=True)

subject_count = 0
room = 1, capacity = 41
assigned_subjects = []
[48, 41]
[27, 34, 38, 22, 28, 34, 38, 38, 29, 35]
[27, 34, 38, 22, 28, 34, 38, 38, 29, 35]
random_rate = 0.25
room = 0, capacity = 48
assigned_subjects = [2]
[48, 41]
[27, 34, 38, 22, 28, 34, 38, 38, 29, 35]
[27, 34, 0, 22, 28, 34, 38, 38, 29, 35]
random_rate = 0.25
subject_count = 2
room = 1, capacity = 41
assigned_subjects = [2, 6]
[48, 41]
[27, 34, 38, 22, 28, 34, 38, 38, 29, 35]
[27, 34, 0, 22, 28, 34, 0, 38, 29, 35]
random_rate = 0.25
room = 0, capacity = 48
assigned_subjects = [2, 6, 7]
[48, 41]
[27, 34, 38, 22, 28, 34, 38, 38, 29, 35]
[27, 34, 0, 22, 28, 34, 0, 0, 29, 35]
random_rate = 0.25
subject_count = 3
room = 1, capacity = 41
assigned_subjects = [2, 6, 7]
[48, 41]
[27, 34, 38, 22, 28, 34, 38, 38, 29, 35]
[27, 34, 0, 22, 28, 34, 0, 0, 29, 35]
random_rate = 0.0
room = 0, capacity = 48
assigned_subjects = [2, 6, 7, 9]
[48, 41]
[27, 34, 38, 22, 28, 34, 38, 38, 29, 35]
[27, 34, 0, 22, 28, 34, 0, 0, 29, 0]
random_r