In [1]:
import numpy as np 
import pandas as pd
from docplex.mp.model import Model
from docplex.mp.solution import SolveSolution
from typing import Union, List
from numpy.random import default_rng
import names
import pickle

import cplex
import docplex

from datetime import datetime
rng = default_rng()

In [15]:
from tqdm import tqdm

## Solver Parameters

In [16]:
opt_mod = Model(name='ET MIP')

In [17]:
opt_mod.parameters.timelimit

docplex.mp.params.NumParameter(parameters.timelimit,1e+75)

In [4]:
# opt_mod.parameters.timelimit = 10
# opt_mod.set_time_limit(10)

## Constants

In [5]:
number_exams = 1279
number_students = 10764
number_exam_days = 20 # assumes 3 time slots per day
number_rooms = 77
min_room_size = 50
max_room_size = 200

E = [f'APSC_{code}' for code in rng.choice(range(0, number_exams + 1), size=number_exams, replace=False)] # exams
S = [names.get_full_name() for i in range(number_students)] # students
T = [[datetime(2022, 12, day, 9), datetime(2022, 12, day, 13), datetime(2022, 12, day, 17)] for day in range(1, number_exam_days)] # timeslots
R = [f'RM_{code}' for code in rng.choice(range(number_rooms + 1), size=number_exams, replace=False)] # rooms
Cp = np.random.randint(min_room_size, max_room_size, (len(R))) # capacity of rooms

# # course enrolments
# He_s = np.random.randint(0,2, (len(E),len(S))) # binary random 
# sumHe_s = np.sum(He_s, axis=1)

In [6]:
# with open('E_Pickle.pkl', 'wb') as f:
#     pickle.dump(E, f)

# with open('S_Pickle.pkl', 'wb') as f:
#     pickle.dump(S, f)
    
# with open('T_Pickle.pkl', 'wb') as f:
#     pickle.dump(T, f)
    
# with open('R_Pickle.pkl', 'wb') as f:
#     pickle.dump(R, f)

# with open('He_s_Pickle.pkl', 'wb') as f:
#     pickle.dump(He_s, f)

In [1]:
# with open('E_Pickle.pkl', 'rb') as f:
#     E = pickle.load(f)

# # with open('S_Pickle.pkl', 'rb') as f:
# #     S = pickle.load(f)
    
# with open('T_Pickle.pkl', 'rb') as f:
#     T = pickle.load(f)
    
# with open('R_Pickle.pkl', 'rb') as f:
#     R = pickle.load(f)
    
# He_s = np.random.randint(0,2, (len(E),len(S))) # binary random 
# sumHe_s = np.sum(He_s, axis=1)

In [21]:
print(f"Number of exams          : {len(E)}")
print(f"Number of students       : {len(S)}")
print(f"Number of rooms          : {len(R)}")
print(f"Number of datetime slots : {len(T)}")
print(f"Shape of enrolments      : {He_s.shape}")

Number of exams          : 1279
Number of students       : 2561
Number of rooms          : 77
Number of datetime slots : 19
Shape of enrolments      : (1279, 2561)


## Variables

In [22]:
x = opt_mod.binary_var_matrix(len(E), len(T), name="X_e,t") # whether we use timeslot t for exam e
y = opt_mod.binary_var_matrix(len(E), len(R), name="Y_e,r") # whether we use room r for exam e

## Constraints

C1: For all exams, the sum of the allocated timeslots must be equal to 1

$$\sum_{t\in T_c} X_e,_t=1 \;\forall \; e \in E$$

C2: For all exams, the sum of the allocated rooms must be equal to 1

$$\sum_{r\in R_e} Y_e,_r = 1 \;\forall \; e \in E$$

C3: For every student and timeslot, the sum of the allocated exams must be less or equal to 1. 
- i.e. students can be at only one exam at a time

$$\sum_{e\in E} X_e,_t * H_e,_s \leq 1 \;\forall \; s \in S \; and \; t \in T$$


C4: For all rooms, the sum of students in a room must be less than the capacity of the room

$$\sum_{e\in E} X_e,_t * y_e,_r \leq C_p,_r \;\forall \; r \in R \; and \; t \in T$$

In [23]:
c1 = opt_mod.add_constraints((sum(x[e, t] for t in range(len(T))) == 1 for e in range(len(E))), names='c1') 

In [24]:
c2 = opt_mod.add_constraints((sum(y[e, r] for r in range(len(R))) == 1 for e in range(len(E))), names='c2') 

In [13]:
# c3 = opt_mod.add_constraints((sum(x[e,t] * He_s[e,s] for e in range(len(E))) <= 1 for s in range(len(S)) for t in range(len(T))), names='c3')

In [25]:
# c3 modified constraint 
i = 0
for s in tqdm(range(len(S))):
    for t in range(len(T)):
        cond = sum(x[e,t] * He_s[e,s] for e in range(len(E)))
        if type(cond) != int:
            opt_mod.add_constraint(cond <= 1)
    i += 1

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 2561/2561 [08:43<00:00,  4.89it/s]


In [26]:
# c4 modified constraint
for r in tqdm(range(len(R))):
    for t in range(len(T)):
        cond = sum((x[e,t]*y[e,r]) * sumHe_s[e] for e in range(len(E)))
        if type(cond) != int:
            opt_mod.add_constraint(cond <= Cp[r])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 77/77 [00:38<00:00,  1.98it/s]


In [None]:
# c4 = opt_mod.add_constraints((sum(y[e,r] * sumHe_s[e] for e in range(len(E))) <= Cp[r] for r in range(len(R))), names='c4') 

## Objective Function

$$  minimize\; I_T = \sum_{k=1}^{K_T} \; ceil \; \left[ \sum_{c=1}^{C_K}\; N_c \; * \; (ratio \; students \; to \; invigilators) \right] $$

In [39]:
ratio_of_Inv = 1/3

$$\sum_{r \in R} \; ceil \;  \left[ \sum_{e \in E}y_e,_r ( \sum_{s \in S} H_e,_s ) \; (ratio \; students \; to \; invigilators) \right] * \; cost \; per \; invigilator $$

In [40]:
obj_fun =  sum(sum(y[e,r] * sumHe_s[e] for e in range(len(E))) for r in range(len(R)))
opt_mod.set_objective('min', obj_fun)
opt_mod.print_information()

Model: ET MIP
 - number of variables: 122784
   - binary=122784, integer=0, continuous=0
 - number of constraints: 52680
   - linear=51217, quadratic=1463
 - parameters: defaults
 - objective: minimize
 - problem type is: MIQCP


In [38]:
# pickle the model in case it crashes during solving
model_id = 'model_' + datetime.now().strftime("%y-%m-%d-%H-%M")
with open(f'{model_id}.pkl', 'wb') as f:
    pickle.dump(opt_mod, f)

## Processing Solution

In [41]:
def process_solution(sol : SolveSolution) -> Union[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Takes a cplex solution and produces a exam schedule
    
    Parameters
    ----------
    sol : SolveSolution
        solution from the solver
    
    Returns
    -------
    final_schedule : pd.DataFrame
        The schedule formatted in readable format for an exam organizer
    
    df_x : pd.DataFrame
        The results for variable x
    
    df_y : pd.DataFrame
        The results for variable y
    """
    # extract solutions as df
    df_x = sol.get_value_df(x).rename(columns={'key_1':'exam','key_2':'timeslot'})
    df_y = sol.get_value_df(y).rename(columns={'key_1':'exam','key_2':'room'})

    # Add rows with the names of courses and timelots
    exam_col = [E[i] for i in range(len(E)) for j in range(len(T))]
    time_col = [T[j] for i in range(len(E)) for j in range(len(T))]
    df_x["EXAM"] = exam_col
    df_x["TIMESLOT"] = time_col

    # Add rows with the names of courses and rooms
    exam_col = [E[i] for i in range(len(E)) for j in range(len(R))]
    room_col = [R[j] for i in range(len(E)) for j in range(len(R))]
    df_y["EXAM"] = exam_col
    df_y["ROOM"] = room_col
    
    # Produce the final schedule
    final_schedule = df_x[df_x["value"]==1].merge(df_y[df_y["value"]==1], on='EXAM', how='left')
    
    return final_schedule, df_x, df_y

In [42]:
def create_enrolment_df(He_s : np.array, S : List[int]) -> pd.DataFrame:
    """
    Creates a dataframe with the students for each exam/course
    """
    exam_student_pairs = []
    for exam in range(len(He_s)):
        students_in_exam_e = []
        for i, student in enumerate(He_s[exam]):
            if student == 1:
                students_in_exam_e.append(S[i])
        exam_student_pairs.append(students_in_exam_e)
        
    enrolment_df = pd.DataFrame(columns=['EXAM','student'])
    enrolment_df['EXAM'] = E
    enrolment_df['student'] = exam_student_pairs

    return enrolment_df

In [43]:
enrolment_df = create_enrolment_df(He_s, S)
enrolment_df

Unnamed: 0,EXAM,student
0,APSC_806,"[Samantha Williams, Mary Chavez, Cynthia Guill..."
1,APSC_352,"[Robert Taylor, Lowell Coletti, Jamey Wollschl..."
2,APSC_1117,"[Robert Taylor, Mary Chavez, Joe Rand, Doreen ..."
3,APSC_639,"[Samantha Williams, Mary Chavez, Cynthia Guill..."
4,APSC_62,"[Lowell Coletti, William Chessman, Cynthia Gui..."
...,...,...
1274,APSC_896,"[Lowell Coletti, William Chessman, Jamey Wolls..."
1275,APSC_250,"[Samantha Williams, Tina Burkley, Jodi Henry, ..."
1276,APSC_303,"[Robert Taylor, Lowell Coletti, Cynthia Guillo..."
1277,APSC_724,"[Robert Taylor, William Chessman, Jamey Wollsc..."


In [44]:
from docplex.mp.progress import TextProgressListener

In [None]:
opt_mod.clear_progress_listeners()
opt_mod.add_progress_listener(TextProgressListener())
sol = opt_mod.solve(clean_before_solve=True)
if sol:
    print("Found a solution \n")
    schedule, df_x, df_y = process_solution(sol)
    print("Schedule: \n")
    display(schedule.merge(enrolment_df, on='EXAM', how='left'))
    
    run_time = sol.solve_details.time
    
else:
    print("Could not find a solution")
    run_time = np.nan