In [None]:
import pandas as pd
import numpy as np
import scipy.stats as sp
import gurobipy as gp
from gurobipy import GRB
import datetime as dt
import matplotlib.pyplot as plt
import csv

In [None]:
# STEP 0, DATA PREPROCESSING: 

# Convert the raw data into a format of rows as groups and columns as classes
# The values in the table are the probability that a student in this group would 
# join the waitlist for this particular class
# Raw waitlist table, read once when this cell runs; process_raw_data() and
# capacities_from_raw_data() below both read this module-level frame.
wl=pd.read_csv("onlywaitlists.csv")

def process_raw_data(): 	
	"""Build the per-group waitlist probability table from the global `wl`.

	Returns:
		A DataFrame with one row per remaining column of `wl` (after removing
		'Class', "OTHER", "Total" and "Cap"). Columns 0..42 hold each row's
		share of its own waitlist total; 'waitlist', 'students' and 'ratio'
		carry the totals used to compute those shares. NaNs are replaced by 0
		and rows with 'students' <= 2 are removed.
	"""

	# Sum the raw rows per class; reset_index turns 'Class' back into a column
	# and gives the rows a fresh 0..n-1 index.
	wl_data=wl.groupby(['Class']).sum().reset_index()
	# NOTE(review): row label 43 is dropped without explanation -- presumably a
	# non-class summary row; confirm against the CSV.
	wl_data=wl_data.drop(43)

	# Transpose so the former columns become rows and class positions become columns.
	wl_tr=wl_data.drop(['Class'], axis=1).transpose()
	wl_tr=wl_tr.drop(["OTHER", "Total", "Cap"], axis=0)
	
	# Row-wise sum over all class columns.
	wl_tr["total waitlists"]=wl_tr.sum(axis=1)
	# Assigns the first raw row of `wl`, aligned on the row labels of wl_tr.
	# NOTE(review): assumes that first row stores the student count per group -- confirm.
	wl_tr["total students"]=wl.iloc[0]

	wl_tr["ratio"]=wl_tr["total waitlists"]/wl_tr["total students"]

	# Normalise the class columns 0..42 by each row's waitlist total.
	wl_final=wl_tr[range(0,43)].div(wl_tr["total waitlists"], axis=0)

	wl_final['waitlist']=wl_tr["total waitlists"]
	wl_final['students']=wl_tr["total students"]
	wl_final['ratio']=wl_tr["ratio"]
	# Division by a zero total produces NaN; those entries become 0.
	wl_final=wl_final.fillna(0)
	# Keep only rows whose student count exceeds 2.
	wl_final = wl_final[wl_final["students"] > 2]

	return wl_final

# Get course capacities from the raw data
def capacities_from_raw_data(): 
	"""Return per-class "Total", "Cap" and their "Ratio", sorted by ascending ratio.

	Reads the module-level `wl` frame, sums its rows per 'Class', and drops the
	row that ends up with index label 43 after the index is reset.
	"""
	per_class = wl.groupby(['Class']).sum()[["Total", "Cap"]]
	per_class = per_class.assign(Ratio=per_class["Total"] / per_class["Cap"])
	without_row_43 = per_class.reset_index().drop(43)
	return without_row_43.sort_values(by="Ratio")


In [None]:
# STEP 1: SAMPLING DATA:
# HELPER FUNCTIONS FOR SAMPLING

def sample_class_num(mean, N):
    """Draw N Poisson(mean) counts, rounded and floored at zero."""
    draws = np.round(np.random.poisson(mean, N))
    return np.maximum(draws, 0)

# Randomly chooses courses according to the distribution
def course_ranking(class_num, course_prob):
    """Draw distinct course indices according to `course_prob`.

    Args:
        class_num: number of courses requested.
        course_prob: one probability per course; its length defines how many
            courses exist (previously this was fixed at 11).

    Returns:
        An array of distinct course indices, of length
        min(class_num, number of non-zero probabilities). When every
        probability is zero, an all-zero float array of length class_num is
        returned instead.
    """
    if np.all(course_prob == 0):
        # NOTE(review): this fallback yields float zeros, not integer indices;
        # callers that index with the result should confirm they handle it.
        return np.zeros(class_num)
    n_courses = len(course_prob)
    # Sampling without replacement cannot return more items than there are
    # courses with non-zero probability.
    n_picks = min(class_num, np.count_nonzero(course_prob))
    return np.random.choice(n_courses, n_picks, p=course_prob, replace=False)

# For a certain group compute a sample of course rankings
def sample_course_ranking(size):
    """Sample one course ranking for each of `size` students.

    Every student requests 3 courses. The draw probabilities are each course's
    share of "Total" among the rows of capacities_from_raw_data() from
    position 32 onward.
    """
    capacities = capacities_from_raw_data()
    tail_totals = capacities[32:]["Total"]
    class_prob = tail_totals / tail_totals.sum()
    return [course_ranking(picks, class_prob) for picks in np.repeat(3, size)]



In [None]:
# HELPER FUNCTIONS FOR CONVERTING DATA TO ONE-HOT ENCODING

def one_hot_encode(input_list, num_classes=11):
    """Return a 0/1 list of length `num_classes` with ones at the given indices.

    Args:
        input_list: course indices to mark (list or integer array).
        num_classes: length of the encoded vector; defaults to 11, the width
            used everywhere else in this notebook.

    Returns:
        A plain Python list of floats.
    """
    encoded_vector = np.zeros(num_classes)
    encoded_vector[input_list] = 1
    return encoded_vector.tolist()

def demand_set_to_one_hot(all_demand_set):
	"""One-hot encode every bundle of every student's demand set.

	The nesting is preserved: the result has one list per student, each holding
	one encoded vector per bundle.
	"""
	return [
		[one_hot_encode(bundle) for bundle in student_ds]
		for student_ds in all_demand_set
	]

def pad_arrays(allocations):
	"""Pad every inner list to the longest length by repeating its last item.

	The inner lists are extended in place and the same outer list is returned.
	An empty outer list is returned unchanged (previously this raised
	ValueError from max()).

	Raises:
		IndexError: if an inner list is empty while another one is not, since
			there is no last item to repeat.
	"""
	if len(allocations) == 0:
		return allocations
	max_len = max(len(allocation) for allocation in allocations)
	for allocation in allocations:
		missing = max_len - len(allocation)
		if missing > 0:
			allocation.extend([allocation[-1]] * missing)
	return allocations

In [None]:
# HELPER FUNCTIONS FOR GENERATING DATASET 

def generate_demand_set(arr, max_bundle_size):
    """List a student's candidate bundles from most to least valuable.

    Candidates are every contiguous slice of `arr` plus the pair
    (arr[0], arr[2]). A bundle's value is the sum of the position weights
    3, 4, 5 of the courses it contains. Bundles are ordered by descending
    (value, size), and those larger than `max_bundle_size` are dropped.
    """
    weights = [3, 4, 5]
    n = len(arr)
    scored = [
        (sum(weights[lo:hi]), arr[lo:hi])
        for lo in range(n)
        for hi in range(lo + 1, n + 1)
    ]
    # The only non-contiguous bundle: first and third course together.
    scored.append((sum([3, 5]), [arr[0], arr[2]]))

    ranked = sorted(scored, key=lambda item: (item[0], len(item[1])), reverse=True)
    return [bundle for _, bundle in ranked if len(bundle) <= max_bundle_size]

def get_all_demand_set(rank,max_bundle_size=3):
    """Build the ordered demand set of every student ranking in `rank`."""
    return [generate_demand_set(student, max_bundle_size) for student in rank]

In [None]:
# METHOD TO GENERATE FULL DATASET

def generate_full_student_dataset(size):
	"""Sample `size` students and return their one-hot encoded demand sets.

	Rankings are sampled, expanded into demand sets with bundles of up to 3
	courses, students with an empty demand set are discarded, and the rest are
	one-hot encoded.
	"""
	rankings = sample_course_ranking(size)
	non_empty = [ds for ds in get_all_demand_set(rankings, 3) if len(ds) != 0]
	return list(demand_set_to_one_hot(non_empty))


In [None]:
# STEP 2: BUDGET PERTURBATIONS 

# HELPER FUNCTIONS FOR BUDGET PERTURBATIONS

# Input: prices for each course & demand set for a student 
# Output: budget allocation for each student
def budget_perturbations(prices, student_demand_set, epsilon,delta, initial_budget): 
	"""List the bundles a student buys as their budget slides down the window.

	The running budget starts at initial_budget + epsilon. Bundles are visited
	in demand-set order; a bundle that costs more than the running budget is
	skipped. An affordable bundle is recorded together with
	max(cost, initial_budget - epsilon), after which the running budget is
	lowered in steps of `delta` until it is below that bundle's cost or reaches
	the bottom of the window. Reaching the bottom ends the scan.

	Returns:
		(bundles, budgets) as two parallel lists. If nothing is affordable,
		the result is a single all-zero bundle of length 11 with budget 0.
	"""
	floor = initial_budget - epsilon
	remaining = initial_budget + epsilon
	chosen = []
	thresholds = []
	for bundle in student_demand_set:
		cost = np.dot(prices, bundle)
		affordable = not (cost > remaining)
		if affordable:
			chosen.append(bundle)
			thresholds.append(max(cost, floor))
			# Step the budget down past this bundle's cost (or to the floor).
			while remaining > floor and remaining >= cost:
				remaining -= delta
			if remaining <= floor:
				break
	if len(chosen) == 0:
		chosen.append(np.zeros(11))
		thresholds.append(0)
	return (chosen, thresholds)

In [None]:
# METHOD TO GET BUDGET PERTURBATIONS FOR ALL STUDENTS

def get_budget_perterbations(prices, one_hot_demand_set,epsilon, delta,initial_budgets):
	"""Run budget_perturbations for every student and pad the results.

	Student i uses initial_budgets[i]. Both returned lists are passed through
	pad_arrays, so every student ends up with the same number of entries.

	Returns:
		(allocations, budget_intervals), each holding one list per student.
	"""
	per_student_bundles = []
	per_student_budgets = []
	for idx, student_demand_set in enumerate(one_hot_demand_set):
		bundles, thresholds = budget_perturbations(
			prices, student_demand_set, epsilon, delta, initial_budgets[idx]
		)
		if len(bundles) > 0 and len(thresholds) > 0:
			per_student_bundles.append(bundles)
			per_student_budgets.append(thresholds)
	return pad_arrays(per_student_bundles), pad_arrays(per_student_budgets)

In [None]:
# STEP 3: CLEARING ERROR OPTIMIZATION - LINEAR PROGRAM IN GUROBI 

def clearing_error_optimizer(a, c):
	"""Pick one bundle per student so that total excess demand is minimal.

	Args:
		a: a[i][l][j] is 1 when option l of student i contains course j. Every
			student must offer the same number of options, because the option
			count is taken from a[0].
		c: capacity of each course.

	Returns:
		(x, z, objective) when Gurobi reports an optimal solution, where x maps
		(student, option) to 0/1, z maps each course to its excess demand and
		objective is the 1-norm of z. Returns (None, None, None) for any other
		solver status.
	"""
	n=len(a)
	m=len(c)
	ki=len(a[0])
	model = gp.Model("Clearing_Error_Minimization")

	# z[j]: integer slack above capacity for course j (Gurobi's default lower bound is 0).
	z = model.addVars(m, vtype=GRB.INTEGER, name="z")
	# y: the objective, tied to the 1-norm of z below.
	y = model.addVar(name="y")
	# x[i, l]: 1 when student i receives option l.
	x = model.addVars(n, ki, vtype=GRB.BINARY, name="x")

	model.setObjective(y, sense=GRB.MINIMIZE)
	model.addGenConstrNorm(y, z, 1.0, "normconstr")

	# Demand for each course may exceed its capacity only by z[j].
	for j in range(m):
		model.addConstr(gp.quicksum(x[i, l] * a[i][l][j] for i in range(n) for l in range(ki)) <= c[j] + z[j], f"clearing_error_nonnegative_{j}")
	
	# Every student receives exactly one option.
	for i in range(n):
		model.addConstr(gp.quicksum(x[i, l] for l in range(ki)) == 1, f"one_schedule_per_student_{i}")

	model.optimize()

	if model.status == GRB.OPTIMAL:
		optimal_x = model.getAttr("x", x)
		optimal_z = model.getAttr("x", z)
		optimal_obj = model.ObjVal

		print("Optimal x values:", optimal_x)
		print("Optimal z values:", optimal_z)
		return (optimal_x, optimal_z, optimal_obj)

	print(f"Optimization terminated with status {model.status}")
	# An IIS only exists for an infeasible model; asking for one after, say, an
	# interrupt or a time limit makes Gurobi raise instead of reporting the status.
	if model.status == GRB.INFEASIBLE:
		model.computeIIS()
		print("\nInfeasible constraints:")
		for constr in model.getConstrs():
			if constr.IISConstr:
				print(constr.constrName)
	return (None, None, None)




In [None]:
# STEP 4: ACEEI ALGORITHM 

# HELPER FUNCTIONS FOR PRINTING RESULTS
def show_classes_per_student(x,a, budgets): 
	"""Map each student to the courses and budget of their first selected option.

	Args:
		x: mapping (student, option) -> selection value; non-zero means selected.
		a: a[student][option] is the 0/1 course vector of that option.
		budgets: budgets[student][option] is the budget tied to that option.

	Returns:
		(student_classes, student_budgets). student_classes[s] is a one-element
		list holding the list of course indices; student_budgets[s] is the
		matching budget.
	"""
	student_classes = {}
	student_budgets = {}
	for key, chosen in x.items():
		student, slot = key[0], key[1]
		bundle = a[student][slot]
		# Only the first selected option of each student is reported.
		if chosen == 0 or student in student_classes:
			continue
		course_ids = [idx for idx, flag in enumerate(bundle) if flag == 1]
		student_classes[student] = [course_ids]
		student_budgets[student] = budgets[student][slot]

	return student_classes, student_budgets

def show_students_per_class(x, a):
	"""Map each course index to the students whose selected options contain it.

	Args:
		x: mapping (student, option) -> selection value; zero entries are ignored.
		a: a[student][option] is the 0/1 course vector of that option.
	"""
	class_students = {}
	for key, chosen in x.items():
		if chosen == 0:
			continue
		student = key[0]
		for course, flag in enumerate(a[key[0]][key[1]]):
			if flag == 1:
				class_students.setdefault(course, []).append(student)
	return class_students

def pretty_print_output(x, prices,z,a,b, f):
	"""Write a text report of the final allocation to the open file `f`.

	The report lists each student's courses and budget, each course's students
	and head count, the first row of `prices`, and the values of `z`.
	"""
	classes_by_student, budget_by_student = show_classes_per_student(x,a,b)
	for student, bundles in classes_by_student.items():
		f.write("student: "+str(student)+ " classes: "+str(bundles) +" budget: "+str(budget_by_student[student])+ "\n")

	roster_by_class = show_students_per_class(x, a)
	for course, roster in roster_by_class.items():
		f.write("class: "+ str(course)+" students: "+ str(roster)+ "\n")
		# The value written under the "capacity" label is the number of enrolled students.
		f.write("class: " + str(course) + " capacity: " + str(len(roster)) + "\n")

	f.write("prices: "+ ' '.join(map(str, prices[0]))+ "\n")
	f.write("z: "+ ' '.join(map(str, z))+ "\n")

In [None]:
# MAIN METHOD FOR ACEEI ALGORITHM

def aceei(delta, epsilon, prices, size, delta_0, dataset): 
	"""Iteratively adjust course prices until the clearing error drops below 1.

	Each round builds every student's budget-perturbed options at the current
	prices, solves clearing_error_optimizer for the assignment with the smallest
	total excess demand, and then raises every course price by its excess
	demand times a step size. Every tenth round is appended to a LOG*.csv
	file; on convergence the final allocation is written to a RESULT*.txt file.

	Args:
		delta: budget step passed to the perturbation helper, and the price
			step size once more than 100 rounds have run.
		epsilon: half-width of the budget window passed to the perturbation helper.
		prices: starting prices as a one-row nested list, e.g. [[0] * 11].
		size: number of students; the raw capacities are scaled by size / 890.
		delta_0: price step size for rounds 0..100.
		dataset: one-hot demand sets, one list of bundles per student.

	Returns:
		(status, delta, size, prices, x, clearing_error). status is 1 when the
		loop converged and 0 when a round raised an exception; clearing_error
		lists the objective of every completed round.
	"""
	clearing_error=[]
	optimum=9999999
	i=0
	one_hot_demand_set=dataset
	x=0

	budget_levels=[1,0.99,0.98,0.97]
	# 13 students per level (52 budgets) reproduces the 50-student setup; the
	# blocks only grow when the dataset would otherwise run out of budgets.
	per_level=50//4+1
	if len(dataset) > per_level*len(budget_levels):
		per_level=len(dataset)//len(budget_levels)+1
	initial_budget=np.repeat(budget_levels, per_level)

	wl_totcap=capacities_from_raw_data()
	cap = wl_totcap[32:]["Cap"].tolist()
	# Capacities do not change between rounds, so scale them once.
	# NOTE(review): 890 is presumably the population the raw capacities were sized for -- confirm.
	c = [int(seats/(890/size)) for seats in cap]

	# The context manager closes the log on both the converged and the error path.
	with open("LOG"+str(dt.datetime.now())+".csv", "w+") as f:
		writer = csv.writer(f, delimiter=',')

		while optimum>=1: 
			try:
				a, b = get_budget_perterbations(prices, one_hot_demand_set, epsilon,delta, initial_budget)

				x, z, optimum= clearing_error_optimizer(a, c)
				if i%10==0:
					writer.writerow([i,"","","","","","","","","","",""])
					writer.writerow(list(prices[0]))
					print(list(prices[0]))
					writer.writerow(list(z.values()))
					print(list(z.values()))
				if i <= 100:
					d=delta_0
				else:
					d=delta
				# Raise the price of every over-demanded course in proportion to its excess.
				prices = (np.array(prices) + np.array(list(z.values()))*d).tolist() 
				clearing_error.append(optimum)
				i+=1
			except Exception as err:
				# Still a best-effort exit, but KeyboardInterrupt is no longer
				# swallowed and the cause is reported.
				print("error")
				print(repr(err))
				return (0,delta,size,prices, x, clearing_error)

	with open("RESULT"+str(dt.datetime.now())+".txt", "w+") as f2:
		pretty_print_output(x, prices, z.values(), a,b , f2)
	return (1,delta,size,prices, x, clearing_error)


In [None]:
# STEP 5: RUNNING THE ALGORITHM

dataset=generate_full_student_dataset(50)

# RECORD DEMAND SETS TO CSV FILE (changes each run)
with open('demand_set.csv','w', newline='') as csvfile:
  writer = csv.writer(csvfile, delimiter=' ')
  for i in range(len(dataset)):
    # writerow() treats its argument as a sequence of fields, so a bare string
    # such as "12" would be written as the two fields "1" and "2".
    writer.writerow([i])
    for j in dataset[i]:
      writer.writerow(j)

res=aceei(0.00005, 0.01, [[0] * 11], 50, 0.0001, dataset)