In [83]:
import pandas as pd
import numpy as np
import scipy.stats as sp
import gurobipy as gp
from gurobipy import GRB
import datetime as dt
import matplotlib.pyplot as plt

In [84]:
wl=pd.read_csv("onlywaitlists.csv")
# sum together enrollment and waitlist data
wl_data=wl.groupby(['Class']).sum().reset_index()
# dtop the row the totals waitlist number per class
wl_data=wl_data.drop(43)
print(wl_data["Cap"])
# drop excess column/rows, transpose to get groups as rows
wl_tr=wl_data.drop(['Class'], axis=1).transpose()
wl_tr=wl_tr.drop(["OTHER", "Total", "Cap"], axis=0)
# compute the total number of waitlists per group
wl_tr["total waitlists"]=wl_tr.sum(axis=1)
# total number of students in each group
wl_tr["total students"]=wl.iloc[0]

# compute the ratio betweem waitlists and total students
# this ratio can be interpeted as the average number of waitlists joined by students in this group
wl_tr["ratio"]=wl_tr["total waitlists"]/wl_tr["total students"]
# divide the number of waitlists per each course by the total number of waitlists
# this can be interpreted as the probability that a student in this group would join the waitlist for this particular class
wl_final=wl_tr[range(0,43)].div(wl_tr["total waitlists"], axis=0)
# bring back the previous columns, fill in naN values with 0
wl_final['waitlist']=wl_tr["total waitlists"]
wl_final['students']=wl_tr["total students"]
wl_final['ratio']=wl_tr["ratio"]
wl_final=wl_final.fillna(0)
# get rid of rows with no students 
wl_final = wl_final[wl_final["students"] > 2]
wl_final

0      50.0
1      75.0
2      30.0
3      30.0
4      35.0
5      30.0
6      20.0
7      25.0
8     490.0
9     796.0
10    160.0
11     50.0
12     60.0
13    500.0
14    398.0
15    360.0
16    300.0
17    598.0
18    120.0
19    160.0
20     75.0
21     75.0
22    398.0
23     45.0
24     40.0
25     69.0
26     60.0
27     40.0
28    328.0
29    299.0
30    160.0
31    100.0
32    147.0
33    220.0
34     60.0
35    630.0
36     30.0
37    280.0
38    100.0
39    170.0
40    110.0
41    200.0
42    200.0
Name: Cap, dtype: float64


  wl_data=wl.groupby(['Class']).sum().reset_index()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,36,37,38,39,40,41,42,waitlist,students,ratio
BCCR1,0.005682,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.357955,...,0.0,0.005682,0.0,0.0,0.0,0.0,0.039773,176.0,132,1.333333
BCCR2,0.004167,0.0,0.0,0.004167,0.0,0.0,0.0,0.0,0.0,0.058333,...,0.0,0.1,0.0,0.0,0.0,0.0,0.0375,240.0,131,1.832061
BCCR3,0.004587,0.0,0.004587,0.03211,0.0,0.0,0.0,0.0,0.0,0.059633,...,0.0,0.055046,0.022936,0.0,0.0,0.03211,0.004587,218.0,106,2.056604
BCCR4,0.012048,0.018072,0.012048,0.024096,0.0,0.006024,0.0,0.0,0.018072,0.024096,...,0.0,0.006024,0.018072,0.0,0.0,0.018072,0.012048,166.0,89,1.865169
CCCOMA3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.076923,0.0,0.0,0.0,0.153846,0.0,13.0,9,1.444444
CCCOMA4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.263158,0.0,0.0,0.0,0.0,0.0,19.0,10,1.9
CCCOMS3,0.01107,0.01845,0.0,0.00369,0.0,0.00369,0.00369,0.0,0.02583,0.0,...,0.0,0.051661,0.02952,0.0,0.0,0.01845,0.0,271.0,103,2.631068
CCCOMS4,0.0,0.054299,0.004525,0.022624,0.0,0.004525,0.0,0.0,0.049774,0.0,...,0.0,0.0181,0.036199,0.0,0.004525,0.0181,0.004525,221.0,108,2.046296
CCFNEC3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,...,0.0,0.052632,0.0,0.0,0.0,0.052632,0.052632,19.0,11,1.727273
CCUNDC1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.231343,...,0.0,0.018657,0.0,0.0,0.0,0.007463,0.037313,268.0,188,1.425532


In [85]:
wl_totcap=wl.groupby(['Class']).sum()[["Total", "Cap"]]
wl_totcap['Ratio']=wl_totcap["Total"]/wl_totcap["Cap"]
wl_totcap=wl_totcap.reset_index().drop(43)
wl_totcap=wl_totcap.sort_values(by="Ratio")
wl_totcap

  wl_totcap=wl.groupby(['Class']).sum()[["Total", "Cap"]]


Unnamed: 0,Class,Total,Cap,Ratio
34,COMSW4773,0.0,60.0,0.0
7,COMSE6915,0.0,25.0,0.0
39,CSEEW4121,1.0,170.0,0.005882
10,COMSW1404,5.0,160.0,0.03125
30,COMSW4721,27.0,160.0,0.16875
9,COMSW1004,267.0,796.0,0.335427
4,COMSE6261,12.0,35.0,0.342857
40,CSEEW4840,46.0,110.0,0.418182
8,COMSE6998,235.0,490.0,0.479592
24,COMSW4182,20.0,40.0,0.5


In [86]:
classes=wl_totcap[32:].index.tolist()
len(classes)

11

In [87]:
frac=wl_totcap[32:]["Total"].sum()/wl_totcap["Total"].sum()
frac

0.39601399665297427

In [88]:
cap=wl_totcap[32:]["Cap"].tolist()
cap

[280.0, 100.0, 398.0, 30.0, 360.0, 60.0, 300.0, 40.0, 75.0, 30.0, 100.0]

In [89]:
mu=wl_totcap["Total"].sum()/wl_final["students"].sum()
mu

2.733056133056133

In [90]:
prob = sp.poisson.cdf(0, mu)
prob

0.06502027510346889

In [91]:
wl_final["students"].sum()*(1-prob)*frac

890.4874636380856

In [92]:
wl_total=wl_totcap[32:]["Total"].sum()
class_prob=wl_totcap[32:]["Total"]/wl_total

In [93]:
def sample_class_num(mean, N):
    sample=np.round(np.random.poisson(mean, N))
    sample[sample<0]=0
    return sample

# randomly chooses courses according to the distribution
def course_ranking(class_num, course_prob):
    if(np.all(course_prob == 0)):
        print("all 0")
        return np.zeros(class_num)
    return np.random.choice(11,min(class_num, np.count_nonzero(course_prob)),p=course_prob, replace=False)

# for a certain group compute a sample of course rankings
def sample_course_ranking(size):
    class_num=np.repeat(3,size)
    rank=[course_ranking(x, class_prob) for x in class_num]
    return rank

def generate_demand_set(arr, max_bundle_size):
    possible_bundles = []
    for i in range(len(arr)):
        for j in range(i + 1, len(arr) + 1):
            bundle = arr[i:j]
            possible_bundles.append((sum(bundle), bundle))
    #put in order of utility
    possible_bundles.sort(key=lambda x: (x[0], len(x[1])), reverse=True)
    #get rid of bundles that are too large
    final_possible_bundles = [x[1] for x in possible_bundles if len(x) <= max_bundle_size]
    return final_possible_bundles

def one_hot_encode(input_list):
    encoded_vector = np.zeros(11)
    encoded_vector[input_list] = 1
    return encoded_vector.tolist()

def demand_set_to_one_hot(all_demand_set):
	one_hot_demand_sets = []
	for student_ds in all_demand_set: 
		student_one_hot_ds = []
		for bundle in student_ds: 
			one_hot = one_hot_encode(bundle)
			student_one_hot_ds.append(one_hot)
		one_hot_demand_sets.append(student_one_hot_ds)
	return one_hot_demand_sets

def get_all_demand_set(rank,max_bundle_size=3):
    all_demand_sets = []
    for student in rank:
        demand_set = generate_demand_set(student, max_bundle_size)
        all_demand_sets.append(demand_set)

    return all_demand_sets

def pad_arrays(allocations):
	max_len = len(max(allocations, key=len)) 
	for allocation in allocations: 
		while len(allocation) < max_len: 
			allocation.append(allocation[-1])
	return allocations

In [94]:
def generate_full_student_dataset(size): 
	complete_demand_set = [] 

	rank=sample_course_ranking(size)
	all_demand_set=get_all_demand_set(rank,3)
	filtered_demand_set = []
	for ds in all_demand_set:
		if len(ds) == 0:
			continue
		filtered_demand_set.append(ds)
		
	one_hot_demand_set = demand_set_to_one_hot(filtered_demand_set)
	complete_demand_set.extend(one_hot_demand_set)

	return complete_demand_set

In [95]:
def get_prices_for_each_allocation(prices, student_demand_set):
	price_for_each_demand_set = []
	for demand_set in student_demand_set: 
		prices_np = np.array(prices)
		demand_set_np = np.array(demand_set).T
		total_price = prices_np.dot(demand_set_np)
		price_for_each_demand_set.append(total_price[0])
	
	return price_for_each_demand_set

def budget_perterbations(prices, student_demand_set, epsilon,delta, initial_budget): 
	# input: prices for each course & demand set for a student 
	# prices = [.2, .3, .1, .5] (ie, price of course at idx 0 is 0.3, price of course at idx 1 is 0.2)
	# student_demand_set = [[1,1,0,0], [1,0,0,0],[0,1,0,0]] where a student's most preferred bundle is [1,1,0,0] and they get classes 0 and 1
	# second preferred bundle is [1,0,0,0] where the student would get only class 0, etc. 

	# output: budget allocation for each student, ie if they spent 0.2 to 0.5 (noninclusive), they would want allocation [1,0,0,0]
	# if they spent 0.5 or more, they would want allocation [1,1,0,0]

	#idx 0 price is for the top ranked bundle 
	alloc=[]
	budgets=[]
	budget=initial_budget+epsilon 
	for i in student_demand_set: 
		cost=np.dot(prices, i)
		if cost>budget:
			continue
		alloc.append(i)
		budgets.append(max(cost,initial_budget-epsilon))
		while budget>initial_budget-epsilon and budget>=cost:
			budget-=delta
		if budget <= initial_budget-epsilon:
			break
	if len (alloc)==0:
		alloc.append(np.zeros(11))
		budgets.append(0)
	return (alloc, budgets)


In [96]:
# prices=np.repeat(0.1,11)
# student_demand_set=dataset[0]
# budget_perterbations(prices, student_demand_set, 0.2,0.01,0.3)

In [97]:

def clearing_error_optimizer(a, c, p):
	n=len(a)
	m=len(c)
	ki=len(a[0])
	# Create a new model
	model = gp.Model("Clearing_Error_Minimization")

	# Decision variables
	# These are the dimensions of the decision variable array
	# array of binary decision variables with n rows and ki columns
	# z = model.addVars(m, lb=0.0, vtype=GRB.INTEGER, name="z")
	# x = model.addVars(n, ki, vtype=GRB.BINARY, name="x")

	# # Objective function: Minimize the l1-norm of vector z
	# model.setObjective(gp.quicksum(z[j] for j in range(m)), sense=GRB.MINIMIZE)

# clearing error variables
	z = model.addVars(m, vtype=GRB.INTEGER, name="z")
	y = model.addVar(name="y")

	x = model.addVars(n, ki, vtype=GRB.BINARY, name="x")


    # OBJECTIVE: minimize y, which will be equal to the 1-norm of z
	model.setObjective(y, sense=GRB.MINIMIZE)


    # Constraints

    #Set y == ||z||_1
	model.addGenConstrNorm(y, z, 1.0, "normconstr")
	# Constraints
	for j in range(m):
		if p[j]>0:
			model.addConstr(gp.quicksum(x[i, l] * a[i][l][j] for i in range(n) for l in range(ki)) == c[j] + z[j], f"clearing_error_positive_{j}")
		if p[j]==0:
			model.addConstr(gp.quicksum(x[i, l] * a[i][l][j] for i in range(n) for l in range(ki)) <= c[j] + z[j], f"clearing_error_nonnegative_{j}")
	
	for i in range(n):
		model.addConstr(gp.quicksum(x[i, l] for l in range(ki)) == 1, f"one_schedule_per_student_{i}")

	# Solve the model
	model.optimize()

	# Check optimization status
	if model.status == GRB.OPTIMAL:
		# Access the optimal solution
		optimal_x = model.getAttr("x", x)
		optimal_z = model.getAttr("x", z)
		optimal_obj = model.ObjVal

		# Print or use the optimal solution as needed
		print("Optimal x values:", optimal_x)
		print("Optimal z values:", optimal_z)
		return (optimal_x, optimal_z, optimal_obj)
	else:
		print(f"Optimization terminated with status {model.status}")
		# Check if the model is infeasible
		model.computeIIS()
		print("\nInfeasible constraints:")
		for c in model.getConstrs():
			if c.IISConstr:
				print(c.constrName)
		return (None, None, None)




In [106]:
#example input
def show_classes_per_student(x,a, budgets): 
	student_classes = {}
	student_budgets={}
	for key, value in x.items(): 
		bundle = a[key[0]][key[1]]
		if value != 0: 
			if key[0] not in student_classes: 
				student_demand_set = []
				student_demand_set.append([index for index, value in enumerate(bundle) if value == 1])
				student_classes[key[0]] = student_demand_set
				budget = budgets[key[0]][key[1]]
				student_budgets[key[0]] = budget[0]

	return student_classes, student_budgets

def show_students_per_class(x, a):
	class_students = {}
	for key, value in x.items(): 
		if value == 0:
			continue
		bundle = a[key[0]][key[1]]
		for i, class_num in enumerate(bundle):
			if class_num == 1 and i not in class_students: 
				class_students[i] = [key[0]]
			elif class_num == 1: 
				class_students[i].append(key[0])
	return class_students

def get_budget_perterbations(prices, one_hot_demand_set,epsilon, delta,initial_budget):
	all_budget_perterbations = []
	all_budget_intervals = []
	for student_demand_set in one_hot_demand_set: 
		allocations, budget_intervals = budget_perterbations(prices, student_demand_set, epsilon, delta, initial_budget)
		if allocations != [] and budget_intervals != []:
			all_budget_perterbations.append(allocations)
			all_budget_intervals.append(budget_intervals)
	allocations = pad_arrays(all_budget_perterbations)
	budget_intervals = pad_arrays(all_budget_intervals)
	return allocations, budget_intervals

def pretty_print_output(x, prices,z,a,b, f):
	student_classes, student_budgets = show_classes_per_student(x,a,b)
	for key in student_classes: 
		f.write("student: "+str(key)+ " classes: "+str(student_classes[key]) +" budget: "+str(student_budgets[key])+ "\n")

	classes_per_student = show_students_per_class(x, a)
	for key in classes_per_student: 
		f.write("class: "+ str(key)+" students: "+ str(classes_per_student[key])+ "\n")
		f.write("class: " + str(key) + " capacity: " + str(len(classes_per_student[key])) + "\n")
	string_list = [str(float_value) for float_value in prices[0]]
	prices_string = ' '.join(string_list)
	f.write("prices: "+ prices_string+ "\n")
	string_list_z = [str(float_value) for float_value in z]
	prices_string_z = ' '.join(string_list_z)
	f.write("z: "+ prices_string_z+ "\n")

def aceei(delta, epsilon, prices, size): 
	f = open("out"+str(dt.datetime.now())+".txt", "w+")
	clearing_error=[]
	optimum=9999999
	i=0
	one_hot_demand_set=generate_full_student_dataset(size)
	x=0
	while optimum>=1: 
		# try:
		a, b = get_budget_perterbations(prices, one_hot_demand_set, epsilon,delta, 0.1)
		#print(np.array(a).shape, np.array(b).shape)	
		c = [int(i/(1200/size)) for i in cap]
		print("cap", c)
		p = prices[0]
		x, z, optimum= clearing_error_optimizer(a, c, p)
		# print("Epoch: " + str(i) + ", optimum: " + str(optimum) + "\n")
		f.write("\nEpoch: " + str(i)+ ", optimum: " + str(optimum) + "\n")
		# 	pretty_print_output(x, prices, z.values(), a,b , f)
		print(z.values())
		prices = (np.array(prices) + np.array(list(z.values()))*delta).tolist() 
		clearing_error.append(optimum)
		i+=1
		# except: 
		# 	print("error")
		# 	return (0,delta,size,prices, x, clearing_error)
	f.close()
	return (1,delta,size,prices, x, clearing_error)


In [99]:
# deltas=[0.0001,0.00001,0.000005]
# sizes=[50,100,150]
# results=[]
# for i in deltas: 
#   for j in sizes: 
#     res=aceei(i, 0.01, [[0] * 11], j)
#     results.append(res)

In [109]:
res = aceei(.000005, .01, [[0] * 11], 100)

cap [23, 8, 33, 2, 30, 5, 25, 3, 6, 2, 8]
Gurobi Optimizer version 11.0.0 build v11.0.0rc2 (mac64[arm] - Darwin 22.5.0 22F66)

CPU model: Apple M2 Pro
Thread count: 12 physical cores, 12 logical processors, using up to 12 threads

Optimize a model with 111 rows, 112 columns and 411 nonzeros
Model fingerprint: 0xc91de20d
Model has 1 general constraint
Variable types: 1 continuous, 111 integer (100 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 3e+01]
Presolve removed 111 rows and 112 columns
Presolve time: 0.00s
Presolve: All rows and columns removed

Explored 0 nodes (0 simplex iterations) in 0.00 seconds (0.00 work units)
Thread count was 1 (of 12 available processors)

Solution count 1: 155 

Optimal solution found (tolerance 1.00e-04)
Best objective 1.550000000000e+02, best bound 1.550000000000e+02, gap 0.0000%
Optimal x values: {(0, 0): 1.0, (1, 0): 1.0, (2, 0): 1.0, (3

AttributeError: 'NoneType' object has no attribute 'values'

In [None]:
# for i in results:
#   print(i[0:3],i[5][len(i[5])-1])

In [None]:
res=aceei(0.000005, 0.02, [[0] * 11])

TypeError: aceei() missing 1 required positional argument: 'size'

In [None]:
error = results[6][5]

## Arrange the plot

#empirical
x_axis = np.array(range(0, len(error)))
plt.plot(x_axis, error, label="Clearing error")
plt.ylabel("Clearing error (1-norm of z)")
plt.xlabel("Iteration")

#theory bound
alpha = np.sqrt(min(2*100, len(cap))* len(cap))/2
plt.axhline(alpha, color="r", label="Theory bound", linestyle="dashed")


plt.title(f"Clearing error for n={50}, m={11}, k={3}, d = {0.0001}, e = {0.01}")
plt.legend(loc="upper right")

plt.show()