In [1]:
import numpy as np
from tqdm import tqdm

from plotly.subplots import make_subplots
import plotly.graph_objects as go

from gale_shapley import run_matching
from oneshot import oneshot

In [2]:
def dict_to_list(dictionary):
	"""Flatten an integer-keyed dictionary into a list ordered by key.

	Entry ``i`` of the result is ``dictionary.get(i)`` for every ``i`` in
	``range(len(dictionary))``; an index that is not a key yields ``None``.
	"""
	return [dictionary.get(position) for position in range(len(dictionary))]

In [3]:
def compute_metrics(bins):
	"""Summarise a matching outcome as a match ratio and an average placement.

	Args:
		bins: sequence of sized collections, one per placement. The last bin is
			taken to hold the unmatched students (the match ratio is one minus
			its share of all students).

	Returns:
		(match_ratio, average_placement): the fraction of students outside the
		last bin, and the sum over bins 1..len(bins)-1 of the bin's share of
		students multiplied by its index.
	"""
	counts = np.array([len(students_in_bin) for students_in_bin in bins])
	num_students = np.sum(counts)

	match_ratio = 1 - (counts[-1] / num_students)

	# Weights follow the number of bins instead of a hard-coded range(1, 13),
	# so the function also works when there are not exactly 13 bins.
	# NOTE(review): counts[1:] skips the first bin and includes the last
	# (unmatched) bin with the largest weight. If bin 0 is the first choice this
	# is a 0-based placement average with unmatched counted as the last index;
	# confirm that this is intended rather than counts[:-1].
	weights = np.arange(1, len(counts))
	average_placement = np.sum(counts[1:] / num_students * weights)

	return match_ratio, average_placement

In [4]:
def merge(a, b):
	"""Concatenate two equally long lists of lists position by position.

	Returns a new list whose entry ``i`` is ``a[i] + b[i]``; the inputs are
	not modified.

	Raises:
		ValueError: if ``a`` and ``b`` differ in length.
	"""
	if len(a) != len(b):
		raise ValueError()

	return [left + right for left, right in zip(a, b)]

In [5]:
# Parameter sets for the nine simulation stages, keyed by stage number (1..9).
# Each entry is unpacked as keyword arguments into oneshot(...) by the cells
# below, e.g. oneshot(seed=..., return_list=True, **stages[9]).
# NOTE(review): going by the inline comments, 'sel' looks like the school
# selection mode, 'rnk' the ordering mode, 'adm' the school admission method,
# and None a combination of modes; the actual encoding is defined in oneshot,
# so confirm it there.
stages = {
	1: { 'sel': 0, 'rnk': 0, 'adm': 1, 'max_schools': True },  # Rand selection, rand ordering, open schools
	2: { 'sel': 0, 'rnk': 1, 'adm': 1, 'max_schools': True },  # Rand selection, like ordering, open schools
	3: { 'sel': 1, 'rnk': 0, 'adm': 1, 'max_schools': True },  # Pop selection, rand ordering, open schools
	4: { 'sel': 1, 'rnk': 1, 'adm': 1, 'max_schools': True },  # Pop selection, like ordering, open schools
	5: { 'sel': None, 'rnk': None, 'adm': 1, 'max_schools': True },  # Combination students, open schools
	6: { 'sel': None, 'rnk': None, 'adm': 2, 'max_schools': True },  # Combination students, EdOpt schools
	7: { 'sel': None, 'rnk': None, 'adm': 3, 'max_schools': True },  # Combination students, screen schools
	8: { 'sel': None, 'rnk': None, 'adm': None, 'max_schools': True },  # Combination students & schools
	9: { 'sel': None, 'rnk': None, 'adm': None, 'max_schools': False }  # Combination students & schools, variable list length
}

In [7]:
# Stage 9: compute average top choice likeability by list length
# Both accumulators are indexed as [seed, list length - 1]: the summed
# likeability of each student's first-ranked school, and the number of
# students, for 10 seeds and list lengths 1..12.
likeability_bins = np.zeros((10, 12))
length_bins = np.zeros((10, 12))

for seed in range(10):
	students, schools, student_info, school_info = oneshot(seed=seed, return_list=True, **stages[9])

	for ranking in students.values():
		# NOTE(review): field 4 of a school_info entry is presumably the school's
		# likeability score; confirm against oneshot.
		likeability = school_info[ranking[0]][4]
		index = len(ranking) - 1
		likeability_bins[seed, index] += likeability
		length_bins[seed, index] += 1

In [12]:
# Per-seed mean likeability for each list length (sum / count), then averaged
# over the seeds (axis 0), i.e. a mean of per-seed means.
# NOTE(review): a (seed, length) cell without any student gives 0/0 = nan
# here; confirm that every list length occurs in every seed.
likeability_avgs = np.mean(likeability_bins / length_bins, axis=0)
for i, e in enumerate(likeability_avgs):
	print(f'Length: {i+1} => likeability: {e}')

Length: 1 => likeability: 0.2916745706117132
Length: 2 => likeability: 0.32851862592815
Length: 3 => likeability: 0.3508518752428666
Length: 4 => likeability: 0.36742644514529305
Length: 5 => likeability: 0.38182114983990334
Length: 6 => likeability: 0.3928149872640191
Length: 7 => likeability: 0.40128947303517765
Length: 8 => likeability: 0.41054376131993936
Length: 9 => likeability: 0.41620230062623265
Length: 10 => likeability: 0.42279611033418485
Length: 11 => likeability: 0.4230655384725214
Length: 12 => likeability: 0.4311853448956386


In [16]:
# Line chart of the averaged top-choice likeability against ranking length 1..12.
fig = make_subplots()
fig.add_trace(
	go.Scatter(
		x=list(range(1, 13)),
		y=likeability_avgs,
		marker=dict(size=20),
		line=dict(width=5),
	)
)

fig.update_layout(
	width=1600,
	height=1200,
	showlegend=False,
	xaxis={'title': 'Ranking length', 'tickmode': 'array', 'tickvals': list(range(1, 13))},
	yaxis={'title': 'Average top choice likeability', 'range': [0, 0.5]},
	margin=dict(t=0, l=100, b=0, r=0),
	font=dict(family='Lato', size=36),
)

fig.show()

In [6]:
# Stages 8/9: compute the average likeability by placement
def _likeability_by_placement(rankings, info, num_placements=12):
	"""Group school likeability values by their position in the students' rankings.

	Args:
		rankings: iterable of rankings, each a sequence of school identifiers.
		info: mapping from school identifier to a record whose field 4 is read
			as the likeability (NOTE(review): presumably the likeability score;
			confirm against oneshot).
		num_placements: number of ranking positions to collect.

	Returns:
		A list with ``num_placements`` independent lists; list ``i`` holds the
		field-4 values of all schools ranked at position ``i``, in iteration order.

	Raises:
		IndexError: if a ranking is longer than ``num_placements``.
	"""
	# Independent lists plus append keep this linear; rebuilding a bin with
	# `bin + [value]` for every element copies the whole bin each time.
	collected = [[] for _ in range(num_placements)]
	for ranking in rankings:
		for i, dbn in enumerate(ranking):
			collected[i].append(info[dbn][4])
	return collected


students, schools, student_info, school_info = oneshot(seed=11, return_list=True, **stages[8])
likeability_bins_2 = _likeability_by_placement(students.values(), school_info)

students, schools, student_info, school_info = oneshot(seed=11, return_list=True, **stages[9])
likeability_bins_4 = _likeability_by_placement(students.values(), school_info)

In [7]:
# Mean likeability per placement for stage 8 (likeability_bins_2) and stage 9 (likeability_bins_4).
likeability_means_2 = list(map(np.mean, likeability_bins_2))
likeability_means_4 = list(map(np.mean, likeability_bins_4))

# Print the two series side by side, one placement per line.
for a, b in zip(likeability_means_2, likeability_means_4):
	print(f'Stage 8: {a:.3f}; stage 9: {b:.3f}')

Stage 8: 0.434; stage 9: 0.397
Stage 8: 0.365; stage 9: 0.332
Stage 8: 0.339; stage 9: 0.301
Stage 8: 0.319; stage 9: 0.278
Stage 8: 0.302; stage 9: 0.260
Stage 8: 0.288; stage 9: 0.246
Stage 8: 0.271; stage 9: 0.236
Stage 8: 0.257; stage 9: 0.228
Stage 8: 0.246; stage 9: 0.223
Stage 8: 0.236; stage 9: 0.218
Stage 8: 0.224; stage 9: 0.212
Stage 8: 0.207; stage 9: 0.204


In [3]:
# Line chart comparing the mean likeability per placement of stage 8 and stage 9.
fig = make_subplots()
fig.add_trace(
	go.Scatter(
		x=list(range(1, 13)),
		y=likeability_means_2,
		name='Stage 8',
		marker=dict(size=20),
		line=dict(width=5),
	)
)
fig.add_trace(
	go.Scatter(
		x=list(range(1, 13)),
		y=likeability_means_4,
		name='Stage 9',
		marker=dict(size=20),
		line=dict(width=5),
	)
)

fig.update_layout(
	width=1600,
	height=1200,
	showlegend=True,
	xaxis={'title': 'Placement in ranking', 'tickmode': 'array', 'tickvals': list(range(1, 13))},
	yaxis={'title': 'Average likeability', 'range': [0, 0.5]},
	legend=dict(xanchor='right', x=1, yanchor='top', y=1),
	margin=dict(t=0, l=100, b=0, r=0),
	font=dict(family='Lato', size=36),
)

fig.show()

In [95]:
# Stage 9: compute the list lengths of all students and plot match counts w.r.t. number of students with as many schools
students, schools, student_info, school_info = oneshot(seed=11, return_list=True, **stages[9])
# lengths[k - 1] counts the students whose ranking contains exactly k schools (k = 1..12).
lengths = [0] * 12

for ranking in students.values():
	lengths[len(ranking) - 1] += 1

# Only the first return value of run_matching is kept; the other two are discarded here.
bins_dict, _, _ = run_matching(students, student_info, schools, school_info)

In [96]:
import plotly.express as px

# Bar chart of the share of students matched at each placement, where each
# placement has its own denominator (see div below).
bins = dict_to_list(bins_dict)
counts = [len(bin) for bin in bins]

# x positions: placements 1..12, plus 13.5 for the bar labelled 'Unmatched' below.
choices = list(range(1, 13)) + [13.5]
# Denominators: the reversed cumulative sum of lengths gives, for placement k,
# the number of students whose list has at least k schools; the final entry is
# the total number of students over all bins.
div = np.hstack([np.cumsum(lengths[::-1])[::-1], [np.sum(counts)]])

# counts is a plain list; dividing it by the ndarray div is elementwise.
fig = px.bar(x=choices, y=counts/div)

fig.update_layout(
	width=600,
	height=400,
	showlegend=False,
	xaxis=dict(title='Placement in ranking', tickmode='array', tickvals=choices, ticktext=choices[:-1]+['Unmatched']),
	yaxis=dict(title='Students matched', range=[0, 1], tickformat='.0%'),  # Histogram
	margin={ 't': 20, 'l': 0, 'b': 0, 'r': 0 },
	font={ 'family': 'Lato' }
)

fig.show()

In [33]:
# Interactive guard: the computation below only runs when the user answers 'y'.
# NOTE(review): with any other answer the three arrays are never created, and
# the cell that averages them fails with a NameError on a fresh kernel.
res = input('This script takes about 12 minutes to run. Are you sure you want to do it?')

if res.lower() == 'y':
	# Result arrays indexed as [seed, stage - 1] (plus placement index for the first one).
	unique_schools_all = np.zeros((10, 9, 12))
	capacities_top_all = np.zeros((10, 9))
	capacities_all = np.zeros((10, 9))

	for stage, params in stages.items():
		for seed in range(10):
			# Compute the number of unique schools by placement and the average capacity of top choice schools and ranked schools
			students, schools, student_info, school_info = oneshot(seed=seed, return_list=True, **params)

			# Only rankings long enough to have a school at position i contribute to placement i.
			for i in range(12):
				unique_schools_all[seed, stage-1, i] = len(np.unique([ranking[i] for ranking in students.values() if len(ranking) > i]))

			# NOTE(review): field 1 of a school_info entry is presumably the school's capacity; confirm against oneshot.
			# The second metric is a mean over students of each student's mean over their ranked schools.
			capacities_top_all[seed, stage-1] += np.mean([school_info[ranking[0]][1] for ranking in students.values()])
			capacities_all[seed, stage-1] += np.mean([np.mean([school_info[dbn][1] for dbn in ranking]) for ranking in students.values()])

In [37]:
# Average every metric over the seed axis (axis 0), leaving one row/value per stage.
unique_schools_avg = np.mean(unique_schools_all, axis=0)
capacities_top_avg = np.mean(capacities_top_all, axis=0)
capacities_avg = np.mean(capacities_all, axis=0)

# astype(int) truncates the averaged unique-school counts for display only;
# unique_schools_avg itself keeps its float values.
for stage in stages.keys():
	print(f'Stage {stage}: {unique_schools_avg[stage-1].astype(int)} unique choices. Average capacity: top {capacities_top_avg[stage-1]:.2f}, ranked {capacities_avg[stage-1]:.2f}')

Stage 1: [439 439 439 439 439 439 439 439 439 439 439 439] unique choices. Average capacity: top 165.66, ranked 166.08
Stage 2: [227 278 312 339 355 365 364 357 340 312 278 228] unique choices. Average capacity: top 283.50, ranked 166.08
Stage 3: [435 435 435 434 434 434 435 434 435 435 435 434] unique choices. Average capacity: top 284.53, ranked 284.42
Stage 4: [104 152 193 230 271 308 338 364 380 389 376 338] unique choices. Average capacity: top 407.86, ranked 284.42
Stage 5: [439 439 439 439 439 439 439 439 439 439 439 439] unique choices. Average capacity: top 283.68, ranked 224.63
Stage 6: [439 439 439 439 439 439 439 439 439 439 439 439] unique choices. Average capacity: top 283.68, ranked 224.63
Stage 7: [439 439 439 439 439 439 439 439 439 439 439 439] unique choices. Average capacity: top 283.68, ranked 224.63
Stage 8: [439 439 439 439 439 439 439 439 439 439 439 439] unique choices. Average capacity: top 283.68, ranked 224.63
Stage 9: [439 439 439 439 439 439 439 439 439 43

In [None]:
# Persist the seed-averaged metrics as .npy files (paths are relative to the
# notebook's working directory).
np.save('./Data/Metrics/unique_schools.npy', unique_schools_avg)
np.save('./Data/Metrics/capacity_top.npy', capacities_top_avg)
np.save('./Data/Metrics/capacity_rank.npy', capacities_avg)

In [63]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Line chart of the seed-averaged number of unique schools per placement, one trace for each of stages 1-4.
fig = make_subplots()
for stage in range(1, 5):
	fig.add_trace(
		go.Scatter(
			x=list(range(1, 13)),
			y=unique_schools_avg[stage-1],
			name=f'Stage {stage}',
			marker=dict(size=20),
			line=dict(width=5),
		)
	)

fig.update_layout(
	width=1600,
	height=1200,
	showlegend=True,
	xaxis={'title': 'Placement in ranking', 'tickmode': 'array', 'tickvals': list(range(1, 13))},
	yaxis={'title': 'Unique schools', 'range': [0, 500]},
	legend=dict(xanchor='right', x=1, yanchor='bottom', y=0),
	margin=dict(t=0, l=100, b=0, r=0),
	font=dict(family='Lato', size=36),
)

fig.show()

| Stage     | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|-----------|---|---|---|---|---|---|---|---|---|
|Top choices|439|227|435|104|439|439|439|439|439|
|Top seats  |166|282|285|408|285|285|285|285|276|
|Avg seats  |166|166|284|284|224|224|224|224|222|

In [19]:
# Interactive guard: the multi-hour computation below only runs when the user answers 'y'.
res = input('This script takes forever to run. Are you sure you want to do it?')

if res.lower() == 'y':
	# Compute statistics for all stages
	stats = {}
	match_ratios = np.zeros((9))
	average_placements = np.zeros((9))

	for stage, params in stages.items():
		print(f'\nComputing metrics for stage {stage}')

		# 13 bins per stage: index placement - 1, where a falsy rank is mapped to
		# placement 13 below (NOTE(review): presumably the unmatched students).
		# Independent lists, so appending to one bin cannot affect another.
		bins_combined = [[] for _ in range(13)]
		gpa_by_placement = [[] for _ in range(13)]
		# Compute metrics over different random states
		for random_state in tqdm(range(1, 11)):
			# Generate input
			students, schools, student_info, school_info = oneshot(seed=random_state, return_list=True, **params)
			# Run the matching
			bins, matches, _ = run_matching(students, student_info, schools, school_info)
			# Merge bins over random states
			bins_list = dict_to_list(bins)
			bins_combined = merge(bins_combined, bins_list)
			for student_id, outcome in matches.items():
				# NOTE(review): field 5 of a student_info entry is presumably the GPA; confirm against oneshot.
				gpa = student_info[student_id][5]
				placement = outcome['rank'] or 13
				# Append in place: rebuilding the list with `+ [gpa]` for every
				# student copies the whole bin each time (quadratic overall).
				gpa_by_placement[placement - 1].append(gpa)

		# Save stats for later
		counts = np.array([len(placement_bin) for placement_bin in bins_combined])
		# Upper median of each bin; None for an empty bin instead of an
		# IndexError that would discard hours of computation.
		medians = [
			sorted(placement_bin)[len(placement_bin)//2] if placement_bin else None
			for placement_bin in bins_combined
		]
		gpa_avg = [np.mean(pl) for pl in gpa_by_placement]
		stats[stage] = { 'counts': counts, 'medians': medians, 'gpas': gpa_avg }

		# Compute the metrics and append to results
		match_ratio, average_placement = compute_metrics(bins_combined)
		match_ratios[stage - 1] += match_ratio
		average_placements[stage - 1] += average_placement


Computing metrics for stage 1


100%|██████████| 10/10 [07:31<00:00, 45.11s/it]



Computing metrics for stage 2


100%|██████████| 10/10 [24:09<00:00, 144.97s/it]



Computing metrics for stage 3


100%|██████████| 10/10 [08:22<00:00, 50.27s/it]



Computing metrics for stage 4


100%|██████████| 10/10 [23:40<00:00, 142.09s/it]



Computing metrics for stage 5


100%|██████████| 10/10 [16:26<00:00, 98.60s/it] 



Computing metrics for stage 6


100%|██████████| 10/10 [16:34<00:00, 99.43s/it] 



Computing metrics for stage 7


100%|██████████| 10/10 [16:56<00:00, 101.64s/it]



Computing metrics for stage 8


100%|██████████| 10/10 [16:52<00:00, 101.27s/it]



Computing metrics for stage 9


100%|██████████| 10/10 [20:13<00:00, 121.35s/it]


In [4]:
# Cache the results of the long computation on disk.
# stats is a dict, so np.save stores it as a pickled object (allow_pickle=True);
# the other two are plain numeric arrays.
np.save('./Data/Metrics/stats.npy', stats, allow_pickle=True)
np.save('./Data/Metrics/match_ratios.npy', match_ratios)
np.save('./Data/Metrics/average_placements.npy', average_placements)

In [3]:
# Reload the cached metrics so later cells can run without repeating the long computation.
# NOTE(review): allow_pickle=True unpickles arbitrary objects, which can execute
# code; only load stats.npy when it comes from a trusted source.
# .item() unwraps the 0-d object array that np.save wrote for the stats dict.
stats = np.load('./Data/Metrics/stats.npy', allow_pickle=True).item()
match_ratios = np.load('./Data/Metrics/match_ratios.npy')
average_placements = np.load('./Data/Metrics/average_placements.npy')

In [9]:
# Print the match ratio (mr) and average placement (ap) of every stage;
# both arrays are indexed by stage - 1.
for stage in stages.keys():
	print(f'Stage {stage}: mr = {match_ratios[stage-1]}, ap = {average_placements[stage-1]}')

Stage 1: mr = 0.9260757894736842, ap = 2.0215536842105264
Stage 2: mr = 0.9869108771929824, ap = 3.8595550877192983
Stage 3: mr = 0.9639887719298246, ap = 1.5317277192982455
Stage 4: mr = 0.9484112280701754, ap = 6.221454035087719
Stage 5: mr = 0.9830343859649123, ap = 2.165397894736842
Stage 6: mr = 0.9830821052631579, ap = 2.162769122807017
Stage 7: mr = 0.9829600000000001, ap = 2.1648252631578946
Stage 8: mr = 0.983121403508772, ap = 2.1668898245614034
Stage 9: mr = 0.9541150877192982, ap = 1.6068856140350878


In [55]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def generate_hex_labels():
	"""Build tick positions and labels for an axis of 8-digit hexadecimal numbers.

	Returns:
		(ticks, labels): ``labels`` holds the 16 hex digits (ints 0-9 followed
		by the strings 'a'-'f'); ``ticks`` holds, for each digit, the integer
		value of that digit followed by seven zero digits.
	"""
	labels = list(range(10)) + list('abcdef')
	ticks = []
	for digit in labels:
		ticks.append(int('0x' + str(digit) + '0000000', 0))
	return ticks, labels

def hex_to_int(numbers: list[str]) -> np.array:
	"""Convert hexadecimal strings to integers, using at most their first 8 characters.

	Args:
		numbers: hexadecimal digit strings without a '0x' prefix.

	Returns:
		A NumPy array with one integer per input string.
	"""
	# Slicing already leaves strings of 8 or fewer characters untouched.
	return np.array([int('0x' + num[:8], 0) for num in numbers])

In [59]:
# Histogram of placements for one selected stage, with the number of unique
# schools per placement on a secondary y axis.
stage = 4  # 1 ... 9
counts, medians, gpas = stats[stage]['counts'], stats[stage]['medians'], stats[stage]['gpas']

num_students = np.sum(counts)
# x positions: placements 1..12, plus 13.5 for the bar labelled 'Unmatched' below.
choices = list(range(1, 13)) + [13.5]

fig = make_subplots(specs=[[{'secondary_y': True}]])
fig.add_trace(go.Bar(x=choices, y=counts/num_students, name='Percentage of students matched'), secondary_y=False)
# NOTE(review): unique_schools_avg is created by the cell that averages the
# 12-minute script's results; the np.load cell restores stats, match_ratios and
# average_placements only, so this line needs that script to have run in this
# kernel (or './Data/Metrics/unique_schools.npy' to be loaded by hand).
fig.add_trace(go.Scatter(x=list(range(1, 13)), y=unique_schools_avg[stage-1], name='Unique schools'), secondary_y=True)
# fig.add_trace(go.Scatter(x=choices, y=gpas, name='Average GPA', marker_color='rgba(0,204,150,256)'), secondary_y=True)
# fig.add_trace(go.Scatter(x=choices, y=hex_to_int(medians), name='Median lottery number'), secondary_y=True)

# Only needed by the commented-out lottery-number axis below.
ticks_hex, labels_hex = generate_hex_labels()
fig.update_layout(
	width=600,
	height=400,
	showlegend=False,
	xaxis=dict(title='Placement in ranking', tickmode='array', tickvals=choices, ticktext=choices[:-1]+['Unmatched']),
	yaxis=dict(title='Students matched', range=[0, 1], tickformat='.0%'),  # Histogram
	yaxis2=dict(title='Unique schools', showgrid=False),
	# yaxis2=dict(title='Average GPA', range=[0, 100]),  # GPA plot
	# yaxis2=dict(title='Median lottery number (first digit)', tickmode='array', tickvals=ticks_hex, ticktext=labels_hex, range=[0, 4.295e9], showgrid=False),
	margin={ 't': 20, 'l': 0, 'b': 0, 'r': 0 },
	font={ 'family': 'Lato' }
)

fig.show()

In [15]:
# Export one figure per stage (1..9): the placement histogram with the average
# GPA per placement on a secondary y axis, written as a PNG to ../Report/.
choices = list(range(1, 13)) + [13.5]
# Only needed by the commented-out lottery-number axis below.
ticks_hex, labels_hex = generate_hex_labels()

# NOTE(review): the loop variable leaks into the notebook namespace, so `stage`
# is 9 after this cell has run.
for stage in range(1, 10):
	counts, medians, gpas = stats[stage]['counts'], stats[stage]['medians'], stats[stage]['gpas']
	num_students = np.sum(counts)

	fig = make_subplots(specs=[[{'secondary_y': True}]])
	fig.add_trace(go.Bar(x=choices, y=counts/num_students, name='Percentage of students matched'), secondary_y=False)
	fig.add_trace(go.Scatter(x=choices, y=gpas, name='Average GPA', marker_color='rgba(0,204,150,256)', marker={'size': 20}, line={'width': 5}), secondary_y=True)
	# fig.add_trace(go.Scatter(x=choices, y=hex_to_int(medians), name='Median lottery number', marker={'size': 20}, line={'width': 5}), secondary_y=True)

	fig.update_layout(
		width=1600,
		height=1200,
		showlegend=False,
		xaxis=dict(title='Placement in ranking', tickmode='array', tickvals=choices, ticktext=choices[:-1]+['Unmatched']),
		yaxis=dict(title='Students matched', range=[0, 1], tickformat='.0%'),
		yaxis2=dict(title='Average GPA', range=[0, 100]),
		# yaxis2=dict(title='Median lottery number (first digit)', tickmode='array', tickvals=ticks_hex, ticktext=labels_hex, range=[0, 4.295e9], showgrid=False),
		margin={ 't': 50, 'l': 0, 'b': 0, 'r': 0 },
		font={ 'family': 'Lato', 'size': 36 }
	)

	# The figure is written to disk instead of being shown inline.
	fig.write_image(f'../Report/hist_stage_{stage}_gpa.png')

In [28]:
# Average GPA per placement for stages 5, 6 and 7, drawn as one line per stage.
# x positions: placements 1..12, plus 13.5 for the point labelled 'Unmatched' below.
choices = list(range(1, 13)) + [13.5]

fig = make_subplots()
# A dedicated loop variable is used so that the notebook-level `stage` set by
# other cells is neither required nor overwritten by this cell.
for gpa_stage in (5, 6, 7):
	fig.add_trace(go.Scatter(x=choices, y=stats[gpa_stage]['gpas'], name=f'Stage {gpa_stage}', marker={'size': 20}, line={'width': 5}))

fig.update_layout(
	width=1600,
	height=1200,
	showlegend=True,
	xaxis=dict(title='Placement in ranking', tickmode='array', tickvals=choices, ticktext=choices[:-1]+['Unmatched']),
	yaxis=dict(title='Average GPA', range=[60, 80]),
	legend={ 'xanchor': 'right', 'x': 1, 'yanchor': 'top', 'y': 1 },
	margin={ 't': 0, 'l': 100, 'b': 0, 'r': 0 },
	font={ 'family': 'Lato', 'size': 36 }
)

fig.show()