In [27]:
import numpy as np

In [35]:
# Total number of classes offered
N_CLASSES = 50
# Number of classes in which each student enrolls
N_CLASSES_PER_STUDENT = 30
# Total number of students
N_STUDENTS = 200
# Seed for NumPy's global random generator, so that Restart & Run All
# reproduces the same simulated enrollment and grades on every run
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

In [36]:
# Randomly select which classes each student will take.
# Each entry is an integer array of distinct class indices: the first
# N_CLASSES_PER_STUDENT elements of a uniformly random permutation of
# 0..N_CLASSES-1 (sampling without replacement, in random order).
enrollment = [np.random.permutation(N_CLASSES)[:N_CLASSES_PER_STUDENT]
              for _ in range(N_STUDENTS)]

In [37]:
# Draw one difficulty offset per class, stored as a column with one row per class.
# Offsets come from a normal distribution with mean 0 and standard deviation 5:
# below zero marks a "hard" class, above zero an "easy" one.
class_difficulties = np.random.normal(loc=0, scale=5, size=(N_CLASSES, 1))

In [38]:
# generate a tendency for each student to be high performing or low performing
student_means = 100 - np.random.lognormal(np.log(6), np.log(2.5), (N_STUDENTS, 1))
# grades[j, i] is student i's numerical grade in class j; NaN marks "not enrolled"
grades = np.full((N_CLASSES, N_STUDENTS), np.nan)
for i in range(N_STUDENTS):
    # indices of the classes this student takes
    taken = enrollment[i]
    # one baseline grade per enrolled class, from a normal distribution
    # centered around the student's tendency
    student_grades = np.random.normal(student_means[i, 0], 3, len(taken))
    # offset each grade by a value drawn from a normal distribution centered
    # around the difficulty of *that* class (indexed per class, not always class 0)
    class_offsets = np.random.normal(class_difficulties[taken, 0], 3)
    grades[taken, i] = student_grades + class_offsets

In [39]:
# Cutoff scores separating adjacent letter grades, in ascending order.
# A finer-grained (plus/minus) alternative:
# breaks = [60, 63, 67, 70, 73, 77, 80, 83, 87, 90, 93, 97]
breaks = [60, 70, 80, 90]
def letter(num):
    """Convert a numerical grade into a letter-grade index.

    Returns the position of the first cutoff in `breaks` that `num` falls
    strictly below, or `len(breaks)` when no cutoff exceeds `num`.
    A NaN input yields None.
    """
    if np.isnan(num):
        return None
    below = (idx for idx, cutoff in enumerate(breaks) if num < cutoff)
    return next(below, len(breaks))

In [40]:
# Build the letter-grade table, one entry per (class, student) pair,
# by passing every numerical grade through letter().
letters = np.zeros((N_CLASSES, N_STUDENTS))
for class_idx, letter_row in enumerate(letters):
    # letter_row is a view into letters, so writes land in the table itself
    for student_idx in range(letter_row.size):
        letter_row[student_idx] = letter(grades[class_idx][student_idx])

In [41]:
# find the mean letter grade in each class, ignoring students who are not
# enrolled (NaN entries); stored as a column with one row per class
means = np.full((N_CLASSES, 1), np.nan)
for i in range(len(means)):
    class_letters = letters[i]
    earned = class_letters[~np.isnan(class_letters)]
    # a class with no enrolled students keeps a NaN mean instead of
    # raising ZeroDivisionError
    if earned.size > 0:
        means[i] = earned.mean()

In [42]:
# each student gets a "score" of their letter grade minus the mean letter grade
# in each class they took; classes they did not take stay NaN because
# NaN propagates through the subtraction.
# means holds one row per class, so it broadcasts across the student columns.
points = letters - means

In [43]:
# Summarise each student's score differentials as a tuple
# (student index, mean, standard deviation), computed over the classes
# the student actually took (non-NaN entries).
# Intended ordering: higher mean is better, lower standard deviation breaks ties.
ranking = []
for i in range(N_STUDENTS):
    student_points = points[:N_CLASSES, i]
    valid_scores = student_points[~np.isnan(student_points)]
    ranking.append((i, valid_scores.mean(), valid_scores.std()))

In [44]:
# number of students in one decile (10% of all students, rounded down)
N_DECILE = N_STUDENTS // 10

# sort once, worst to best: ascending mean score, ties broken by descending
# standard deviation so that the steadier student gets the higher index
ranked_students = sorted(ranking, key=lambda x: (x[1], -1 * x[2]))
# explicit non-negative cut points: a slice such as [-0:] would return the
# whole list when N_DECILE is 0 (fewer than 10 students)
top_start = max(len(ranked_students) - N_DECILE, 0)
next_start = max(len(ranked_students) - 2 * N_DECILE, 0)

# top 10% (higher list index is better)
top_10 = ranked_students[top_start:]
# second-best 10% (higher list index is better)
next_10 = ranked_students[next_start:top_start]