In [1]:
import math
import random
import statistics
import scipy.stats

We have 20 days. Each day we consider a candidate and can hire him or not.
We can hire only a single worker.
Each candidate has an efficiency, measured in story points per day.
The candidate efficiency follows a gamma distribution with shape parameter a=2.
We'll compare several algorithms for hiring.

In [2]:
# Length of the hiring window: one candidate is considered per day.
DAYS = 20
# Fixed seed so that "Restart Kernel -> Run All" reproduces the same numbers.
SEED = 42
# Candidate efficiency (story points per day) is drawn from a gamma
# distribution with shape parameter a=2.
DIST = scipy.stats.gamma(a=2)
# Seed the frozen distribution's own generator, so DIST.rvs() is reproducible
# without reseeding NumPy's global random state.
DIST.random_state = SEED

In [3]:
def probability_that_candidate_is_the_best(day, candidate_eff):
    """Probability that no later candidate could deliver more story points.

    A candidate hired on `day` works for `DAYS - day` days and delivers
    `(DAYS - day) * candidate_eff` points. A candidate seen on a later day
    has fewer days left, so they must be more efficient than
    `points / days_left` to beat that total. The result is the product,
    over all later days, of the probability (taken from `DIST.cdf`) that the
    later candidate stays at or below that efficiency. Draws are treated as
    independent.

    Args:
        day: zero-based index of the current day.
        candidate_eff: efficiency of the current candidate, in story points
            per day.

    Returns:
        The probability as a float; 1.0 when there are no later days.
    """
    # Story points this candidate delivers if hired today.
    points = (DAYS - day) * candidate_eff
    prob = 1.0
    for future_day in range(day + 1, DAYS):
        # Working days a candidate hired on `future_day` would have.
        days_left = DAYS - future_day
        prob *= DIST.cdf(points / days_left)
    return prob

In [4]:
def _can_do_points(day, candidate_eff, target_points):
    """Shared rule behind the `can_do_*_points` strategies.

    Args:
        day: zero-based index of the current day.
        candidate_eff: efficiency of the current candidate, in story points
            per day.
        target_points: story points the candidate has to reach in the
            remaining `DAYS - day` days.

    Returns:
        True on the last day (there is nobody left to wait for), otherwise
        whether `(DAYS - day) * candidate_eff >= target_points`.
    """
    if day + 1 == DAYS:
        return True
    remaining_days = DAYS - day
    return remaining_days * candidate_eff >= target_points

def can_do_70_points(day, candidate_eff, _):
    """Hire a candidate who can do 70 story points in remaining time.

    The third argument (past candidates) is ignored. On the last day the
    candidate is hired unconditionally.
    """
    return _can_do_points(day, candidate_eff, 70)

def can_do_60_points(day, candidate_eff, _):
    """Hire a candidate who can do 60 story points in remaining time.

    The third argument (past candidates) is ignored. On the last day the
    candidate is hired unconditionally.
    """
    return _can_do_points(day, candidate_eff, 60)

def can_do_50_points(day, candidate_eff, _):
    """Hire a candidate who can do 50 story points in remaining time.

    The third argument (past candidates) is ignored. On the last day the
    candidate is hired unconditionally.
    """
    return _can_do_points(day, candidate_eff, 50)

def can_do_40_points(day, candidate_eff, _):
    """Hire a candidate who can do 40 story points in remaining time.

    The third argument (past candidates) is ignored. On the last day the
    candidate is hired unconditionally.
    """
    return _can_do_points(day, candidate_eff, 40)

def _is_best_with_probability(day, candidate_eff, threshold):
    """Shared rule behind the `is_best_with_probability_*` strategies.

    Args:
        day: zero-based index of the current day.
        candidate_eff: efficiency of the current candidate, in story points
            per day.
        threshold: the candidate is hired when the value returned by
            `probability_that_candidate_is_the_best` is strictly greater
            than this (a fraction, e.g. 0.9 for 90%).

    Returns:
        True on the last day (there is nobody left to wait for), otherwise
        the result of the strict comparison described above.
    """
    if day + 1 == DAYS:
        return True
    return probability_that_candidate_is_the_best(day, candidate_eff) > threshold

def is_best_with_probability_90(day, candidate_eff, _):
    """Hire a candidate who will do more points than any future candidate with probability 90%.

    The third argument (past candidates) is ignored. On the last day the
    candidate is hired unconditionally.
    """
    return _is_best_with_probability(day, candidate_eff, 0.9)

def is_best_with_probability_50(day, candidate_eff, _):
    """Hire a candidate who will do more points than any future candidate with probability 50%.

    The third argument (past candidates) is ignored. On the last day the
    candidate is hired unconditionally.
    """
    return _is_best_with_probability(day, candidate_eff, 0.5)

def is_best_with_probability_10(day, candidate_eff, _):
    """Hire a candidate who will do more points than any future candidate with probability 10%.

    The third argument (past candidates) is ignored. On the last day the
    candidate is hired unconditionally.
    """
    return _is_best_with_probability(day, candidate_eff, 0.1)

def is_best_with_probability_01(day, candidate_eff, _):
    """Hire a candidate who will do more points than any future candidate with probability 1%.

    The third argument (past candidates) is ignored. On the last day the
    candidate is hired unconditionally.
    """
    return _is_best_with_probability(day, candidate_eff, 0.01)

def is_above_average(day, candidate, _):
    """Hire the first candidate whose efficiency reaches the distribution mean.

    Args:
        day: zero-based index of the current day.
        candidate: efficiency of the current candidate.
        _: past candidates; ignored by this strategy.

    Returns:
        True on the last day, otherwise whether `candidate >= DIST.mean()`.
    """
    is_last_day = day + 1 == DAYS
    # On the last day there is nobody left to wait for, so hire regardless.
    return is_last_day or candidate >= DIST.mean()

def is_better_than_first_37_percent(day, candidate, past_candidates):
    """Observe for the first DAYS / e days, then hire the first record-setter.

    Args:
        day: zero-based index of the current day.
        candidate: efficiency of the current candidate.
        past_candidates: efficiencies of the candidates seen before `day`.

    Returns:
        True on the last day; False while `day < DAYS / math.e`; otherwise
        whether `candidate` is at least as good as every past candidate.
    """
    last_day = DAYS - 1
    if day == last_day:
        # Nobody left to wait for: hire regardless.
        return True
    observation_cutoff = DAYS / math.e
    if day >= observation_cutoff:
        return candidate >= max(past_candidates)
    # Still in the observation phase: never hire.
    return False

# Every hiring strategy compared by the simulation, in report order.
ALGORITHMS = [
    can_do_70_points,
    can_do_60_points,
    can_do_50_points,
    can_do_40_points,
    is_best_with_probability_90,
    is_best_with_probability_50,
    is_best_with_probability_10,
    is_best_with_probability_01,
    is_above_average,
    is_better_than_first_37_percent,
]

In [5]:
def simulate(algorithm):
    """Run one hiring window with the given strategy.

    Draws `DAYS` candidate efficiencies from `DIST` and offers them to
    `algorithm` one day at a time, stopping at the first hire.

    Args:
        algorithm: callable `(day, candidate, past_candidates)` returning a
            truthy value to hire the current candidate.

    Returns:
        A tuple `(workdays, hire_days, candidates)`:
        `workdays` is the story points delivered by the hire, i.e.
        `(DAYS - day) * candidate` (0 if nobody was hired); `hire_days` is
        the list of days on which a hire happened (at most one entry);
        `candidates` is the full list of drawn efficiencies.
    """
    candidates = list(DIST.rvs(size=DAYS))
    hire_days = []
    workdays = 0
    for day, efficiency in enumerate(candidates):
        seen_so_far = candidates[:day]
        if not algorithm(day, efficiency, seen_so_far):
            continue
        # The hire works from `day` through the end of the window.
        workdays += (DAYS - day) * efficiency
        hire_days.append(day)
        # Only a single worker can be hired, so stop at the first hire.
        break
    return workdays, hire_days, candidates

In [6]:
SIMULATIONS = 1000

# Run every strategy SIMULATIONS times; each run draws its own candidates.
results = [
    (algorithm, [simulate(algorithm) for _ in range(SIMULATIONS)])
    for algorithm in ALGORITHMS
]

print("name               achieved_60_points median    avg  best worst typical_first_hire_day found_most_efficient found_best")
for alg, simulations in results:
    # Per-simulation story points and day of the (single) hire.
    points_done = [workdays for workdays, _, _ in simulations]
    first_hire_days = [hire_days[0] for _, hire_days, _ in simulations]

    success_rate = sum(1 for points in points_done if points >= 60) / SIMULATIONS
    median = statistics.median(points_done)
    avg = statistics.mean(points_done)
    best = max(points_done)
    worst = min(points_done)
    typical_first_hire_day = statistics.median(first_hire_days)

    # Count runs where the hire was the most efficient candidate overall, and
    # runs where the hire delivered the most points any candidate could have.
    efficient_hits = 0
    best_hits = 0
    for _, hire_days, candidates in simulations:
        hired_on = hire_days[0]
        hired_eff = candidates[hired_on]
        if hired_eff == max(candidates):
            efficient_hits += 1
        best_possible = max(candidate * (DAYS - day) for day, candidate in enumerate(candidates))
        if hired_eff * (DAYS - hired_on) == best_possible:
            best_hits += 1
    found_efficient_rate = efficient_hits / SIMULATIONS
    found_best_rate = best_hits / SIMULATIONS

    print(f"{alg.__name__:32} {success_rate:4.0%} {median:6.1f} {avg:6.1f} {best:5.0f} {worst:5.2f} {typical_first_hire_day:22.0f} {found_efficient_rate:20.0%} {found_best_rate:10.0%}")

name               achieved_60_points median    avg  best worst typical_first_hire_day found_most_efficient found_best
can_do_70_points                  50%   70.0   47.3   214  0.09                     10                  31%        44%
can_do_60_points                  70%   66.8   58.2   200  0.05                      3                  32%        54%
can_do_50_points                  57%   62.6   63.4   218  0.10                      2                  28%        59%
can_do_40_points                  42%   55.7   60.8   222  0.24                      1                  16%        47%
is_best_with_probability_90       41%   45.3   52.7   196  0.08                     11                  59%        38%
is_best_with_probability_50       61%   66.3   67.8   178  4.57                      4                  32%        62%
is_best_with_probability_10       45%   57.1   62.9   170 18.80                      1                  15%        50%
is_best_with_probability_01       34%   49.7   5

We're comparing the following statistics across 1000 simulations:
- What's the probability that we do at least 60 story points?
- What are the median, average, best and worst numbers of story points done?
- What's the probability that we hire the most efficient candidate?
- What's the probability that we hire the best candidate, i.e., the one who will do the most story points in the remaining time?





Possible variations of the problem:
- single employee vs several employees
- hire for current project (thus hire date affects the cost) vs hire for future project (thus hire date doesn't affect anything)
- can hire only current candidate vs can hire any past candidate
  (the combination "hire past candidates" + "hire for future project" is trivial: see every candidate, then hire the most efficient one)