In [1]:
# setup python path
import os
import sys

# Project root that holds the `workshop` package. The historical location is kept
# as the default; set the WORKSHOP_ROOT environment variable to run elsewhere.
WORKSHOP_ROOT = os.environ.get("WORKSHOP_ROOT", '/home/r2d9/workshop')

# Guarded so that re-running this cell does not pile up duplicate entries.
if WORKSHOP_ROOT not in sys.path:
    sys.path.append(WORKSHOP_ROOT)

In [130]:
import os

import pandas as pd

from workshop.settings import DATA_PATH, RESULTS_PATH

ImportError: cannot import name 'RESULTS_PATH'

# Init

In [6]:
# Lower / upper bound on the number of people assigned to a single day.
MIN_PPL, MAX_PPL = 125, 300

# Baseline: filling days in order of wishes
We go through the families in the order they appear in the data and assign each one to the first of its choices that still has places available.

In [20]:
data = pd.read_csv(os.path.join(DATA_PATH, "family_data.csv"), index_col="family_id")

# running head-count per day; days are labelled 1..100
constraints = pd.Series(0, index=list(range(1, 101)), name="constraint")

for family_id, row in data.iterrows():
    n_members = row["n_people"]
    wishes = row.drop("n_people")
    # first wished day that can still take the whole family, None when there is none
    chosen_day = next(
        (wish for wish in wishes if constraints.loc[wish] + n_members <= MAX_PPL),
        None,
    )
    if chosen_day is None:
        continue  # no wished day has room: the family keeps no assigned day
    constraints.loc[chosen_day] += n_members
    data.loc[family_id, "assigned_day"] = chosen_day

In [21]:
# head-count per assigned day (rows whose assigned_day is NaN form no group)
constraint_checker = data["n_people"].groupby(data["assigned_day"]).sum()
(
    constraint_checker.min() >= MIN_PPL,
    constraint_checker.max() <= MAX_PPL,
    "Sounds good doesn't work - D.J. Trump",
)

(False, True, "Sounds good doesn't work - D.J. Trump")

# Baseline v2: taking each day and filling it
We go through the days and fill each one (stopping once at least 125 people are assigned) by looking at who most wants to go on that day.

In [70]:
data = pd.read_csv(os.path.join(DATA_PATH, "family_data.csv"), index_col="family_id")

n_choices = ["choice_{}".format(i) for i in range(10)]  # for convenience when parsing data columns
assigned = list()  # holds people already assigned, in assignment order
assigned_lookup = set()  # same ids as `assigned`; gives O(1) membership tests

for day in range(1, 101):

    day_people_number = 0

    # walk the wish columns in order: choice_0, choice_1, ..., choice_9
    for n_choice in n_choices:

        if day_people_number >= MIN_PPL:
            break  # minimum attendance reached, move on to the next day

        # families that listed `day` in this wish column
        curr_most_interested = data[data[n_choice] == day]

        for family_id, family_size in curr_most_interested["n_people"].items():
            if day_people_number >= MIN_PPL:
                break  # day got filled part-way through this wish column
            if family_id in assigned_lookup:
                continue  # family was already placed on an earlier day
            day_people_number += family_size
            data.loc[family_id, "assigned_day"] = day
            assigned.append(family_id)
            assigned_lookup.add(family_id)

In [81]:
# unassigned families check: prints False when at least one family has no day (oops)
print(data["assigned_day"].isna().sum() == 0)
# Head-count per assigned day. `skipna` is not a parameter of groupby `sum` in
# pandas 1.x/2.x (passing it raises TypeError there), so it is left out.
constraint_checker = data.groupby("assigned_day")["n_people"].sum()
constraint_checker.min() >= MIN_PPL, constraint_checker.max() <= MAX_PPL

False


(True, True)

# Baseline v1+2=3
The baselines above break the constraints: we cannot simply parse the data in a naive greedy manner. We will try to be a bit smarter about the order in which we parse the data.

In [117]:
# wish columns only (every column whose name contains "choice_")
choice_columns = [name for name in data.columns if "choice_" in name]
choices = data[choice_columns]
# number of people in each family
sizes = data["n_people"]

In [118]:
# potential attendance of a day: summed size of the families that list it as a wish
potentials = pd.Series(0, index=range(1, 101))

for family, wished_days in choices.iterrows():
    family_headcount = sizes.loc[family]
    potentials.loc[wished_days] += family_headcount

In [119]:
# days with the smallest potential attendance come first (ascending is the default)
potentials = potentials.sort_values()

In [120]:
# Results are collected in plain dicts first: enlarging a Series one label at a
# time copies it on every insert, and an empty Series created without a dtype
# has a default dtype that differs between pandas versions.
day_of_family = {}
headcount_of_day = {}

# given potentials fill the minimum capacity
# (`Series.iteritems` was removed in pandas 2.0; only the labels are needed here)
for day in potentials.index:

    headcount_of_day[day] = 0
    # families that list `day` in any of their wish columns
    possible_families = choices.index[(choices == day).any(axis=1)].tolist()

    for family in possible_families:
        if family not in day_of_family:
            day_of_family[family] = day
            headcount_of_day[day] += sizes.loc[family]
        # NOTE(review): strict '>' means a day sitting exactly at MIN_PPL still
        # takes further families; confirm whether '>=' was intended.
        if headcount_of_day[day] > MIN_PPL:
            break

# family id -> assigned day, and day -> number of people assigned so far
assignments = pd.Series(day_of_family, name="assigned_day", dtype="int64")
attendances = pd.Series(headcount_of_day, name="attendance", dtype="int64")

In [121]:
# place every family that has no day yet on its first wish that still has room
already_placed = choices.index.isin(assignments.index)
leftovers_choices = choices.loc[~already_placed]

for family, wished_days in leftovers_choices.iterrows():
    family_headcount = sizes.loc[family]

    for wished_day in wished_days:
        if attendances.loc[wished_day] + family_headcount > MAX_PPL:
            continue  # this day would go over the cap, try the next wish
        assignments.loc[family] = wished_day
        attendances.loc[wished_day] += family_headcount
        break

In [123]:
# every day within bounds, and every family got a day
# (the family count comes from `sizes` rather than a hard-coded 5000)
attendances.min() >= MIN_PPL, attendances.max() <= MAX_PPL, len(assignments) == len(sizes)

(True, True, True)

In [128]:
# Turn the family -> day Series into a two-column frame (family_id, assigned_day).
# The isinstance guard keeps this cell re-runnable: once `assignments` is already
# a frame it is written out as-is instead of gaining another index column.
if isinstance(assignments, pd.Series):
    assignments = assignments.rename_axis("family_id").reset_index()
assignments.to_csv(os.path.join(RESULTS_PATH, "baseline.csv"), index=False)