In [65]:
import pandas as pd
import numpy as np
from scipy.stats import norm

In [66]:
#### CONFIG ####
hours_block_mean = 4
hours_block_sd = 2
sd_slope = 0.1
sd_int = -4

In [67]:
#### INPUTS ####
projects = pd.DataFrame({
    'Projects' : ["A", "B", "C", "D", "E"],
    'Hours' : [7.5, 3.5 ,8, 5, 10]
})

# base hours for a specific project and day, can be simulated as greater but not less
# if multiple projects in one day, simulation will prioritize adding to the larger project
project_adjustments = pd.DataFrame({
    'Mon' : {"A" : 1},
    'Tues' : {},
    'Wed' : {},
    'Thurs' : {},
    'Fri' : {}
})

# acts as the maximum hours for day. Once reached will automatically trigger end-day
# if project adjustment for a day greater than cap, receive error message
daily_caps = pd.DataFrame({
    'Day' : ["Mon","Tues","Wed","Thurs","Fri"],
    'Adjustment' : [float('inf'),float('inf'),float('inf'),float('inf'),float('inf')]
})

In [68]:
daily_caps

Unnamed: 0,Adjustment,Day
0,inf,Mon
1,inf,Tues
2,inf,Wed
3,inf,Thurs
4,inf,Fri


In [69]:
 hours_daily < daily_caps.loc[daily_caps['Day'] == day, 'Adjustment']

4    True
Name: Adjustment, dtype: bool

In [70]:
1 + float('inf')

inf

In [54]:
#### SIMULATE ####
pool = np.repeat(projects["Projects"].values, (projects["Hours"].values/0.25).tolist(), axis = 0).tolist()

hours_daily_mean = projects["Hours"].sum() / 5

hours_daily_sd = abs(projects["Hours"].sum() * sd_slope + sd_int)

days = ["Mon","Tues","Wed","Thur","Fri"]


In [55]:
results         = []
p_ends          = []
p_repeats       = []
p_samples       = []
path_selections = []
hours_blocks    = []
hours_dailies   = []
days_result     = []

In [56]:
day = -1
path = "newday"

In [59]:
while len(pool) > 0 :
    
    if path == "newday":
        day += 1
        hours_daily = 0
        hours_block = 0
    
    weekday = days[day]
    days_result.append(weekday)
    
    if hours == daily_cap:
        path == "newday"
    else:
    
        if path == "newday":
            pick = np.random.choice(pool,1)[0]
            hours_block = 0.25
        else:
            if path == "sample" and len([x for x in pool if x != pick]) > 0:
                pick = np.random.choice([x for x in pool if x != pick],1)[0]
                hours_block = 0.25 
            elif path == "repeat":
                # keep pick 
                hours_block = len(results[[i for i, value in enumerate(path_selections) if value != path][-1]:]) * 0.25   

        pool.remove(pick) 
        results.append(pick)

        if len(results) < 2:
            hours_daily = 0.25
        else:
            hours_daily = len([i for i in days_result if i == weekday]) * 0.25

        if pick in pool:
            p_repeat = 1 - norm.cdf(hours_block,hours_block_mean,hours_block_sd)
        else:
            p_repeat = 0

        if end < 4:
            p_end = norm.cdf(hours_daily,hours_daily_mean, hours_daily_sd)
        else:
            p_end = 0

        if p_repeat + p_end >= 1:
            p_sample = 0
        else:
            p_sample = 1 - p_repeat - p_end

        raw_probs = [p_repeat, p_sample, p_end]
        probs = [float(i) / sum(raw_probs) for i in raw_probs]
        paths = ["repeat","sample","end"]
        path  = np.random.choice(paths, 1, p = probs)[0]

        
        
        path_selections.append(path)
        p_ends.append(p_end)
        p_repeats.append(p_repeat)
        p_samples.append(p_sample)
        hours_blocks.append(hours_block)
        hours_dailies.append(hours_daily)

In [9]:
final = pd.DataFrame({'Day'         : days_result,
                      'Projects'    : results,
                      'p_end'       : p_ends,
                      'p_repeat'    : p_repeats,
                      'p_sample'    : p_samples,
                      'path'        : path_selections,
                      'hours_block' : hours_blocks,
                      'hour_daily'  : hours_dailies
                     })

In [10]:
final

Unnamed: 0,Day,Projects,hour_daily,hours_block,p_end,p_repeat,p_sample,path
0,Mon,E,0.25,0.25,4.797648e-28,0.969604,0.030396,end
1,Mon,E,0.50,0.25,4.319006e-26,0.969604,0.030396,repeat
2,Mon,E,0.75,0.50,3.273336e-24,0.959941,0.040059,repeat
3,Mon,E,1.00,0.75,2.088818e-22,0.947919,0.052081,repeat
4,Mon,E,1.25,1.00,1.122463e-20,0.933193,0.066807,repeat
5,Mon,E,1.50,1.25,5.080084e-19,0.915434,0.084566,repeat
6,Mon,A,1.75,0.25,1.936739e-17,0.969604,0.030396,sample
7,Mon,A,2.00,0.25,6.220961e-16,0.969604,0.030396,repeat
8,Mon,A,2.25,0.50,1.683940e-14,0.959941,0.040059,repeat
9,Mon,A,2.50,0.75,3.842292e-13,0.947919,0.052081,repeat


In [11]:
final.shape

(136, 8)

In [12]:
len(results)

136

In [13]:
projects["Hours"].sum() / 0.25

136.0

In [14]:
len(pool)

0

In [15]:
final2 = final.groupby(['Day','Projects']).size()*0.25
final2

Day   Projects
Fri   A           1.00
      B           1.50
      C           0.75
      D           1.50
      E           2.75
Mon   A           3.00
      E           3.75
Thur  B           0.75
      C           4.00
      D           2.00
Tues  A           1.25
      B           1.25
      E           3.50
Wed   A           2.25
      C           3.25
      D           1.50
dtype: float64

In [16]:
final3 = final.groupby(['Day']).size()*0.25
final3

Day
Fri     7.50
Mon     6.75
Thur    6.75
Tues    6.00
Wed     7.00
dtype: float64