# This is a sandbox for some tests with RCPSPs#

In [1]:
import numpy as np
import pandas as pd
import re # regex
import random
from itertools import chain, combinations

In [2]:
#path = r'D:\Gregor\Dropbox\Studium\j120.sm\j12060_9.sm'
#path = r'C:\Users\webgr_000\Dropbox\Studium\[M04] - WS 2018-2019\Thesis\Test Sets\J30\J30.sm\j301_1.sm'
# Absolute, machine-specific location of the RCPSP test instance (.sm file) to load.
path = r'D:\Gregor\Dropbox\Studium\[M04] - WS 2018-2019\Thesis\Test Sets\J30\J30.sm\j301_1.sm'

# Keep the whole file as a list of lines; the cells below address its sections
# by fixed line positions (e.g. file[1:3], file[13:15], file[-2]).
with open(path, "r") as f:
    file = f.readlines()

Let's parse the file! It has some unique structure so we will hard-code the different parts. We start with some general data about the project

In [3]:
# General file information: the "key : value" pairs on file lines 2-3.
general = {}
for line in file[1:3]:
    fields = [part.strip() for part in line.split(sep=':')]
    general[fields[0]] = fields[1]

# Project summary: the "key : value" pairs on file lines 5-7 ...
project = {}
for line in file[4:7]:
    fields = [part.strip() for part in line.split(sep=':')]
    project[fields[0]] = fields[1]

# ... plus a header row / value row pair (file lines 14-15) that is matched up column by column.
info = [row.split() for row in file[13:15]]
for position, name in enumerate(info[0]):
    project[name] = info[1][position]

general_df = pd.DataFrame.from_dict(general, orient='index', columns=['value'])
project_df = pd.DataFrame.from_dict(project, orient='index', columns=['value'])
project_df

Unnamed: 0,value
projects,1
jobs (incl. supersource/sink ),32
horizon,158
pronr.,1
#jobs,30
rel.date,0
duedate,38
tardcost,26
MPM-Time,38


These are the __resources__: The number of resources is always four.

In [7]:
# Resource capacities: the first four whitespace-separated tokens of the
# second-to-last line of the file, labelled R1..R4.
capacities = file[-2].split()
resource_amounts = {}
for number in range(1, 5):
    resource_amounts['R' + str(number)] = capacities[number - 1]
resources = pd.DataFrame.from_dict(resource_amounts, orient='index', columns=['amount'])
resources

Unnamed: 0,amount
R1,12
R2,13
R3,4
R4,12


Next, all the __relations__ and __durations__ of the tasks.

In [8]:
# Raw strings keep the regex escapes (\d, \w) from being read as (invalid) string escapes.
sep = re.compile(r'[*]+')  # regex pattern to find the end of a block; blocks are separated by a row of ****
dur_sep = re.compile(r'[RND][ \d]{2}|[\w.]+')  # header tokens; keeps names such as "R 1" in one piece


def _to_numeric_if_possible(column):
    '''Return column converted with pd.to_numeric, or unchanged if it cannot be converted.'''
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        # e.g. the 'successors' column, whose cells are lists
        return column


relations = []
idx = 17  # start index to find the precedence relations
while not sep.search(file[idx]):
    relations.append(file[idx])
    idx += 1

durations = []
idx = idx + 4  # start index to find the task's durations. They start four lines after the relations block.
header = dur_sep.findall(file[idx - 2])  # the column header sits two lines above the first data row
while not sep.search(file[idx]):
    durations.append(file[idx].strip().split())
    idx += 1
durations_df = pd.DataFrame(durations, columns=header)

# relations[0] is the header line; every following line is
# "<jobnr.> <#modes> <#successors> <successor ids...>". The successor ids are
# collected into one list-valued cell. All rows are built first so the frame
# is created once instead of being grown row by row.
relation_rows = [
    [rel[0], rel[1], rel[2], list(map(int, rel[3:]))]
    for rel in map(str.split, relations[1:])
]
relations_df = pd.DataFrame(relation_rows, columns=relations[0].split())

# now merge/join both DataFrames and set the columns types to numeric (when possible)
df = relations_df.merge(durations_df, how='right', on='jobnr.')
df.set_index('jobnr.', inplace=True)  # the index keeps the job ids as strings
df = df.apply(_to_numeric_if_possible)

In [9]:
# Sanity check of the parsing: show the resulting column dtypes and the first rows.
print(df.dtypes)
df.head()

#modes          int64
#successors     int64
successors     object
mode            int64
duration        int64
R 1             int64
R 2             int64
R 3             int64
R 4             int64
dtype: object


Unnamed: 0_level_0,#modes,#successors,successors,mode,duration,R 1,R 2,R 3,R 4
jobnr.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,1,3,"[2, 3, 4]",1,0,0,0,0,0
2,1,3,"[6, 11, 15]",1,8,4,0,0,0
3,1,3,"[7, 8, 13]",1,4,10,0,0,0
4,1,3,"[5, 9, 10]",1,6,0,0,0,3
5,1,1,[20],1,3,3,0,0,0


## Basic Random Walk ##
Let's go on a walk - a random walk actually. We will start at the first node and randomly choose the next node to visit. We will also add up the time it takes to pass the nodes. We will not consider the resources for now.

In [200]:
# Start at job 1 and keep jumping to one randomly chosen successor until a
# job without successors is reached. Resources are ignored; only the
# durations of the jobs visited after the start job are summed.
node = df.loc['1']
duration = 0
print('starting with job', node.name)

successors = node.successors
while successors:
    next_job = random.choice(successors)
    node = df.loc[str(next_job)]
    print('successors:', successors, '-> next job:', node.name)
    duration = duration + node.duration
    successors = node.successors

print('total duration:', duration)

starting with job 1
successors: [2, 3, 4] -> next job: 3
successors: [5, 8, 17] -> next job: 5
successors: [18, 48, 52] -> next job: 18
successors: [25, 30, 67] -> next job: 30
successors: [45, 61, 90] -> next job: 90
successors: [98] -> next job: 98
successors: [102, 107, 111] -> next job: 111
successors: [117] -> next job: 117
successors: [119] -> next job: 119
successors: [122] -> next job: 122
total duration: 64


Let's have a look at the resource use. We copy the random walk from above and add the constraints: we have only limited resources available (the number of available units is listed in _resources_df_ for each resource) and jobs block those resources for different amounts of time. When no free resources are available to start the next task we have to wait until a preceding task is completed. We are first going to try a greedy approach and just pick as many tasks as possible to start at each moment.

In [472]:
# Build the resource bookkeeping table: one row per resource, with the total
# capacity ('limits') and the currently free units ('available'); both start
# out as the same values.
# NOTE(review): `resources_df` is not defined in the cells shown here (the
# resource table built above is called `resources`), and it is unclear why the
# first three rows are skipped with iloc[3:] — confirm which frame is meant.
limits = pd.concat([resources_df.iloc[3:], resources_df.iloc[3:]], axis = 1)
limits.columns = ['limits', 'available']
limits = limits.apply(pd.to_numeric)
limits

Unnamed: 0,limits,available
R 1,59,59
R 2,58,58
R 3,53,53
R 4,56,56


In [202]:
# Greedy, time-stepped schedule: in every time step start each candidate job
# whose resource needs still fit, then advance the clock by one and release
# the jobs whose remaining duration has run out.
#   successors    -> candidate jobs (copied, so the list stored in df is not modified)
#   running_jobs  -> job id -> remaining duration
#   finished_jobs -> job id -> time step at which the job finished first
successors = list(df.loc[str(1)].successors)
duration = 0
running_jobs = {}
finished_jobs = {}

while len(successors) > 0:
    # add all jobs to running_jobs as long as the resources are not depleted
    # NOTE(review): a job becomes a candidate as soon as ONE job listing it as
    # successor has finished; nothing here checks that all of its predecessors
    # are done, and jobs that already finished are not excluded from restarting.
    for s in successors:
        # [5:] is positional: everything from the sixth entry of the row on is
        # treated as resource demand (the 'R 1'..'R 4' columns in the df shown
        # above). Any column appended to df later would be included as well.
        if not s in running_jobs and min(limits.available - df.loc[str(s)][5:]) >= 0:
            running_jobs[s] = df.loc[str(s)].duration
            limits.available -= df.loc[str(s)][5:]

    # move on to the next timestep
    duration += 1
    # iterate over a snapshot of the keys because finished jobs are popped inside the loop
    for key in list(running_jobs):
        running_jobs[key] -= 1

        # let's see if a job is finished
        if running_jobs[key] <= 0:
            if not key in finished_jobs:
                finished_jobs[key] = duration
            running_jobs.pop(key)
            # give the resources of the finished job back
            limits.available += df.loc[str(key)][5:]
            # removes only the first occurrence of the job in the candidate list
            successors.remove(key)
            if len(df.loc[str(key)].successors) > 0:
                successors.extend(df.loc[str(key)].successors)
            else:
                # A job without successors ends the search: emptying the
                # candidate list makes the while-condition fail.
                # NOTE(review): jobs still running at that point are dropped,
                # and a further job finishing in the same time step would hit
                # successors.remove() on the emptied list — confirm intended.
                successors = []

print('finished:', finished_jobs)

finished: {2: 1, 4: 2, 15: 3, 39: 4, 45: 8, 16: 8, 3: 9, 106: 9, 48: 9, 9: 10, 6: 11, 11: 11, 44: 11, 7: 12, 17: 12, 64: 13, 84: 17, 72: 17, 81: 19, 24: 19, 28: 19, 108: 19, 8: 19, 5: 20, 109: 20, 33: 24, 63: 24, 78: 25, 32: 25, 20: 25, 23: 26, 12: 27, 91: 27, 49: 28, 30: 29, 93: 29, 105: 30, 115: 30, 96: 30, 27: 30, 21: 31, 114: 31, 118: 31, 46: 33, 29: 33, 104: 33, 95: 36, 112: 37, 94: 37, 119: 37, 56: 37, 18: 38, 122: 38}


This is a deterministic solution. It starts at job 1 (which is just a dummy node) and starts all its successors (jobs 2, 3, 4). These jobs finish after different time periods: job 2 is finished at time step 1, job 4 at time step 2 and job 3 at time step 9. The successors of job 2 (jobs 6, 15 and 45) can therefore be started in time step 1 whereas the successors of job 3 (jobs 5, 8, 17) can only be started after job 3 is finished at time step 9.

so the path through the network is:
- __1 finished in 0 -> 2, 3, 4__
- __2 finished in 1 -> 6, 15, 45__
- 3 finished in 9 -> 5, 8, 17
- 4 finished in 2 -> 7, 9, 11
- 5 finished in 20 -> 18
- 6 finished in 11 -> 23, 33, 63
- 7 finished in 12 -> 12, 27, 29
- 8 finished in 19 ->
- 9 finished in 10 -> 30, 32, 49
- 11 finished in 11 -> 20, 21, 46
- 12 finished in 27 ->
- __15 finished in 3 -> 16, 39, 44__
- 16 finished in 8 -> 24, 28, 64
- 17 finished in 12 -> 33
- 18 finished in 38 ->
- 20 finished in 25 ->
- 21 finished in 31 ->
- 23 finished in 26 ->
- 24 finished in 19 -> 32, 56
- 27 finished in 30 ->
- 28 finished in 19 ->
- 29 finished in 33 ->
- 30 finished in 29 ->
- 32 finished in 25 ->
- 33 finished in 24 ->
- __39 finished in 4 -> 48, 106__
- 44 finished in 11 -> 105, 109, 115
- 45 finished in 8 -> 81
- 46 finished in 33 ->
- __48 finished in 9 -> 72, 78, 84__
- 49 finished in 28 ->
- 56 finished in 37 ->
- 63 finished in 24 ->
- 64 finished in 13 -> 104, 112
- 72 finished in 17 -> 91, 94, 96
- 78 finished in 25 -> 94
- 81 finished in 19 -> 95
- __84 finished in 17 -> 93, 118__
- 91 finished in 27 ->
- 93 finished in 29 ->
- 94 finished in 37 ->
- 95 finished in 36 ->
- 104 finished in 33 ->
- 105 finished in 30 -> 112
- 106 finished in 9 -> 108, 109
- 108 finished in 19 -> 114
- 109 finished in 20 ->
- 112 finished in 37 ->
- 114 finished in 31 ->
- 115 finished in 30 ->
- __118 finished in 31 -> 119__
- __119 finished in 37 -> 122__

Going this path takes the algorithm 37 time steps whereas the time steps for the path from the start node 1 to the sink node 122 (1-2-15-39-48-84-118-119-122) would only take 14 time steps. That's far from optimal!

In [203]:
# Look up the jobs along one hand-picked source-to-sink path and add up their durations.
path_jobs = [1, 2, 15, 39, 48, 84, 118, 119, 122]
network_path = df.loc[[str(job) for job in path_jobs]]
print('summed up durations:', sum(network_path.duration))
network_path

summed up durations: 14


Unnamed: 0_level_0,#modes,#successors,successors,mode,duration,R 1,R 2,R 3,R 4
jobnr.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,1,3,"[2, 3, 4]",1,0,0,0,0,0
2,1,3,"[6, 15, 45]",1,1,2,3,8,1
15,1,3,"[16, 39, 44]",1,2,6,4,2,2
39,1,2,"[48, 106]",1,1,6,5,5,6
48,1,3,"[72, 78, 84]",1,1,6,6,8,1
84,1,2,"[93, 118]",1,5,3,1,3,6
118,1,1,[119],1,1,5,9,5,5
119,1,1,[122],1,3,4,1,9,1
122,1,0,[],1,0,0,0,0,0


When trying to find a solution in the RCPSP we need to know whether we can start a job - we can do so if all its preceding activities are finished. Let's add a list of predecessors to the dataframe *df*.

In [473]:
# Invert the successor lists: for every job collect the jobs that name it as
# a successor. Despite its name, `succ` maps job id (str) -> list of
# predecessor ids (int). Job '1' is pre-seeded with an empty list.
succ = {'1': []}
for job_id, job in df.iterrows():
    for follower in job['successors']:
        succ.setdefault(str(follower), []).append(int(job_id))

df['predecessors'] = df.index.map(succ)

In [474]:
# Inspect the last rows, which now carry the 'predecessors' column.
df.tail()

Unnamed: 0_level_0,#modes,#successors,successors,mode,duration,R 1,R 2,R 3,R 4,predecessors
jobnr.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
118,1,1,[119],1,1,5,9,5,5,"[84, 100, 110]"
119,1,1,[122],1,3,4,1,9,1,"[113, 117, 118]"
120,1,1,[122],1,1,4,6,4,2,"[80, 115, 116]"
121,1,1,[122],1,5,6,6,9,7,"[76, 101, 107]"
122,1,0,[],1,0,0,0,0,0,"[119, 120, 121]"


## Monte Carlo Implementation ##
Based on Johannes Meyl's thesis and his pseudo-code description on page 40. There are three phases during the running time of the algorithm:
1. Exploration phase - short term (**n_s cycles**)
2. Learning phase - mid term (**n_m cycles**)
3. Convergence phase (**n_max - n_m - n_s cycles**)

There is also a parameter **n_noCha** [1, n_max] that defines the number of cycles during which we must not change the size of the Q-matrix.

In [300]:
# Cycle counts of the phases (exploration, learning, total) and the number of
# cycles without Q-matrix size changes; all start at zero here and n_s, n_m,
# n_max are assigned real values further down.
n_s, n_m, n_max, n_noCha = 0, 0, 0, 0

Meyl uses different greediness parameters ε during these phases. We define the function to determine the parameter:

$$\epsilon(n)=\left\{\begin{array}{ll} \varepsilon_s, & 0 \leq n \leq n_s \\
              \frac{\varepsilon_f - \varepsilon_s}{n_m}(n - n_s) + \varepsilon_s & n_s < n \leq n_s + n_m \text{, with } f_\varepsilon = 0 \\
              \varepsilon_m & n_s < n \leq n_s + n_m \text{, with } f_\varepsilon = 1 \\
              0 & n_s + n_m < n \leq n_{max} \end{array}\right.$$

In [468]:
def epsilon(n, function_type=0, eps_m=0):
    '''
    Returns the value of the greediness parameter epsilon that depends on the phase
    (exploration, learning, convergence) of the algorithm.

    The phase lengths n_s, n_m, n_max and the values eps_s, eps_f are read from
    module-level variables.

    n -> current cycle, expected in [0, n_max]
    function_type -> choose 0 for a linear change from eps_s to eps_f during the
                     learning phase and 1 for the constant value eps_m
    eps_m -> value returned during the learning phase when function_type is 1

    Raises a ValueError if n does not fall into any phase; previously the function
    silently returned None in that case.
    '''
    # exploration phase
    if 0 <= n <= n_s:
        return eps_s

    # learning phase
    if n_s < n <= (n_s + n_m):
        if function_type:
            return eps_m
        return ((eps_f - eps_s) / n_m) * (n - n_s) + eps_s

    # convergence phase: always greedy
    if (n_s + n_m) < n <= n_max:
        return 0

    raise ValueError('n = {} is outside of the valid range [0, n_max = {}]'.format(n, n_max))

The reward function can use known project durations *t_min* and *t_max* to encourage the algorithm to improve good solutions even more.

$$r(t)=\left\{\begin{array}{ll} 1 - \left(\frac{t - t_{min}}{t_{max}}\right)^{0.4} & f_r = 0 \\
              \frac{t - t_{max}}{t_{min} - t_{max}} & f_r = 1 \end{array}\right.$$

In [430]:
def reward(t, function_type=0):
    '''
    Returns the reward for a schedule with the project duration t.

    Both variants read the reference durations t_min and t_max from
    module-level variables.

    function_type -> 0: 1 - ((t - t_min) / t_max) ** 0.4
                     1: (t - t_max) / (t_min - t_max), i.e. a linear scale that
                        is 1 for t == t_min and 0 for t == t_max
    '''
    if not function_type:
        shortfall = (t - t_min) / t_max
        return 1 - shortfall**0.4

    return (t - t_max) / (t_min - t_max)

Meyl implements two different types of learning rates: one uses a constant value around 0.01 and one uses a learning rate that decays linearly during the convergence phase (between cycle *n_s + n_m* and cycle *n_max*).

In [1]:
def learning_rate(n, function_type=0, alpha_f=0):
    '''
    Returns the learning rate alpha for cycle n.

    The phase lengths n_s, n_m, n_max and the base rate are read from module-level
    variables. The base rate is looked up under the name `alhpha_b` (sic), which is
    the spelling this notebook uses when it defines the parameter; the function
    used to mix that name with an undefined `alpha_b` and failed with a NameError.

    n -> current cycle, expected in [0, n_max]
    function_type -> choose 0 for the constant value alhpha_b or 1 for a rate that
                     decays linearly from alhpha_b to alpha_f during the
                     convergence phase (n_s + n_m < n <= n_max)
    alpha_f -> final learning rate, reached at n == n_max when function_type is 1

    Raises a ValueError if n is outside of [0, n_max]; previously the function
    silently returned None in that case.
    '''
    # exploration and learning phase: always the base rate
    if 0 <= n <= (n_s + n_m):
        return alhpha_b

    # convergence phase
    if (n_s + n_m) < n <= n_max:
        if function_type:
            # straight line through (n_s + n_m, alhpha_b) and (n_max, alpha_f)
            return (((alhpha_b - alpha_f) / (n_m + n_s - n_max)) * (n - n_max)) + alpha_f
        return alhpha_b

    raise ValueError('n = {} is outside of the valid range [0, n_max = {}]'.format(n, n_max))

These are the parameters Meyl used in his test run to compare his Monte Carlo Approach with a Q-Learning Algorithm. He runs the algorithm three times for n_max in [1000, 2500, 5000]. He uses (though he erroneously uses function type 1 for epsilon):
* *function_type 0* for the epsilon function (linearly decreasing epsilon during learning phase)
* *function_type 1* for the reward function (specified *t_min* and *t_max*)
* *function_type 0* for the learning rate (constant learning rate *alpha_b*)

In [432]:
eps_s, eps_f = 1, 0.05  # function_type 0 -> no need for eps_m
# Base learning rate. `alhpha_b` is the (misspelled) name this notebook has used
# so far; `alpha_b` is the intended spelling. Both are defined so that code
# written against either name finds the value.
alpha_b = alhpha_b = 0.1  # function_type = 0 -> constant value

Meyl uses an object state with a unique ID and two lists for idle tokens (jobs) that can be started when resources are available (so all preceding nodes have been finished) and busy tokens for running jobs. We will just use a DataFrame with two columns for easy access to replicate this object as Meyl does not define any other functions for this object. We will also add the initial state *1* and the possible actions (which are simply the successors of state 1).

In [453]:
# One row per state: 'idle' holds the jobs that may be started, 'busy' the running ones.
states = pd.DataFrame(columns=['idle', 'busy'])
# Store a copy of the successor list: the idle list is modified in place when
# jobs are started (state.idle.remove), which would otherwise also empty the
# successor list kept in df.
states.loc['1'] = [list(df.loc['1'].successors), []]
states

Unnamed: 0,idle,busy
1,[],[]


Likewise he defines an action object that is linked to the state object. The action object is linked to a state and has an ID that identifies it uniquely together with the state ID (as we can usually start different jobs (i.e. start different actions) in each state). The object also keeps a list of all executed jobs. A DataFrame is suitable to replicate the object for now.

In [454]:
# Log of chosen actions: the state an action belongs to, an action id, and
# the jobs the action starts.
actions = pd.DataFrame(columns = ['StateID', 'ActionID', 'executedJobs'])
actions

Unnamed: 0,StateID,ActionID,executedJobs


Meyl's Q-table is implemented as a list of state-action pairs to avoid an actual table with mostly zero-values.

This, for example, is a Q-table with three states: two actions for state 0, one action for state 1 and two actions for state 2:

$$Q(s,a)=\begin{array}{c c} &
\begin{array}{c c c c c} a_{0,0} & a_{0,1} & a_{1,0} & a_{2,0} & a_{2,1} \end{array} 
\\
\begin{array}{c c c} s_0 \\ s_1 \\ s_2 \end{array} 
&
\left[
\begin{array}{c c c c c}
q_{0,0} & q_{0,1} & & &\\
 & & q_{1,0} & &\\
 & & & q_{2,0} & q_{2,1}
\end{array}
\right]
\end{array}$$

It's implemented as a dictionary of states, with a nested dictionary per state that uses the state's actions as keys and the q-values as values:

qtable = {<br>
's_0' : {'a_0' : 'q_0,0', 'a_1' : 'q_0,1'},<br>
's_1' : {'a_0' : 'q_1,0'},<br>
's_2' : {'a_0' : 'q_2,0', 'a_1' : 'q_2,1'}<br>
}

In [435]:
# Q-table, starts empty; intended layout: {state: {action: q-value}}.
qtable = {}

Lese Zustand s_t aus;<br>
Speichere s_t auf Zustandsliste;<br>
**if** s_t *ist neu* **then**<br>
&emsp; Speichere s_t;<br>
**else**<br>
&emsp; Finde s_t in Menge aller Zustände S;<br>
**end**<br>

We will skip this step and jump right to the next block in Meyl's algorithm and integrate the above part:
<hr>

Finde Aktionen a_(t,i) für s_t;<br>
**if** eps < Random-Wert zwischen 0 und 1 **then**<br>
&emsp; Generiere Entscheidungsliste zufällig;<br>
**else**<br>
&emsp; **if** s_t *ist neu* **then**<br>
&emsp; &emsp; Generiere Entscheidungsliste zufällig;<br>
&emsp; **else**<br>
&emsp; &emsp; Lese Q-Values Q(s_t, a_(t,i)) jeder a_(t,i) aus;<br>
&emsp; &emsp; Finde a_(t,i) mit max_a Q(s_t;a_(t,i));<br>
&emsp; &emsp; Lese IDs der auszuführenden Missionen von a_(t,max) aus;<br>
&emsp; &emsp; Füge IDs auf Entscheidungsliste hinzu;<br>
&emsp; **end**<br>
**end**

We will need to define some values for n_s, n_m and n_max in order to run the *epsilon*-function we defined above. Meyl does not provide values for n_s and n_m, though. Let's also restructure the code and add a couple of helper functions.

In [308]:
# Phase lengths in cycles. With these values n_s + n_m equals n_max, so no
# cycle falls into the convergence phase (n_s + n_m < n <= n_max).
n_s, n_m, n_max = 200, 800, 1000

In [436]:
def get_ressource_needs(key, limits, df):
    '''
    Look up how much of every ressource a single job needs.
    key -> index label of the job's row in df
    limits -> object whose index holds the ressource names; these names are
              used as column labels of df
    df -> the network DataFrame with one row per job
    Returns the selected entries of the job's row (a pandas Series when df is
    a DataFrame), labelled by ressource name.
    '''
    job_row = df.loc[key]
    ressource_names = limits.index
    return job_row[ressource_names]

def get_actions(key, limits, state, df):
    '''
    Enumerate every subset of the state's idle jobs (including the empty one) and keep those whose summed
    ressource needs do not exceed the currently available amounts.
    key -> not used by the function body
    limits -> ressource table; its column `available` holds the free units per ressource
    state -> object with an attribute `idle` listing the startable jobs
    df -> the network DataFrame with one row per job
    Returns a list of tuples of job ids, ordered by subset size.
    '''
    idle_jobs = state.idle
    feasible_actions = []

    for size in range(len(idle_jobs) + 1):
        for candidate in combinations(idle_jobs, size):
            per_job_needs = (get_ressource_needs(str(job), limits, df) for job in candidate)
            needs = pd.DataFrame(per_job_needs).sum(axis='rows')
            remaining = limits.available.subtract(needs, fill_value=0)
            if min(remaining) >= 0:
                feasible_actions.append(candidate)

    return feasible_actions

def get_next_action(n, state, df, key, limits):
    '''
    Choose the jobs to start next for a state. All subsets of the state's idle jobs, i.e. for idle jobs
    [A, B, C] the eight possible actions [(), (A), (B), (C), (A, B), (A, C), (B, C), (A, B, C)], are checked for
    their feasibility in terms of ressource constraints (see get_actions) and one feasible subset is returned.
    n -> number of iterations: the calculation of the greediness epsilon depends on that parameter
    state -> object with an attribute `idle` listing the jobs that could be started
    df -> used to find the ressources of each job to see which combinations of tasks are feasible
    key -> id of the state
    limits -> the ressource constraints and currently bound ressources
    Returns a tuple of job ids, or an empty list if there is nothing to start. The function used to return
    None whenever the exploration test failed and tried to print a row of the global `actions` frame (which
    can raise a KeyError) when no job was idle; both are gone.
    '''
    # nothing is waiting to be started
    if not state.idle:
        return []

    explore = epsilon(n) < random.random()
    feasible_actions = get_actions(str(key), limits, state, df)
    if not feasible_actions:
        return []

    if explore:
        return random.choice(feasible_actions)

    # TODO: choose the action with the highest Q-value here. No Q-value lookup exists yet, so every state is
    # handled like a new state, for which the pseudo-code also prescribes a random decision.
    return random.choice(feasible_actions)

def get_new_action_id(state_id, actions):
    '''
    Return the next free action id for a state: 0 if the state has no logged action yet,
    otherwise the highest ActionID logged for that state plus one.
    '''
    if state_id not in actions.StateID.values:
        return 0

    used_ids = list(actions[actions.StateID == state_id].ActionID)
    return max(used_ids) + 1

def is_finished(states):
    '''
    Return True if no state has an idle or a busy job left, i.e. the concatenation of
    the idle and the busy list is empty for every row of states.
    '''
    return not any((state.idle + state.busy) != [] for state in states.itertuples())

def add_running_jobs(action, running_jobs, df):
    '''
    Register every job of an action in running_jobs (modified in place), keyed by the
    job id as a string and valued with the job's duration taken from df.
    '''
    running_jobs.update((str(job), df.loc[str(job)].duration) for job in action)

In [217]:
# One sweep over all states: pick an action, log it, and move the chosen jobs
# from the state's idle list to its busy list.
for state in states.itertuples():
    action = get_next_action(1000, state, df, state.Index, limits)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
    logged_action = pd.DataFrame([{
        'StateID': state.Index,
        'ActionID': get_new_action_id(state.Index, actions),
        'executedJobs': action,
    }])
    actions = pd.concat([actions, logged_action], ignore_index=True)
    # state.busy / state.idle are the list objects stored in `states`, so these
    # in-place changes are visible in the DataFrame
    state.busy.extend(action)
    for job in action:
        state.idle.remove(job)

print(states)

     idle busy
1  [3, 4]  [2]


In [218]:
busy = '2'
# if all predecessors of a state are finished we can add it to the list of states and wait for it to be started
# NOTE(review): the predecessors checked are those of job '4', while the state
# that gets added is `busy` ('2') — confirm which job was meant.
if all(str(predecessor) in states.index for predecessor in df.loc['4'].predecessors):
    # copy the successor list so that later in-place changes of the idle list do not alter df
    states.loc[busy] = [list(df.loc[busy].successors), []]
    states.loc['1'].busy.remove(int(busy))

In [437]:
# Display the current state table.
states

Unnamed: 0,idle,busy
1,"[2, 3, 4]",[]


Führe a_(t,max) aus;<br>
Speichere a_(t,max) auf Aktionsliste;<br>
Lese s_(t+1) aus;<br>
**if** s_t = finaler Zustand **then**<br>
&emsp; Berechne Belohnung r_(t+1);<br>
&emsp; **for** j = 0 to Zustandslistenlänge **do**<br>
&emsp; &emsp; Q(s_(t−j), a_(t−j,max)) ← Q(s_(t−j), a_(t−j,max)) + α[r_(t+1) − Q(s_(t−j), a_(t−j,max))];<br>
&emsp; **end**<br>
**else**<br>
**end**

In [438]:
# Remaining duration per running job, keyed by job id; filled by add_running_jobs.
running_jobs = {}

# Let's run a whole iteration
I will define all variables, initialise the problem and use the functions that were defined above

In [480]:
eps_s, eps_f = 1, 0.05  # function_type 0 -> no need for eps_m
# Base learning rate. `alhpha_b` is the (misspelled) name this notebook has used
# so far; `alpha_b` is the intended spelling. Both are defined so that code
# written against either name finds the value.
alpha_b = alhpha_b = 0.1  # function_type = 0 -> constant value
n_s, n_m, n_max = 200, 800, 1000
t = 0

actions = pd.DataFrame(columns=['StateID', 'ActionID', 'executedJobs'])
states = pd.DataFrame(columns=['idle', 'busy'])
# copy the successor list: the idle list is modified in place below and must not alter df
states.loc['1'] = [list(df.loc['1'].successors), []]
running_jobs = {}

# while there are states left
# NOTE(review): nothing in this loop removes finished jobs from a state's busy
# list or adds follow-up states, so is_finished(states) cannot become True once
# a job has been started — the bookkeeping for finished jobs still has to be
# added before this loop can terminate on its own.
while not is_finished(states):
    print('iteration', t, 'finished with states', states, 'and actions', actions)
    for state in states.itertuples():
        # Find the next action and add it to the actions-list. Then add the action's jobs to the busy-list
        # and remove them from the idle list. Eventually add them to the running jobs with their duration
        # and update the ressource use.
        action = get_next_action(1000, state, df, state.Index, limits)
        # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
        logged_action = pd.DataFrame([{
            'StateID': state.Index,
            'ActionID': get_new_action_id(state.Index, actions),
            'executedJobs': action,
        }])
        actions = pd.concat([actions, logged_action], ignore_index=True)

        # update busy, idle and running jobs
        state.busy.extend(action)
        for job in action:
            state.idle.remove(job)
        add_running_jobs(action, running_jobs, df)

        # update ressources
        needs = pd.DataFrame(get_ressource_needs(str(key), limits, df) for key in action).sum(axis='rows')
        limits.available = limits.available.subtract(needs, fill_value=0)

        ########################################################
        t += 1 # update timer and move on the the next time-step
        ########################################################

        # iterate over a copy because finished jobs are removed inside the loop
        for key, value in running_jobs.copy().items():
            running_jobs[key] -= 1

            # free ressources if the task is finished and remove the task from the list of running jobs;
            # "<= 0" also releases jobs with a duration of 0 (an "== 0" test never matches them)
            if value - 1 <= 0:
                limits.available = limits.available.add(get_ressource_needs(str(key), limits, df), fill_value=0)
                running_jobs.pop(str(key))

        print('iteration', t, 'finished with states', states, 'and actions', actions)