In [1]:
import pm4py
import pandas as pd
import datetime
from math import ceil
import matplotlib.pyplot as plt
import torch
import numpy as np
import gym
from importlib import reload

# Data
## Download and read
- Download `.xes` file(archive) from [here](https://data.4tu.nl/articles/dataset/BPI_Challenge_2012/12689204)
- Read this `.xes`
- Convert to good old `.csv`

In [2]:
file_path = 'BPI_Challenge_2012.xes'
event_log = pm4py.read_xes(file_path)
start_activities = pm4py.get_start_activities(event_log)
end_activities = pm4py.get_end_activities(event_log)
df = pm4py.convert_to_dataframe(event_log)
df.to_csv('bpi_12.csv')

parsing log, completed traces ::   0%|          | 0/13087 [00:00<?, ?it/s]

## Drop data
In the article only (activity, time_stamp) is used. Also leave trace id

In [3]:
df = df[['time:timestamp', 'case:concept:name', 'concept:name']]
df = df.rename(columns={'time:timestamp': 'timestamp', 'case:concept:name': 'trace_id', 'concept:name': 'activity'})
df['trace_id'] = df['trace_id'].apply(lambda x: int(x))

## Time-related features

- $t_{w}$ - time passed between Sunday midnight and the event
- $t_e$ - time passed between the completion of the given event and the completion of the previous one
- $t_t$ - time passed between the start of the trace and the given event

### $t_w$

In [4]:
def get_t_w(df):
    _df = df.copy()
    _dt_s_mn = _df['timestamp'].apply(lambda x: (x - x.replace(hour=0, minute=0, second=0, microsecond=0)).total_seconds())
    _dt_s_mn += _df['timestamp'].apply(lambda x: x.weekday() * 24 * 60 * 60)
    return _dt_s_mn.values

In [5]:
tw = get_t_w(df)

### $t_e$

In [6]:
def get_t_e(df):
    te = df['timestamp'].copy().diff()
    tr_diff = df['trace_id'].diff().fillna(1)
    te[tr_diff != 0] = 0.
    return te.values * 1e-9

In [7]:
te = get_t_e(df)

### $t_t$

In [8]:
def get_t_t(df):
    traces = list(set(df['trace_id']))
    out = df.copy()[['timestamp', 'trace_id']]
    t_ts = {}
    for t in traces:
        t_ts[t] = df['timestamp'][df['trace_id'] == t].min()
    out['tt'] = out.apply(lambda x: (x['timestamp'] - t_ts[x['trace_id']]).total_seconds(), axis=1)
    return out['tt'].values

In [9]:
tt = get_t_t(df)

In [10]:
df['tt'] = tt
df['te'] = te
df['tw'] = tw

In [11]:
n_classes = len(set(df['activity']))

In [12]:
df.head()

Unnamed: 0,timestamp,trace_id,activity,tt,te,tw
0,2011-10-01 00:38:44.546000+02:00,173688,A_SUBMITTED,0.0,0.0,434324.546
1,2011-10-01 00:38:44.880000+02:00,173688,A_PARTLYSUBMITTED,0.334,0.334,434324.88
2,2011-10-01 00:39:37.906000+02:00,173688,A_PREACCEPTED,53.36,53.026,434377.906
3,2011-10-01 00:39:38.875000+02:00,173688,W_Completeren aanvraag,54.329,0.969,434378.875
4,2011-10-01 11:36:46.437000+02:00,173688,W_Completeren aanvraag,39481.891,39427.562,473806.437


## Scaling
later

## Activity:
one hot

In [13]:
oh = pd.get_dummies(df['activity'])

In [14]:
df = pd.concat([df, oh], axis=1)

## Environment
Ther given scheme is the following:
- recieving window of $(a_i,\ t_{e,\ i},\ t_{w,\ i},\ t_{t,\ i}) = e_i$. So the input to model is $\{ e_{i},\ e_{i-1},\ \dots,\ e_{i-ws} \}$ 
- prodice $\hat{e}_{i+1}$
- predict $\hat{e}_{i+2}$ using $\{ \hat{e}_{i+1},\ e_{i},\ \dots,\ e_{i-ws +1} \}$
The metric is calculated by `environment`. It returns rewards for time prediction and for next step classifiation. So basicly `env` just stores data of trace. 

### Default predictor
Need to develop(debug) `Env`

In [15]:
class Predictor:
    def __init__(self, default_te=60, default_act=8):
        self.default_act = default_act
        self.default_te = default_te
    def predict_te(self, x):
        in_sh = x.shape[0]
        return torch.ones(in_sh) * self.default_te
    
    def predict_a(self, x):
        in_sh = x.shape[0]
        return torch.ones(in_sh) * self.default_act

Chose `[trace_id]` and create butch of traces, for learning

In [19]:
import rl4pm_lib.utils as utils
reload(utils)

<module 'rl4pm_lib.utils' from 'C:\\Users\\ALemikhov\\Documents\\5th_courth\\rl4pm\\rl4pm_lib\\utils.py'>

In [20]:
env_trace_ids = list(set(df['trace_id'].values))[0: 4]
env_matrix = None
max_len = 0
for t_id in env_trace_ids:
    trace_len = df[df['trace_id'] == t_id].shape[0]
    if max_len < trace_len:
        max_len = trace_len
        

for _i, t_id in enumerate(env_trace_ids):
    if env_matrix is not None:
        
        trace_vals = utils.extract_trace_features(df, t_id, max_len)
        env_matrix = torch.cat([env_matrix, trace_vals])
    else:
        env_matrix = utils.extract_trace_features(df, t_id, max_len)

These ara 'answers', and initial input

In [21]:
window_size = 2
predictor = Predictor()

In [22]:
inp = env_matrix[:, :window_size]

In [23]:
a = predictor.predict_a(inp)
te = predictor.predict_te(inp)
print(a)
print(te)

tensor([8., 8., 8., 8.])
tensor([60., 60., 60., 60.])


Then this predictions are inputs for next event prediction 

In [24]:
column_feature = {'te': 0, 'tt': 1, 'tw': 2}

In [26]:
import rl4pm_lib.envs as envs
reload(envs)

<module 'rl4pm_lib.envs' from 'C:\\Users\\ALemikhov\\Documents\\5th_courth\\rl4pm\\rl4pm_lib\\envs.py'>

In [27]:
next_inp = envs.get_next_input(inp, a, te, column_feature)

In [28]:
next_inp[0, :, 2]

tensor([342983.8270, 343043.8125], dtype=torch.float64)

Basicly this is for NN's predictions, but for env function which works with 1 event window is needed

In [30]:
next_inp_ = envs.get_next_input(inp[0].unsqueeze(0), a[0].unsqueeze(0), te[0].unsqueeze(0), column_feature)[0]

In [31]:
next_inp_.shape

torch.Size([2, 27])

Working is snippet is just above

Also `env` returns a reward for predicion. Step is applied not for tensor of events for several traces, but for 1 event of trace

In [32]:
te_key_times = [0., 1., 10., 60., 120., 240., 480., 1440., 2880., 4320.,
                7200., 10080., 14400., 20160., 30240., 40320., 50400.]
te_intervals = [(te_key_times[i], te_key_times[i+1])
             for i in range(len(te_key_times)-1)]

In [33]:
# here wee neet counter to controll answers
curr_step = 3
trace = 2
te_pred = next_inp[trace, -1, column_feature['tt']]
te_true = env_matrix[trace, curr_step, column_feature['tt']]

print(f'true: {te_true}\npred: {te_pred}\nnice))))')

true: 33877.818
pred: 60.14899826049805
nice))))


In [35]:
true = torch.tensor([62., 700., 61.])
pred = torch.tensor([700., 62., 62.]) 
envs.get_te_reward(true=true, pred=pred, intervals=te_intervals)

tensor([0, 0, 1])

In [36]:
true = torch.tensor([62., 700., 61.])
pred = torch.tensor([700., 62., 62.]) 
assert (envs.get_te_reward(true=true, pred=pred, intervals=te_intervals) == torch.tensor([0, 0, 1]).bool()).all

Pipe line to deal with multiple traces needed

In [37]:
assert envs.get_act_reward(true_act_oh=torch.tensor([[1, 0, 0, 0]]), pred_act_oh=torch.tensor([[1, 0, 0, 0]])) == 1
assert envs.get_act_reward(true_act_oh=torch.tensor([[0, 1, 0, 0]]), pred_act_oh=torch.tensor([[1, 0, 0, 0]])) == 0

In [38]:
envs.get_act_reward(true_act_oh=torch.tensor([[1, 0, 0, 0],
                                          [1, 0, 0, 0]
                                         ]),
                pred_act_oh=torch.tensor([[1, 0, 0, 0],
                                          [0, 1, 0, 0]
                                         ]))

tensor([1, 0])

We can prodice all the env must do in working cycle:
```
next_s, (reward_te, reward_act), is_done, add_inf = env.step(next_te, next_act)
```
Let's build class! 

In [39]:
from rl4pm_lib import envs
reload(envs)

<module 'rl4pm_lib.envs' from 'C:\\Users\\ALemikhov\\Documents\\5th_courth\\rl4pm\\rl4pm_lib\\envs.py'>

Gonna run this and go chill

In [40]:
env = envs.PMEnv(data=env_matrix, intervals_te_rew=te_intervals, column_to_time_features=column_feature, window_size=window_size)

inp = env.reset()
# predictor is a NN, it works with batches of states
is_done = torch.zeros(env_matrix.shape[0]).bool()
while not is_done.all():

    n_traces = inp.shape[0]
    next_act = predictor.predict_a(inp.view(n_traces, -1))
    next_te = predictor.predict_te(inp.view(n_traces, -1))
    inp, (reward_te, reward_act), is_done, add_inf = env.step(next_te, next_act)

## LSTM-based NN
Ok here I gonna quiqly build some simply NN, which behaves just like predictor(which was used for debug).
Later this NN will be used for Q-Learning
``` python
env_matrix = [n_traces=4, max_seq_len=52, features=27]
```

In [214]:
import torch.nn as nn

In [215]:
class Net(nn.Module):
    def __init__(self, output_layer, input_size=27 * 2, hidden_layer=64, n_lstm=1):
        super(Net, self).__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_layer, batch_first=True, num_layers=n_lstm)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(hidden_layer, output_layer)
    
    def forward(self, x, h):
        x, (h, c) = self.lstm(x, (h[0], h[1]))
        x = self.relu(x)
        x = self.fc(x)
        x = self.relu(x)
        return x, (h, c)

```python
input = (n_traces, max_len, features) # nn.LSTM(..., batch_first=True)
```

```python
output = out, (h, c)
out.shape = (n_traces, max_len, features) # nn.LSTM(..., batch_first=True)
```

### Loop with env, LSTM

In [216]:
env = envs.PMEnv(data=env_matrix, intervals_te_rew=te_intervals, column_to_time_features=column_feature, window_size=window_size)

lstm_model_te = Net(output_layer=1).float()
lstm_model_act = Net(output_layer=n_classes).float()

inp = env.reset()
n_traces = inp.shape[0]
# predictor is a NN, it works with batches of states
is_done = torch.zeros(env_matrix.shape[0]).bool()
h_a = torch.zeros(1, n_traces, 64)
c_a = torch.zeros(1, n_traces, 64)
h_te = torch.zeros(1, n_traces, 64)
c_te = torch.zeros(1, n_traces, 64)
while not is_done.all():
    inp = inp.view(n_traces, 1, -1).float()
    next_act, (h_a, c_a) = lstm_model_act(inp, (h_a, c_a))
    next_te, (h_te, c_te) = lstm_model_te(inp, (h_te, c_te))
    
    next_act = next_act.view(n_traces, -1)
    next_act = next_act.argmax(dim=1).view(n_traces, -1)
    
    next_te = next_te.view(n_traces)
        
    inp, (reward_te, reward_act), is_done, add_inf = env.step(next_te, next_act)

## Agent
### Nets

### Agents

<module 'rl4pm_lib.agents' from 'C:\\Users\\ALemikhov\\Documents\\5th_courth\\rl4pm\\rl4pm_lib\\agents.py'>

### Replay Buffer
shamelessly stolen from [here](https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html)

In [325]:
from rl4pm_lib import utils
reload(utils)
reload(agents)
from rl4pm_lib import agents
reload(agents)

<module 'rl4pm_lib.agents' from 'C:\\Users\\ALemikhov\\Documents\\5th_courth\\rl4pm\\rl4pm_lib\\agents.py'>

In [232]:
replay_buffer = utils.ReplayMemory(2 ** 12)

In [233]:
env = envs.PMEnv(data=env_matrix, intervals_te_rew=te_intervals, column_to_time_features=column_feature, window_size=window_size)

hidden = 64

te_agent = agents.AgentTeDiscrete(input_size=27 * 2, hidden_layer=hidden, n_lstm=1, te_intervals=te_intervals)
ac_agent = agents.AgentAct(input_size=27 * 2, hidden_layer=hidden, n_lstm=1, out_shape=n_classes)

lstm_model_act = Net(output_layer=n_classes).float()

inp = env.reset()
n_traces = inp.shape[0]
inp = inp.view(n_traces, 1, -1).float()
# predictor is a NN, it works with batches of states
is_done = is_done = torch.zeros(env_matrix.shape[0]).bool()
h_a = torch.zeros(1, n_traces, hidden)
c_a = torch.zeros(1, n_traces, hidden)
h_t = torch.zeros(1, n_traces, hidden)
c_t = torch.zeros(1, n_traces, hidden)
while not is_done.all():
       
    next_ac, (h_a, c_a) = ac_agent.sample_action(x=inp, hidden=(h_a, c_a))
    next_te, (h_t, c_t) = te_agent.sample_action(x=inp, hidden=(h_t, c_t))
    
    n_inp, (reward_te, reward_act), is_done, add_inf = env.step(te_agent.act_to_te(next_te), next_ac)
    n_inp = n_inp.view(n_traces, 1, -1).float()
    datum = inp, next_te, next_ac, reward_te, reward_act, n_inp, is_done
    replay_buffer.push(datum)
    
    inp = n_inp

## Agncy
Here agent got trained

In [326]:
from rl4pm_lib import agencies
reload(agencies)
_ = env.reset()
batch_size = 64

agency = agencies.Agency(input_size=27 * 2, hidden=hidden, n_lstm=1, te_intervals=te_intervals, ac_learning_rate=1e-3,
                 te_learning_rate=1e-3, n_classes=n_classes, discount_factor=0.9)

with torch.no_grad():
    episode_reward, _ = utils.play_and_record(te_agent, ac_agent, env, replay_buffer)
    agency.train(env, replay_buffer, batch_size)

Agency.get_loss_discrete_agent:: qs.requires_grad=False
Agency.get_loss_discrete_agent:: qs.requires_grad=False


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn