In [1]:
import pm4py
import pandas as pd
import datetime
from math import ceil
import matplotlib.pyplot as plt
import torch
import numpy as np
import gym

# Data
## Download and read
- Download `.xes` file(archive) from [here](https://data.4tu.nl/articles/dataset/BPI_Challenge_2012/12689204)
- Read this `.xes`
- Convert to good old `.csv`

In [2]:
file_path = 'BPI_Challenge_2012.xes'
event_log = pm4py.read_xes(file_path)
start_activities = pm4py.get_start_activities(event_log)
end_activities = pm4py.get_end_activities(event_log)

HBox(children=(HTML(value='parsing log, completed traces :: '), FloatProgress(value=0.0, max=13087.0), HTML(va…




In [3]:
df = pm4py.convert_to_dataframe(event_log)
df.to_csv('bpi_12.csv')

## Drop data
In the article only (activity, time_stamp) is used. Also leave trace id

In [4]:
df = df[['time:timestamp', 'case:concept:name', 'concept:name']]
df = df.rename(columns={'time:timestamp': 'timestamp', 'case:concept:name': 'trace_id', 'concept:name': 'activity'})
df['trace_id'] = df['trace_id'].apply(lambda x: int(x))

## Time-related features

- $t_{w}$ - time passed between Sunday midnight and the event
- $t_e$ - time passed between the completion of the given event and the completion of the previous one
- $t_t$ - time passed between the start of the trace and the given event

### $t_w$

In [5]:
def get_t_w(df):
    _df = df.copy()
    _dt_s_mn = _df['timestamp'].apply(lambda x: (x - x.replace(hour=0, minute=0, second=0, microsecond=0)).total_seconds())
    _dt_s_mn += _df['timestamp'].apply(lambda x: x.weekday() * 24 * 60 * 60)
    return _dt_s_mn.values

In [6]:
tw = get_t_w(df)

### $t_e$

In [7]:
def get_t_e(df):
    te = df['timestamp'].copy().diff()
    tr_diff = df['trace_id'].diff().fillna(1)
    te[tr_diff != 0] = 0
    return te.values * 1e-9

In [8]:
te = get_t_e(df)

### $t_t$

In [9]:
def get_t_t(df):
    traces = list(set(df['trace_id']))
    out = df.copy()[['timestamp', 'trace_id']]
    t_ts = {}
    for t in traces:
        t_ts[t] = df['timestamp'][df['trace_id'] == t].min()
    out['tt'] = out.apply(lambda x: (x['timestamp'] - t_ts[x['trace_id']]).total_seconds(), axis=1)
    return out['tt'].values

In [10]:
tt = get_t_t(df)

In [11]:
df['tt'] = tt
df['te'] = te
df['tw'] = tw

In [12]:
n_classes = len(set(df['activity']))

In [13]:
df

Unnamed: 0,timestamp,trace_id,activity,tt,te,tw
0,2011-10-01 00:38:44.546000+02:00,173688,A_SUBMITTED,0.000,0.000,434324.546
1,2011-10-01 00:38:44.880000+02:00,173688,A_PARTLYSUBMITTED,0.334,0.334,434324.880
2,2011-10-01 00:39:37.906000+02:00,173688,A_PREACCEPTED,53.360,53.026,434377.906
3,2011-10-01 00:39:38.875000+02:00,173688,W_Completeren aanvraag,54.329,0.969,434378.875
4,2011-10-01 11:36:46.437000+02:00,173688,W_Completeren aanvraag,39481.891,39427.562,473806.437
...,...,...,...,...,...,...
262195,2012-02-29 23:51:17.423000+01:00,214376,A_PARTLYSUBMITTED,0.624,0.624,258677.423
262196,2012-02-29 23:52:01.287000+01:00,214376,W_Afhandelen leads,44.488,43.864,258721.287
262197,2012-03-01 09:26:46.736000+01:00,214376,W_Afhandelen leads,34529.937,34485.449,293206.736
262198,2012-03-01 09:27:37.118000+01:00,214376,A_DECLINED,34580.319,50.382,293257.118


## Scaling
later

## Activity:
one hot

In [14]:
oh = pd.get_dummies(df['activity'])

In [15]:
df = pd.concat([df, oh], axis=1)

## Environment
Ther given scheme is the following:
- recieving window of $(a_i,\ t_{e,\ i},\ t_{w,\ i},\ t_{t,\ i}) = e_i$. So the input to model is $\{ e_{i},\ e_{i-1},\ \dots,\ e_{i-ws} \}$ 
- prodice $\hat{e}_{i+1}$
- predict $\hat{e}_{i+2}$ using $\{ \hat{e}_{i+1},\ e_{i},\ \dots,\ e_{i-ws +1} \}$
The metric is calculated by `environment`. It returns rewards for time prediction and for next step classifiation. So basicly `env` just stores data of trace. 

### Default predictor
Need to develop(debug) `Env`

In [16]:
class Predictor:
    def __init__(self, default_te=60, default_act=8):
        self.default_act = default_act
        self.default_te = default_te
    def predict_te(self, x):
        in_sh = x.shape[0]
        return torch.ones(in_sh) * self.default_te
    
    def predict_a(self, x):
        in_sh = x.shape[0]
        return torch.ones(in_sh) * self.default_act

Chose `[trace_id]` and create butch of traces, for learning

In [17]:
def fill_trace(trace_np_matrix, max_len):
    need_pad = max_len - trace_np_matrix.shape[0]
    pad = np.zeros((need_pad, trace_np_matrix.shape[1]))
    return np.concatenate((trace_np_matrix, pad))

def extract_trace_features(df, trace_id, max_len):
    df_id = df[df['trace_id'] == t_id].drop(columns=['timestamp', 'trace_id', 'activity'])
    trace_vals = df_id.values
    trace_vals = fill_trace(trace_vals, max_len)
    trace_vals = torch.as_tensor(trace_vals).unsqueeze(0)
    return trace_vals

In [18]:
env_trace_ids = list(set(df['trace_id'].values))[0: 4]
env_matrix = None
max_len = 0
for t_id in env_trace_ids:
    trace_len = df[df['trace_id'] == t_id].shape[0]
    if max_len < trace_len:
        max_len = trace_len
        

for _i, t_id in enumerate(env_trace_ids):
    if env_matrix is not None:
        
        trace_vals = extract_trace_features(df, t_id, max_len)
        env_matrix = torch.cat([env_matrix, trace_vals])
    else:
        env_matrix = extract_trace_features(df, t_id, max_len)

These ara 'answers', and initial input

In [19]:
window_size = 2
predictor = Predictor()

In [20]:
inp = env_matrix[:, :window_size]

In [21]:
a = predictor.predict_a(inp)
te = predictor.predict_te(inp)
print(a)
print(te)

tensor([8., 8., 8., 8.])
tensor([60., 60., 60., 60.])


Then this predictions are inputs for next event prediction 

In [22]:
column_feature = {'te': 0, 'tt': 1, 'tw': 2}

In [23]:
def get_next_input(prev_inp, next_act, next_te, column_feature):
    out = prev_inp[:, 1:]
    next_event = torch.zeros(prev_inp.shape[0], prev_inp.shape[2])
    next_event[:, column_feature['te']] = next_te
    last_event = prev_inp[:, -1].squeeze(1)
    
    next_event[:, column_feature['tt']] = last_event[:, column_feature['tt']] + next_te
    
    next_event[:, column_feature['tw']] = (last_event[:, column_feature['tw']] + next_te ) % (7 * 24 * 60 * 60)
    # one hot transformation from https://discuss.pytorch.org/t/convert-int-into-one-hot-format/507/5
    act_onehot = torch.FloatTensor(out.shape[0], out.shape[2] - len(column_feature))
    act_onehot.zero_()
    act_onehot.scatter_(1, next_act.long().view(-1, 1), 1)
    next_event[:, len(column_feature):] = act_onehot

    out = torch.cat([out, next_event.unsqueeze(1)], dim=1)
    return out

In [24]:
next_inp = get_next_input(inp, a, te, column_feature)

In [25]:
next_inp[0, :, 2]

tensor([342983.8270, 343043.8125], dtype=torch.float64)

Basicly this is for NN's predictions, but for env function which works with 1 event window is needed

In [26]:
next_inp_ = get_next_input(inp[0].unsqueeze(0), a[0].unsqueeze(0), te[0].unsqueeze(0), column_feature)[0]

In [27]:
next_inp_.shape

torch.Size([2, 27])

Working is snippet is just above

Also `env` returns a reward for predicion. Step is applied not for tensor of events for several traces, but for 1 event of trace

In [28]:
te_key_times = [0., 1., 10., 60., 120., 240., 480., 1440., 2880., 4320.,
                7200., 10080., 14400., 20160., 30240., 40320., 50400.]
te_intervals = [(te_key_times[i], te_key_times[i+1])
             for i in range(len(te_key_times)-1)]

In [29]:
# here wee neet counter to controll answers
curr_step = 3
trace = 2
te_pred = next_inp[trace, -1, column_feature['tt']]
te_true = env_matrix[trace, curr_step, column_feature['tt']]

print(f'true: {te_true}\npred: {te_pred}\nnice))))')

true: 33877.818
pred: 60.14899826049805
nice))))


In [30]:
def get_te_reward(true: torch.tensor, pred: torch.tensor, intervals):
    for inter in intervals:
        if (true >= inter[0]) & (true < inter[1]): # got true value in this interval
            if (pred >= inter[0]) & (pred < inter[1]):
                return 1
            else:
                return 0
        if (pred >= inter[0]) & (pred < inter[1]): # got pred value in this interval
            return 0
    return 0

In [31]:
def get_te_reward(true: torch.tensor, pred: torch.tensor, intervals):
    masks = []
    for inter in intervals:
        true_here = (true >= inter[0]) * (true < inter[1]) 
        pred_here = (pred >= inter[0]) * (pred < inter[1])
        masks.append(true_here * pred_here)
    out = torch.stack(masks).T.sum(dim=1)
    return out

In [32]:
true = torch.tensor([62., 700., 61.])
pred = torch.tensor([700., 62., 62.]) 
get_te_reward(true=true, pred=pred, intervals=te_intervals)

tensor([0, 0, 1])

In [33]:
true = torch.tensor([62., 700., 61.])
pred = torch.tensor([700., 62., 62.]) 
assert (get_te_reward(true=true, pred=pred, intervals=te_intervals) == torch.tensor([0, 0, 1]).bool()).all

Pipe line to deal with multiple traces needed

In [34]:
def get_act_reward(true_act_oh, pred_act_oh):
    mult = (true_act_oh * pred_act_oh)
    return mult.sum(dim=1)

In [35]:
assert get_act_reward(true_act_oh=torch.tensor([[1, 0, 0, 0]]), pred_act_oh=torch.tensor([[1, 0, 0, 0]])) == 1
assert get_act_reward(true_act_oh=torch.tensor([[0, 1, 0, 0]]), pred_act_oh=torch.tensor([[1, 0, 0, 0]])) == 0

In [36]:
 get_act_reward(true_act_oh=torch.tensor([[1, 0, 0, 0],
                                          [1, 0, 0, 0]
                                         ]),
                pred_act_oh=torch.tensor([[1, 0, 0, 0],
                                          [0, 1, 0, 0]
                                         ]))

tensor([1, 0])

We can prodice all the env must do in working cycle:
```
next_s, (reward_te, reward_act), is_done, add_inf = env.step(next_te, next_act)
```
Let's build class! 

In [37]:
class PMEnv(gym.Env):
    def __init__(self, data: torch.tensor, intervals_te_rew, column_to_time_features, window_size):
        self.data = data
        self.pred_counter = 1 + window_size
        self.trace_index = None
        self.intervals = intervals_te_rew
        self.column_feature = column_to_time_features
        self.win = window_size
        self.given_state = None
        
    def reset(self, trace_n=None):
        self.pred_counter = 1 + self.win
        out = self.data[:, :window_size]
        self.given_state = out
        self.trace_index = trace_n
        return out
        
    def step(self, next_te: torch.tensor, next_act: torch.tensor):
        '''
        returns: next_s, (reward_te, reward_act), is_done, add_inf
        '''
        te_rew = get_te_reward(true=self.data[:, self.pred_counter, self.column_feature['te']],
                               pred=next_te, intervals=self.intervals)
        
        true_act_oh = self.data[:, self.pred_counter, len(column_feature):]

        # TODO here rises an error
        pred_act_oh = torch.zeros(self.data.shape[0], self.data.shape[-1] - len(self.column_feature), dtype=int)
        pred_act_oh[range(pred_act_oh.shape[0]), next_act.long()] = 1
        
        act_rew = get_act_reward(true_act_oh=true_act_oh, pred_act_oh=pred_act_oh)
        
        next_s = get_next_input(prev_inp=self.given_state,
                                next_act=next_act,
                                next_te=next_te,
                                column_feature=self.column_feature)
        self.given_state = next_s
        
        is_done = (self.pred_counter == (self.data.shape[1] - 1))
        
        self.pred_counter += 1
        return next_s, (te_rew, act_rew), is_done, {}
        

Gonna run this and go chill

In [38]:
print(env_matrix.shape)

torch.Size([4, 52, 27])


In [39]:
env = PMEnv(data=env_matrix, intervals_te_rew=te_intervals, column_to_time_features=column_feature, window_size=window_size)

inp = env.reset()
# predictor is a NN, it works with batches of states
is_done = False
while not is_done:
    n_traces = inp.shape[0]
    next_act = predictor.predict_a(inp.view(n_traces, -1))
    next_te = predictor.predict_te(inp.view(n_traces, -1))
    inp, (reward_te, reward_act), is_done, add_inf = env.step(next_te, next_act)

## LSTM-based NN
Ok here I gonna quiqly build some simply NN, which behaves just like predictor(which was used for debug).
Later this NN will be used for Q-Learning
``` python
env_matrix = [n_traces=4, max_seq_len=52, features=27]
```

In [40]:
import torch.nn as nn

In [41]:
class Net(nn.Module):
    def __init__(self, output_layer, input_size=27 * 2, hidden_layer=64, n_lstm=1):
        super(Net, self).__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_layer, batch_first=True, num_layers=n_lstm)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(hidden_layer, output_layer)
    
    def forward(self, x, h):
        x, (h, c) = self.lstm(x, (h[0], h[1]))
        x = self.relu(x)
        x = self.fc(x)
        x = self.relu(x)
        return x, (h, c)

```python
input = (n_traces, max_len, features) # nn.LSTM(..., batch_first=True)
```

```python
output = out, (h, c)
out.shape = (n_traces, max_len, features) # nn.LSTM(..., batch_first=True)
```

### Loop with env, LSTM

In [42]:
env = PMEnv(data=env_matrix, intervals_te_rew=te_intervals, column_to_time_features=column_feature, window_size=window_size)

lstm_model_te = Net(output_layer=1).float()
lstm_model_act = Net(output_layer=n_classes).float()

inp = env.reset()
n_traces = inp.shape[0]
# predictor is a NN, it works with batches of states
is_done = False
h_a = torch.zeros(1, n_traces, 64)
c_a = torch.zeros(1, n_traces, 64)
h_te = torch.zeros(1, n_traces, 64)
c_te = torch.zeros(1, n_traces, 64)
while not is_done:
    inp = inp.view(n_traces, 1, -1).float()
    next_act, (h_a, c_a) = lstm_model_act(inp, (h_a, c_a))
    next_te, (h_te, c_te) = lstm_model_te(inp, (h_te, c_te))
    
    next_act = next_act.view(n_traces, -1)
    next_act = next_act.argmax(dim=1).view(n_traces, -1)
    
    next_te = next_te.view(n_traces)
        
    inp, (reward_te, reward_act), is_done, add_inf = env.step(next_te, next_act)

## Agent
### Nets

In [43]:
class NetTe(nn.Module):
    def __init__(self, input_size, hidden_layer, n_lstm, te_intervals):
        super(NetTe, self).__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_layer, batch_first=True, num_layers=n_lstm)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(hidden_layer, len(te_intervals))
    
    def forward(self, x, h):
        x, (h, c) = self.lstm(x, (h[0], h[1]))
        x = self.relu(x)
        x = self.fc(x)
        return x, (h, c)

In [44]:
class NetAct(nn.Module):
    def __init__(self, input_size, hidden_layer, n_lstm, out_shape):
        super(NetAct, self).__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_layer, batch_first=True, num_layers=n_lstm)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(hidden_layer, out_shape)
    
    def forward(self, x, h):
        x, (h, c) = self.lstm(x, (h[0], h[1]))
        x = self.relu(x)
        x = self.fc(x)
        return x, (h, c)

### Agents

In [45]:
class AgentTeDiscrete:
    def __init__(self, input_size, hidden_layer, n_lstm, te_intervals):
        self.net = NetTe(input_size, hidden_layer, n_lstm, te_intervals)
        self.target_net = NetTe(input_size, hidden_layer, n_lstm, te_intervals)
        self.te_intervals = te_intervals
        
    def sample_action(self, x, hidden, stoch=False):
        q_values, hidden = self.net(x, hidden)
        q_values = q_values.view(q_values.shape[0], q_values.shape[2])
        
        if stoch == False:
            t_idx = q_values.argmax(dim=1)
            out = torch.zeros(t_idx.shape)
            for i in range(out.shape[0]):
                out[i] = (self.te_intervals[t_idx[i]][0] + self.te_intervals[t_idx[i]][1]) / 2.
                
        return out, hidden

In [46]:
class AgentAct:
    def __init__(self, input_size, hidden_layer, n_lstm, out_shape):
        self.net = NetAct(input_size, hidden_layer, n_lstm, out_shape)
        self.target_net = NetAct(input_size, hidden_layer, n_lstm, out_shape)
        
        self.te_intervals = te_intervals
        
    def sample_action(self, x, hidden, stoch=False):
        q_values, hidden = self.net(x, hidden)
        q_values = q_values.view(q_values.shape[0], q_values.shape[2])
        
        if stoch == False:
            act_idx = q_values.argmax(dim=1)
            
        return act_idx, hidden

### Replay Buffer
shamelessly stolen from [here](https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html)

In [59]:
import random

class ReplayMemory(object):

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, datum):
        """Saves a transition."""
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = datum
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

In [60]:
replay_buffer = ReplayMemory(2 ** 12)

In [61]:
env = PMEnv(data=env_matrix, intervals_te_rew=te_intervals, column_to_time_features=column_feature, window_size=window_size)

te_agent = AgentTeDiscrete(input_size=27 * 2, hidden_layer=64, n_lstm=1, te_intervals=te_intervals)
ac_agent = AgentAct(input_size=27 * 2, hidden_layer=64, n_lstm=1, out_shape=n_classes)

lstm_model_act = Net(output_layer=n_classes).float()

inp = env.reset()
n_traces = inp.shape[0]
inp = inp.view(n_traces, 1, -1).float()
# predictor is a NN, it works with batches of states
is_done = False
h_a = torch.zeros(1, n_traces, 64)
c_a = torch.zeros(1, n_traces, 64)
h_t = torch.zeros(1, n_traces, 64)
c_t = torch.zeros(1, n_traces, 64)
while not is_done:
       
    next_ac, (h_a, c_a) = ac_agent.sample_action(x=inp, hidden=(h_a, c_a))
    next_te, (h_t, c_t) = te_agent.sample_action(x=inp, hidden=(h_t, c_t))
        
    n_inp, (reward_te, reward_act), is_done, add_inf = env.step(next_te, next_ac)
    n_inp = n_inp.view(n_traces, 1, -1).float()
    datum = (inp, (next_te, next_ac), n_inp, (reward_te, reward_act))
    replay_buffer.push(datum)
    
    inp = n_inp

In [63]:
replay_buffer.sample(2)[0]

(tensor([[[9.6000e+02, 2.9472e+05, 3.2904e+04, 0.0000e+00, 0.0000e+00,
           0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
           0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
           0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
           0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00,
           0.0000e+00, 0.0000e+00, 9.6000e+02, 2.9568e+05, 3.3864e+04,
           0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
           0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
           0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
           0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
           1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]],
 
         [[9.6000e+02, 2.9472e+05, 3.3247e+04, 0.0000e+00, 0.0000e+00,
           0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
           0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
           0.0