In [28]:
# Read data from the predictive output

import pandas as pd
import numpy as np
from category_encoders.target_encoder import TargetEncoder



<IPython.core.display.Javascript object>

In [2]:


train_data = pd.read_pickle("./results/predicitve/bpic2012/train_.pkl")
test_data = pd.read_pickle("./results/predicitve/bpic2012/test_.pkl")
cal_data = pd.read_pickle("./results/predicitve/bpic2012/valid_.pkl")

print("Read encoded data...")
df_agg = pd.read_csv(
    "./results/predicitve/bpic2012/dt_transformed_agg_bpic2012.csv",
    low_memory=False,
    sep=";",
)
df_static = pd.read_csv(
    "./results/predicitve/bpic2012/dt_transformed_static_bpic2012.csv",
    low_memory=False,
    sep=";",
)

static_agg_df = pd.concat([df_static, df_agg], axis=1)
cat_feat_idx = np.where(static_agg_df.dtypes != float)[0]

#  rename columns
train_data.columns = list(static_agg_df.columns) + ["Outcome"]  # + ["Treatment"]
test_data.columns = list(static_agg_df.columns) + ["Outcome"]  # + ["Treatment"]
cal_data.columns = list(static_agg_df.columns) + ["Outcome"]  # + ["Treatment"]


train_data["time_to_event_m"] = pd.to_numeric(train_data["time_to_event_m"])
test_data["time_to_event_m"] = pd.to_numeric(test_data["time_to_event_m"])
cal_data["time_to_event_m"] = pd.to_numeric(cal_data["time_to_event_m"])



cal_df = cal_data.copy()
test_df = test_data.copy()
train_df = train_data.copy()

dfs = [cal_df, test_df, train_df]
for df in dfs:

    for i in cat_feat_idx:
        col = df.columns[i]

        if col == "event":
            continue
        te = TargetEncoder()
        df[col] = te.fit_transform(df[col], df.time_to_event_m)


# Add event column that indicates all observations are uncencored
# All observation are complete
train_df["event"] = 1
train_df["event"] = train_df["event"].astype("bool")

test_df["event"] = 1
test_df["event"] = test_df["event"].astype("bool")

cal_df["event"] = 1
cal_df["event"] = cal_df["event"].astype("bool")

# Remove theses columns becusal they are not working withthe Cox model
train_df = train_df[
    train_df.columns.difference(
        ["timesincelastevent_min", "timesincecasestart_min", "event_nr_min"]
    )
]

test_df = test_df[
    test_df.columns.difference(
        ["timesincelastevent_min", "timesincecasestart_min", "event_nr_min"]
    )
]

cal_df = cal_df[
    cal_df.columns.difference(
        ["timesincelastevent_min", "timesincecasestart_min", "event_nr_min"]
    )
]


Read encoded data...


<IPython.core.display.Javascript object>

In [3]:

from lifelines import CoxPHFitter
from sklearn.model_selection import train_test_split
from lifelines.utils import concordance_index


y_test = test_df["time_to_event_m"]
X_test = test_df.drop(["time_to_event_m"], axis=1)

y_train = train_df["time_to_event_m"]
X_train = train_df.drop(["time_to_event_m"], axis=1)

y_cal = cal_df["time_to_event_m"]
X_cal = cal_df.drop(["time_to_event_m"], axis=1)

X_cal1, X_cal2, y_cal1, y_cal2 = train_test_split(
    X_cal, y_cal, test_size=0.5, random_state=22
)




<IPython.core.display.Javascript object>

In [4]:
time_col = "time_to_event_m"
ev_col = "event"


def survival_model(X_train, y_train, time_col, ev_col):
    train_data = pd.concat([X_train, y_train], axis=1)
    cph = CoxPHFitter(penalizer=0.1)
    cph.fit(
        train_data, duration_col=time_col, event_col=ev_col, show_progress=True,
    )

    return cph

<IPython.core.display.Javascript object>

# Naive method

In [5]:
model = survival_model(X_train, y_train, time_col, ev_col)
preds_cal = model.predict_expectation(X_cal)

Iteration 1: norm_delta = 0.58775, step_size = 0.9500, log_lik = -693747.06025, newton_decrement = 9350.65724, seconds_since_start = 0.9
Iteration 2: norm_delta = 0.11262, step_size = 0.9500, log_lik = -684715.69332, newton_decrement = 481.03780, seconds_since_start = 1.7
Iteration 3: norm_delta = 0.01640, step_size = 0.9500, log_lik = -684215.13725, newton_decrement = 8.85363, seconds_since_start = 2.5
Iteration 4: norm_delta = 0.00033, step_size = 1.0000, log_lik = -684206.17458, newton_decrement = 0.00372, seconds_since_start = 3.4
Iteration 5: norm_delta = 0.00000, step_size = 1.0000, log_lik = -684206.17086, newton_decrement = 0.00000, seconds_since_start = 4.2
Convergence success after 5 iterations.


<IPython.core.display.Javascript object>

In [6]:
def calculate_q_yhat_naive(preds_cal, y_cal, alpha):
    print(f"\nAlpha: {alpha}")

    N = len(y_cal)
    q_yhat = np.quantile(np.abs(y_cal - preds_cal), np.ceil((N + 1) * (1 - alpha)) / N)
    print(f"qhat: {q_yhat}")

    return q_yhat


# conformal prediction object
alpha = np.round(np.arange(0.1, 1.0, 0.1), 1)

# Naive
qhat_naive = {a: calculate_q_yhat_naive(preds_cal, y_cal, a) for a in alpha}
qhat_naive


Alpha: 0.1
qhat: 26970.25087358547

Alpha: 0.2
qhat: 20010.49417937487

Alpha: 0.3
qhat: 16833.100772299123

Alpha: 0.4
qhat: 13685.727170601333

Alpha: 0.5
qhat: 10903.287461134283

Alpha: 0.6
qhat: 8164.205523482662

Alpha: 0.7
qhat: 5948.143471258927

Alpha: 0.8
qhat: 3912.6502744620498

Alpha: 0.9
qhat: 1942.5349757338063


{0.1: 26970.25087358547,
 0.2: 20010.49417937487,
 0.3: 16833.100772299123,
 0.4: 13685.727170601333,
 0.5: 10903.287461134283,
 0.6: 8164.205523482662,
 0.7: 5948.143471258927,
 0.8: 3912.6502744620498,
 0.9: 1942.5349757338063}

<IPython.core.display.Javascript object>

In [7]:
preds_test = np.array(model.predict_expectation(X_test))
preds_test

array([13963.33265667, 13678.31999167, 13394.22845268, ...,
       20240.10094636, 19884.8216485 , 19536.56560945])

<IPython.core.display.Javascript object>

In [8]:
def calculate_coverage_naive(lower_bound, upper_bound, y_test, alpha):

    lower_bound = np.array(lower_bound)
    upper_bound = np.array(upper_bound)
    y_test = np.array(y_test)
    out_of_bound = 0
    N = len(y_test)

    for i in range(N):
        if y_test[i] < lower_bound[i] or y_test[i] > upper_bound[i]:
            out_of_bound += 1

    #     print(
    #         f"Alpha is: {alpha}, with Coverage of {1 - out_of_bound / N}, Lower bound: {lower_bound}, and Upper bound: {upper_bound}"
    #     )
    return 1 - out_of_bound / N, lower_bound, upper_bound


# lower_bound = preds_test - qhat
# upper_bound = preds_test + qhat


pred_intervals_naive = {
    alpha: calculate_coverage_naive(preds_test - qhat, preds_test + qhat, y_test, alpha)
    for alpha, qhat in qhat_naive.items()
}
pred_intervals_naive

{0.1: (0.9265694814279911,
  array([-13006.91821692, -13291.93088191, -13576.0224209 , ...,
          -6730.14992722,  -7085.42922508,  -7433.68526414]),
  array([40933.58353026, 40648.57086526, 40364.47932627, ...,
         47210.35181995, 46855.07252209, 46506.81648303])),
 0.2: (0.8600893622782868,
  array([-6047.1615227 , -6332.1741877 , -6616.26572669, ...,
           229.60676699,  -125.67253087,  -473.92856993]),
  array([33973.82683604, 33688.81417105, 33404.72263206, ...,
         40250.59512574, 39895.31582788, 39547.05978882])),
 0.3: (0.8253824976305457,
  array([-2869.76811563, -3154.78078063, -3438.87231962, ...,
          3407.00017406,  3051.72087621,  2703.46483715]),
  array([30796.43342897, 30511.42076397, 30227.32922498, ...,
         37073.20171866, 36717.9224208 , 36369.66638175])),
 0.4: (0.7777226158776007,
  array([ 277.60548607,   -7.40717893, -291.49871792, ..., 6554.37377576,
         6199.0944779 , 5850.83843885]),
  array([27649.05982727, 27364.04716228, 2

<IPython.core.display.Javascript object>

# Adaptative intervals

Conformalized residual fitting

In [9]:
# calculate residuals
r_y = np.abs(y_cal1 - model.predict_expectation(X_cal1))
r_y = pd.DataFrame(r_y,)
r_y.columns = ["time_to_event_m"]
r_y["time_to_event_m"] = pd.to_numeric(r_y["time_to_event_m"])


# fit model residual on residuals
model_r = survival_model(X_cal1, r_y, time_col, ev_col)



Iteration 1: norm_delta = 0.40325, step_size = 0.9500, log_lik = -68466.21452, newton_decrement = 828.87755, seconds_since_start = 0.5
Iteration 2: norm_delta = 0.09812, step_size = 0.9500, log_lik = -67592.03372, newton_decrement = 46.27807, seconds_since_start = 0.9
Iteration 3: norm_delta = 0.02535, step_size = 0.9500, log_lik = -67542.43872, newton_decrement = 1.84761, seconds_since_start = 1.4
Iteration 4: norm_delta = 0.00114, step_size = 1.0000, log_lik = -67540.54300, newton_decrement = 0.00324, seconds_since_start = 1.8
Iteration 5: norm_delta = 0.00000, step_size = 1.0000, log_lik = -67540.53976, newton_decrement = 0.00000, seconds_since_start = 2.2
Convergence success after 5 iterations.


<IPython.core.display.Javascript object>

In [10]:
# check for coverage without ICP
preds_test = model.predict_expectation(X_test)
preds_test_r = model_r.predict_expectation(X_test)

<IPython.core.display.Javascript object>

In [11]:
# Without conformal
lower_bound = preds_test - preds_test_r
upper_bound = preds_test + preds_test_r


pred_intervals_adaptive_1 = {
    alpha: calculate_coverage_naive(
        np.array(lower_bound), np.array(upper_bound), y_test, alpha
    )
    for alpha, qhat in qhat_naive.items()
}
pred_intervals_adaptive_1

{0.1: (0.6308615787335831,
  array([ 7187.41333118,  6901.50200765,  6681.98991217, ...,
         10879.19211081, 10566.69525269, 10258.21916369]),
  array([20739.25198216, 20455.13797569, 20106.4669932 , ...,
         29601.00978192, 29202.94804432, 28814.9120552 ])),
 0.2: (0.6308615787335831,
  array([ 7187.41333118,  6901.50200765,  6681.98991217, ...,
         10879.19211081, 10566.69525269, 10258.21916369]),
  array([20739.25198216, 20455.13797569, 20106.4669932 , ...,
         29601.00978192, 29202.94804432, 28814.9120552 ])),
 0.3: (0.6308615787335831,
  array([ 7187.41333118,  6901.50200765,  6681.98991217, ...,
         10879.19211081, 10566.69525269, 10258.21916369]),
  array([20739.25198216, 20455.13797569, 20106.4669932 , ...,
         29601.00978192, 29202.94804432, 28814.9120552 ])),
 0.4: (0.6308615787335831,
  array([ 7187.41333118,  6901.50200765,  6681.98991217, ...,
         10879.19211081, 10566.69525269, 10258.21916369]),
  array([20739.25198216, 20455.13797569, 2

<IPython.core.display.Javascript object>

In [12]:
# With Conformal
# calculate q_yhat

preds_cal2 = model.predict_expectation(X_cal2)
preds_cal2_r = model_r.predict_expectation(X_cal2)
N = len(y_cal2)

def calculate_q_yhat_adaptive(preds_cal2, preds_cal2_r, y_cal2, a):
    
    q_yhat = np.quantile(np.abs(y_cal2 - preds_cal2) / preds_cal2_r, np.ceil((N + 1) * (1 - a)) / N)
    return q_yhat




# conformal prediction object
alpha = np.round(np.arange(0.1, 1.0, 0.1), 1)

# Naive
qhat_adaptive = {
    a: calculate_q_yhat_adaptive(preds_cal2, preds_cal2_r, y_cal2, a) for a in alpha
}  # {alhp: q_hat}


print(qhat_adaptive)



{0.1: 1.8598394632208528, 0.2: 1.4239345048851684, 0.3: 1.2081600945107558, 0.4: 1.05674139574643, 0.5: 0.8770245508950725, 0.6: 0.7110337760249272, 0.7: 0.5258404358354893, 0.8: 0.35690921034159007, 0.9: 0.18510043370075296}


<IPython.core.display.Javascript object>

In [13]:
pred_intervals_adaptive_2 = {
    alpha: calculate_coverage_naive(
        preds_test - qhat * preds_test_r,
        preds_test + qhat * preds_test_r,
        y_test,
        alpha,
    )
    for alpha, qhat in qhat_adaptive.items()
}
pred_intervals_adaptive_2

{0.1: (0.8387868393735614,
  array([1361.21049552, 1074.52646993,  910.54232848, ..., 2830.31328238,
         2554.60245429, 2280.3307362 ]),
  array([26565.45481782, 26282.11351342, 25877.91457688, ...,
         37649.88861034, 37215.04084271, 36792.8004827 ])),
 0.2: (0.7829579816762198,
  array([4314.86732678, 4028.5750309 , 3836.44038982, ..., 6910.77985833,
         6616.41995263, 6324.80795706]),
  array([23611.79798656, 23328.06495245, 22952.01651554, ...,
         33569.42203439, 33153.22334438, 32748.32326184])),
 0.3: (0.7306043236900303,
  array([5776.93732398, 5490.83893562, 5284.7697032 , ..., 8930.62444289,
         8627.03318148, 8326.83769064]),
  array([22149.72798936, 21865.80104773, 21503.68720217, ...,
         31549.57744983, 31142.61011553, 30746.29352826])),
 0.4: (0.6596560906259873,
  array([ 6802.93821118,  6516.97589652,  6301.1281288 , ...,
         10348.04107802, 10037.97175525,  9731.75283614]),
  array([21123.72710216, 20839.66408683, 20487.32877657, ...

<IPython.core.display.Javascript object>


# Conformalized Quantile Regression

Solution for the foregoing problem: Quantile Regression

In [14]:
alpha = np.round(np.arange(0.1, 1.0, 0.1), 1)


def calibrate_qyhat(y_true, lower_bound, upper_bound, alpha):

    N = len(y_true)
    s = np.amax([lower_bound - y_true, y_true - upper_bound], axis=0)
    q_yhat = np.quantile(s, np.ceil((N + 1) * (1 - alpha)) / N)

    return q_yhat


# check for coverage without ICP
preds_cal = model.predict_expectation(X_cal)
preds_cal_r = model_r.predict_expectation(X_cal)
lower_bound = preds_cal - preds_cal_r
upper_bound = preds_cal + preds_cal_r

# Adaptive
qhat_adaptive_QR = {
    a: calibrate_qyhat(y_cal, lower_bound, upper_bound, a) for a in alpha
}  # {alhp: q_hat}


qhat_adaptive_QR


{0.1: 10967.44878007338,
 0.2: 5772.988467423574,
 0.3: 2785.0592205712323,
 0.4: 695.0092491293094,
 0.5: -1566.4758656034317,
 0.6: -3473.146993597392,
 0.7: -5334.151731622324,
 0.8: -7066.688550621269,
 0.9: -9144.01505473237}

<IPython.core.display.Javascript object>

In [15]:
forecast = model.predict_expectation(X_test)


pred_intervals_adaptive_3 = {
    alpha: calculate_coverage_naive(
        np.array(forecast - qhat), np.array(forecast + qhat), y_test, alpha,
    )
    for alpha, qhat in qhat_adaptive_QR.items()
}
pred_intervals_adaptive_3

{0.1: (0.7213973010786658,
  array([2995.8838766 , 2710.8712116 , 2426.77967261, ..., 9272.65216629,
         8917.37286843, 8569.11682938]),
  array([24930.78143674, 24645.76877175, 24361.67723276, ...,
         31207.54972644, 30852.27042858, 30504.01438952])),
 0.2: (0.48937130477952795,
  array([ 8190.34418925,  7905.33152425,  7621.23998526, ...,
         14467.11247894, 14111.83318108, 13763.57714203]),
  array([19736.32112409, 19451.3084591 , 19167.21692011, ...,
         26013.08941379, 25657.81011593, 25309.55407687])),
 0.3: (0.2785124339937717,
  array([11178.2734361 , 10893.2607711 , 10609.16923211, ...,
         17455.04172579, 17099.76242793, 16751.50638888]),
  array([16748.39187724, 16463.37921225, 16179.28767325, ...,
         23025.16016693, 22669.88086908, 22321.62483002])),
 0.4: (0.07478449248544483,
  array([13268.32340754, 12983.31074254, 12699.21920355, ...,
         19545.09169723, 19189.81239937, 18841.55636032]),
  array([14658.3419058 , 14373.3292408 , 14089

<IPython.core.display.Javascript object>

In [16]:
# Naive method
pred_intervals_naive

{0.1: (0.9265694814279911,
  array([-13006.91821692, -13291.93088191, -13576.0224209 , ...,
          -6730.14992722,  -7085.42922508,  -7433.68526414]),
  array([40933.58353026, 40648.57086526, 40364.47932627, ...,
         47210.35181995, 46855.07252209, 46506.81648303])),
 0.2: (0.8600893622782868,
  array([-6047.1615227 , -6332.1741877 , -6616.26572669, ...,
           229.60676699,  -125.67253087,  -473.92856993]),
  array([33973.82683604, 33688.81417105, 33404.72263206, ...,
         40250.59512574, 39895.31582788, 39547.05978882])),
 0.3: (0.8253824976305457,
  array([-2869.76811563, -3154.78078063, -3438.87231962, ...,
          3407.00017406,  3051.72087621,  2703.46483715]),
  array([30796.43342897, 30511.42076397, 30227.32922498, ...,
         37073.20171866, 36717.9224208 , 36369.66638175])),
 0.4: (0.7777226158776007,
  array([ 277.60548607,   -7.40717893, -291.49871792, ..., 6554.37377576,
         6199.0944779 , 5850.83843885]),
  array([27649.05982727, 27364.04716228, 2

<IPython.core.display.Javascript object>

In [17]:
# Adaptive without conformal
pred_intervals_adaptive_1

{0.1: (0.6308615787335831,
  array([ 7187.41333118,  6901.50200765,  6681.98991217, ...,
         10879.19211081, 10566.69525269, 10258.21916369]),
  array([20739.25198216, 20455.13797569, 20106.4669932 , ...,
         29601.00978192, 29202.94804432, 28814.9120552 ])),
 0.2: (0.6308615787335831,
  array([ 7187.41333118,  6901.50200765,  6681.98991217, ...,
         10879.19211081, 10566.69525269, 10258.21916369]),
  array([20739.25198216, 20455.13797569, 20106.4669932 , ...,
         29601.00978192, 29202.94804432, 28814.9120552 ])),
 0.3: (0.6308615787335831,
  array([ 7187.41333118,  6901.50200765,  6681.98991217, ...,
         10879.19211081, 10566.69525269, 10258.21916369]),
  array([20739.25198216, 20455.13797569, 20106.4669932 , ...,
         29601.00978192, 29202.94804432, 28814.9120552 ])),
 0.4: (0.6308615787335831,
  array([ 7187.41333118,  6901.50200765,  6681.98991217, ...,
         10879.19211081, 10566.69525269, 10258.21916369]),
  array([20739.25198216, 20455.13797569, 2

<IPython.core.display.Javascript object>

In [18]:
# Adaptative intervals - Conformalized residual fitting

pred_intervals_adaptive_2

{0.1: (0.8387868393735614,
  array([1361.21049552, 1074.52646993,  910.54232848, ..., 2830.31328238,
         2554.60245429, 2280.3307362 ]),
  array([26565.45481782, 26282.11351342, 25877.91457688, ...,
         37649.88861034, 37215.04084271, 36792.8004827 ])),
 0.2: (0.7829579816762198,
  array([4314.86732678, 4028.5750309 , 3836.44038982, ..., 6910.77985833,
         6616.41995263, 6324.80795706]),
  array([23611.79798656, 23328.06495245, 22952.01651554, ...,
         33569.42203439, 33153.22334438, 32748.32326184])),
 0.3: (0.7306043236900303,
  array([5776.93732398, 5490.83893562, 5284.7697032 , ..., 8930.62444289,
         8627.03318148, 8326.83769064]),
  array([22149.72798936, 21865.80104773, 21503.68720217, ...,
         31549.57744983, 31142.61011553, 30746.29352826])),
 0.4: (0.6596560906259873,
  array([ 6802.93821118,  6516.97589652,  6301.1281288 , ...,
         10348.04107802, 10037.97175525,  9731.75283614]),
  array([21123.72710216, 20839.66408683, 20487.32877657, ...

<IPython.core.display.Javascript object>

In [19]:
# Adaptative intervals -  Conformalized Quantile Regression

pred_intervals_adaptive_3

{0.1: (0.7213973010786658,
  array([2995.8838766 , 2710.8712116 , 2426.77967261, ..., 9272.65216629,
         8917.37286843, 8569.11682938]),
  array([24930.78143674, 24645.76877175, 24361.67723276, ...,
         31207.54972644, 30852.27042858, 30504.01438952])),
 0.2: (0.48937130477952795,
  array([ 8190.34418925,  7905.33152425,  7621.23998526, ...,
         14467.11247894, 14111.83318108, 13763.57714203]),
  array([19736.32112409, 19451.3084591 , 19167.21692011, ...,
         26013.08941379, 25657.81011593, 25309.55407687])),
 0.3: (0.2785124339937717,
  array([11178.2734361 , 10893.2607711 , 10609.16923211, ...,
         17455.04172579, 17099.76242793, 16751.50638888]),
  array([16748.39187724, 16463.37921225, 16179.28767325, ...,
         23025.16016693, 22669.88086908, 22321.62483002])),
 0.4: (0.07478449248544483,
  array([13268.32340754, 12983.31074254, 12699.21920355, ...,
         19545.09169723, 19189.81239937, 18841.55636032]),
  array([14658.3419058 , 14373.3292408 , 14089

<IPython.core.display.Javascript object>

In [34]:
dt_test_prefixes = pd.read_csv(
    "./results/predicitve/bpic2012/dt_test_prefixes_bpic2012.csv", sep=";"
)
dt_test_prefixes.columns

Index(['Case ID', 'NumberOfOffers', 'start_time', 'AMOUNT_REQ', 'REG_DATE',
       'Activity', 'Resource', 'timesincemidnight', 'month', 'weekday', 'hour',
       'timesincelastevent', 'timesincecasestart', 'event_nr',
       'timesincecasestart_days', 'timesincecasestart_wks',
       'timesincelastevent_days', 'timesincelastevent_wks', 'time_to_event_m',
       'case_length', 'open_cases', 'label', 'treatment', 'event', 'prefix_nr',
       'orig_case_id'],
      dtype='object')

<IPython.core.display.Javascript object>

In [35]:
test_conformal_causal = pd.read_csv(
    "./results/conformal_causal/bpic2012/test_bpic2012.csv", sep=";"
)
test_conformal_causal

Unnamed: 0,NumberOfOffers,AMOUNT_REQ,Activity_A_ACCEPTED,Activity_A_ACTIVATED,Activity_A_APPROVED,Activity_A_CANCELLED,Activity_A_DECLINED,Activity_A_FINALIZED,Activity_A_PARTLYSUBMITTED,Activity_A_PREACCEPTED,...,Proba_if_Untreated,CATE,lower_naive,upper_naive,upper_counterfactual,lower_counterfactual,lower_inexact,upper_inexact,lower_exact,upper_exact
0,1,7000,0,0,0,0,0,0,0,0,...,0501587,-0009087235,-0123467404,0145065474,00666029,-011121638,-0127574965,00698426300000001,-0239834581,0204037251
1,1,7000,1,0,0,0,0,1,1,1,...,0501587,0010231197,-0128044794,0228189104,010284458,-011820258,-0139364065,0182641015,-0258987681,0281437301
2,1,7000,1,0,0,0,0,1,1,1,...,0501587,0010231197,-0130332364,0231090694,011405392,-012643718,-0140649295,0184919075,-0259391881,0284074491
3,1,7000,1,0,0,0,0,1,1,1,...,050703037,-0024962097,-0284422434,0385288694,026088314,-019905711,-0231142245,0253676705,-0343015581,0361846581
4,1,7000,1,0,0,0,0,1,1,1,...,025145835,-0016653031,-0393752174,0259124624,013198945,-035476426,-0358393105,0159680115,-0526636901,0284842781
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22152,1,5000,0,0,0,0,0,0,1,1,...,048184294,013776064,-0233736854,0478069674,034330462,-010661902,-0087402505,0332704305,-0232232281,0435316941
22153,1,5000,1,0,0,0,0,0,1,1,...,048184294,0131163,-0243530134,0474979634,033392259,-011319324,-0085266975,0330435235,-0231555201,0427631021
22154,1,5000,1,0,0,0,0,1,1,1,...,048224095,013076499,-0236467494,0478069674,033392259,-011321668,-00845107850000001,0328032465,-0230369211,0427631021
22155,1,5000,1,0,0,0,0,1,1,1,...,048224095,013076499,-0233473344,0482733424,032293888,-011321668,-0084209125,0328032465,-0230046791,042595942


<IPython.core.display.Javascript object>

In [36]:
test_conformal_causal["prefix_nr"] = list(
    dt_test_prefixes.groupby("Case ID").first()["prefix_nr"]
)
test_conformal_causal["case_id"] = list(
    dt_test_prefixes.groupby("Case ID").first()["orig_case_id"]
)
test_conformal_causal["activity"] = list(
    dt_test_prefixes.groupby("Case ID").last()["Activity"]
)
test_conformal_causal["timestamp"] = list(
    dt_test_prefixes.groupby("Case ID").last()["start_time"]
)
test_conformal_causal = test_conformal_causal.sort_values(by=["timestamp"]).reset_index(
    drop=True
)
test_conformal_causal

Unnamed: 0,NumberOfOffers,AMOUNT_REQ,Activity_A_ACCEPTED,Activity_A_ACTIVATED,Activity_A_APPROVED,Activity_A_CANCELLED,Activity_A_DECLINED,Activity_A_FINALIZED,Activity_A_PARTLYSUBMITTED,Activity_A_PREACCEPTED,...,upper_counterfactual,lower_counterfactual,lower_inexact,upper_inexact,lower_exact,upper_exact,prefix_nr,case_id,activity,timestamp
0,1,7000,0,0,0,0,0,0,0,0,...,00666029,-011121638,-0127574965,00698426300000001,-0239834581,0204037251,1,204454,A_SUBMITTED,2012-01-27 14:32:16.363
1,1,7000,0,0,0,0,0,0,1,0,...,007246365,-011121638,-0125678235,0065181855,-0240891181,0204037251,2,204454,A_PARTLYSUBMITTED,2012-01-27 14:32:16.811
2,1,7000,0,0,0,0,0,0,1,1,...,010470639,-011259354,-012415464,00698426300000001,-0242242941,0204281801,3,204454,A_PREACCEPTED,2012-01-27 14:33:17.892
3,2,10000,0,0,0,0,0,0,0,0,...,00584197400000001,002005592,00106245300000001,010485485,-010024935,022857829,1,204466,A_SUBMITTED,2012-01-27 14:58:46.061
4,2,10000,0,0,0,0,0,0,1,0,...,006197288,002005592,00105519199999999,00960645999999999,-010222638,0208722861,2,204466,A_PARTLYSUBMITTED,2012-01-27 14:58:46.303
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22152,1,11000,1,1,1,0,0,1,1,1,...,-00160412505,-0303429137,-03283843054,0011286153,-04200533923,01132246323,20,208901,O_ACCEPTED,2012-03-14 14:31:28.859
22153,2,22000,1,1,1,0,0,1,1,1,...,01454346655,-000569737740000003,-0009681573,0273475836,-01202095214,0650106608,48,206615,O_ACCEPTED,2012-03-14 14:33:57.651
22154,2,22000,1,1,0,0,0,1,1,1,...,088681063,-005135263,-0054343009,080464841,-016509365,095263983,45,206615,A_ACTIVATED,2012-03-14 14:33:57.651
22155,2,22000,1,1,1,0,0,1,1,1,...,0266468188,-00116614631,-00157360713,044895175,-01256913269,070280529,47,206615,A_REGISTERED,2012-03-14 14:33:57.651


<IPython.core.display.Javascript object>

In [21]:
test_data["prefix_nr"] = list(dt_test_prefixes.groupby("Case ID").first()["prefix_nr"])
test_data["case_id"] = list(dt_test_prefixes.groupby("Case ID").first()["orig_case_id"])
test_data["activity"] = list(dt_test_prefixes.groupby("Case ID").last()["Activity"])
test_data["timestamp"] = list(dt_test_prefixes.groupby("Case ID").last()["start_time"])
test_data = test_data.sort_values(by=["timestamp"]).reset_index(drop=True)


<IPython.core.display.Javascript object>

In [22]:
test_data["predicted_time_to_event"] = preds_test


<IPython.core.display.Javascript object>

In [23]:
# lower and upper bounds for Alpha = 0.1, it gives the highest coverage.
test_data["lower_time_to_event_adaptive"] = pred_intervals_adaptive_2[0.1][1]
test_data["upper_time_to_event_adaptive"] = pred_intervals_adaptive_2[0.1][2]


<IPython.core.display.Javascript object>

In [24]:
# lower and upper bounds for Alpha = 0.1, it gives the highest coverage.
test_data["lower_time_to_event_adaptive_QR"] = pred_intervals_adaptive_3[0.1][1]
test_data["upper_time_to_event_adaptive_QR"] = pred_intervals_adaptive_3[0.1][2]


<IPython.core.display.Javascript object>

In [25]:
# lower and upper bounds for Alpha = 0.1, it gives the highest coverage.
test_data["lower_time_to_event_naive"] = pred_intervals_naive[0.1][1]
test_data["upper_time_to_event_naive"] = pred_intervals_naive[0.1][2]


<IPython.core.display.Javascript object>

In [26]:
test_data

Unnamed: 0,NumberOfOffers,AMOUNT_REQ,event,Activity,Resource,time_to_event_m,timesincelastevent_mean,timesincelastevent_max,timesincelastevent_min,timesincelastevent_sum,...,case_id,activity,timestamp,predicted_time_to_event,lower_time_to_event_adaptive,upper_time_to_event_adaptive,lower_time_to_event_adaptive_QR,upper_time_to_event_adaptive_QR,lower_time_to_event_naive,upper_time_to_event_naive
0,1.0,7000.0,True,A_SUBMITTED,112.0,18304.061233,0.0,0.0,0.0,0.0,...,204454,A_SUBMITTED,2012-01-27 14:32:16.363,13963.332657,1361.210496,26565.454818,2995.883877,24930.781437,-13006.918217,40933.583530
1,1.0,7000.0,True,A_SUBMITTED,112.0,18304.061233,0.003733,0.007467,0.0,0.007467,...,204454,A_PARTLYSUBMITTED,2012-01-27 14:32:16.811,13678.319992,1074.526470,26282.113513,2710.871212,24645.768772,-13291.930882,40648.570865
2,1.0,7000.0,True,A_SUBMITTED,112.0,18304.061233,0.341828,1.018017,0.0,1.025483,...,204454,A_PREACCEPTED,2012-01-27 14:33:17.892,13394.228453,910.542328,25877.914577,2426.779673,24361.677233,-13576.022421,40364.479326
3,2.0,10000.0,True,A_SUBMITTED,112.0,44236.666117,0.0,0.0,0.0,0.0,...,204466,A_SUBMITTED,2012-01-27 14:58:46.061,10665.972713,-4484.548525,25816.493950,-301.476067,21633.421493,-16304.278161,37636.223586
4,2.0,10000.0,True,A_SUBMITTED,112.0,44236.666117,0.002017,0.004033,0.0,0.004033,...,204466,A_PARTLYSUBMITTED,2012-01-27 14:58:46.303,10808.083597,-4232.505245,25848.672439,-159.365183,21775.532377,-16162.167277,37778.334471
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22152,1.0,11000.0,True,W_Valideren aanvraag,11339.0,8902.347483,2159.039528,10152.154967,0.0,43180.790567,...,208901,O_ACCEPTED,2012-03-14 14:31:28.859,20980.556994,3366.416858,38594.697130,10013.108214,31948.005774,-5989.693880,47950.807867
22153,2.0,22000.0,True,W_Valideren aanvraag,11259.0,54651.657883,1138.576206,10309.39775,0.0,54651.657883,...,206615,O_ACCEPTED,2012-03-14 14:33:57.651,20604.758290,3103.306587,38106.209994,9637.309510,31572.207071,-6365.492583,47575.009164
22154,2.0,22000.0,True,W_Valideren aanvraag,11259.0,54651.657883,1214.481286,10309.39775,0.0,54651.657883,...,206615,A_ACTIVATED,2012-03-14 14:33:57.651,20240.100946,2830.313282,37649.888610,9272.652166,31207.549726,-6730.149927,47210.351820
22155,2.0,22000.0,True,W_Valideren aanvraag,11259.0,54651.657883,1162.801232,10309.39775,0.0,54651.657883,...,206615,A_REGISTERED,2012-03-14 14:33:57.651,19884.821649,2554.602454,37215.040843,8917.372868,30852.270429,-7085.429225,46855.072522


<IPython.core.display.Javascript object>

In [27]:
import os

results_surv = "./results/conformal_survival/bpic2012/"
# create results directory
if not os.path.exists(os.path.join(results_surv)):
    os.makedirs(os.path.join(results_surv))

test_data.to_csv(
    os.path.join(results_surv, "test_data_survival_conformal_%s.csv" % "bpic2012",),
    sep=";",
    index=False,
)

<IPython.core.display.Javascript object>

In [37]:
import os

results_surv = "./results/conformal_causal/bpic2012/"
# create results directory
if not os.path.exists(os.path.join(results_surv)):
    os.makedirs(os.path.join(results_surv))

test_conformal_causal.to_csv(
    os.path.join(results_surv, "test_data_causal_conformal_%s.csv" % "bpic2012",),
    sep=";",
    index=False,
)

<IPython.core.display.Javascript object>