# Running time estimation

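This notebook measures how long it takes to fit three models (`CTMC`, `GamMixCTMC` and `FiniteMixCTMC`) on four datasets (venticu, sleep, ebmt and customers), and saves the timings to `runningtime.pickle`.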

In [1]:
import collections
import csv
import numpy as np
import pickle

from mixmarkov import (
    CTMC,
    GamMixCTMC,
    FiniteMixCTMC,
    summarize_sequences,
)

In [2]:
# One row of three timings per dataset; the cells below fill slot 0 for CTMC,
# slot 1 for GamMixCTMC and slot 2 for FiniteMixCTMC.
res = {
    dataset: np.zeros(3)
    for dataset in ("venticu", "sleep", "ebmt", "customers")
}

### venticu

In [3]:
# Build one trajectory of (state, time) pairs per subject id from ventICU.dat.
# Rows are expected to be grouped by id: a change of id starts a new trajectory.
seqs = []
with open("../../data/ventICU.dat") as handle:
    next(handle)  # Skip the header row.
    current_id = None
    for record in handle:
        pid, _age, _sex, _, raw_src, raw_dst, t_start, t_stop = record.strip().split(" ")
        # State codes in the file are shifted down by one.
        origin = int(raw_src) - 1
        target = int(raw_dst) - 1
        if pid != current_id:
            # A new subject begins: store the previous trajectory (if any) and
            # open a fresh one at this row's source state and start time.
            if current_id is not None:
                seqs.append(trajectory)
            trajectory = [(origin, float(t_start))]
            current_id = pid
        # Destination code 999 (998 after the shift) is recorded as staying in
        # the source state.
        if target == 999 - 1:
            target = origin
        trajectory.append((target, float(t_stop)))
    seqs.append(trajectory)
seqs = np.array(seqs, dtype=object)

n = 4
ks, ts = summarize_sequences(seqs, n)
# Allow exactly the transitions that have a non-zero total in `ks`, but never
# self-transitions.
mask = np.sum(ks, axis=0).astype(bool)
np.fill_diagonal(mask, False)

In [4]:
# Fit the CTMC model once on the venticu summaries (default verbosity, so the
# optimizer report is printed) before timing it in the next cell.
model = CTMC(mask)
model.fit(ks, ts)



Optimization terminated successfully.
         Current function value: 4299.424575
         Iterations: 14
         Function evaluations: 15
         Gradient evaluations: 15
         Hessian evaluations: 14


In [5]:
# `-o` makes %timeit return its result object; store the mean time per loop
# in slot 0 (CTMC) of the venticu row.
r = %timeit -o model.fit(ks, ts, verbose=False)
res["venticu"][0] = r.average

20.5 ms ± 1.77 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [6]:
# Fit the GamMixCTMC model once on the venticu summaries (default verbosity)
# before timing it in the next cell.
model = GamMixCTMC(mask)
model.fit(ks, ts)

Optimization terminated successfully.
         Current function value: 4277.357304
         Iterations: 28
         Function evaluations: 39
         Gradient evaluations: 39
         Hessian evaluations: 28


In [7]:
# Time the GamMixCTMC fit and store the mean time per loop in slot 1 of the
# venticu row.
r = %timeit -o model.fit(ks, ts, verbose=False)
res["venticu"][1] = r.average

490 ms ± 8.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
# Fit a 3-component FiniteMixCTMC once on the venticu summaries, with a fixed
# seed and without progress output, before timing it in the next cell.
model = FiniteMixCTMC(mask, n_comps=3)
model.fit(ks, ts, seed=0, verbose=False)

In [9]:
r = %timeit -o model.fit(ks, ts, verbose=False)
res["venticu"][2] = r.average

854 ms ± 9.74 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### sleep

In [10]:
# Step 1: group the rows of sleep.dat by subject id. Each stored record is
# (state, (begin, end), transition-indicator array, cortisol).
data = collections.defaultdict(list)
with open("../../data/sleep.dat") as handle:
    next(handle)  # Skip the header row.
    for record in handle:
        (
            raw_id, raw_state, raw_beg, raw_end,
            tas, tsa, tnr, trn, raw_cortisol,
        ) = record.strip().split(" ")
        subject = int(raw_id)
        state = int(raw_state)
        interval = (float(raw_beg), float(raw_end))
        indicators = np.array([int(flag) for flag in (tas, tsa, tnr, trn)])
        level = float(raw_cortisol)
        data[subject].append((state, interval, indicators, level))

# Step 2: turn each subject's rows into a trajectory holding one (state, time)
# entry per state change, using the begin time of the first row in the new
# state. State codes are shifted down by one.
seqs = []
for records in data.values():
    trajectory = []
    previous = None
    for state, (t_beg, _t_end), _, _ in records:
        if state != previous:
            trajectory.append((state - 1, t_beg))
            previous = state
    seqs.append(trajectory)
seqs = np.array(seqs, dtype=object)

n = 3
ks, ts = summarize_sequences(seqs, n)
# Allow exactly the transitions that have a non-zero total in `ks`, but never
# self-transitions.
mask = np.sum(ks, axis=0).astype(bool)
np.fill_diagonal(mask, False)

In [11]:
# Fit the CTMC model once on the sleep summaries (default verbosity) before
# timing it in the next cell.
model = CTMC(mask)
model.fit(ks, ts)

Optimization terminated successfully.
         Current function value: 17843.925907
         Iterations: 15
         Function evaluations: 16
         Gradient evaluations: 16
         Hessian evaluations: 15


In [12]:
# Time the CTMC fit and store the mean time per loop in slot 0 of the sleep row.
r = %timeit -o model.fit(ks, ts, verbose=False)
res["sleep"][0] = r.average

17.3 ms ± 229 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [13]:
# Fit the GamMixCTMC model once on the sleep summaries (default verbosity)
# before timing it in the next cell.
model = GamMixCTMC(mask)
model.fit(ks, ts)

Optimization terminated successfully.
         Current function value: 17465.631606
         Iterations: 19
         Function evaluations: 21
         Gradient evaluations: 21
         Hessian evaluations: 19


In [14]:
# Time the GamMixCTMC fit and store the mean time per loop in slot 1 of the
# sleep row.
r = %timeit -o model.fit(ks, ts, verbose=False)
res["sleep"][1] = r.average

65.8 ms ± 1.6 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [15]:
# Fit a 10-component FiniteMixCTMC once on the sleep summaries, with a fixed
# seed and without progress output, before timing it in the next cell.
model = FiniteMixCTMC(mask, n_comps=10)
model.fit(ks, ts, seed=0, verbose=False)

In [16]:
# Time the FiniteMixCTMC fit (same fixed seed as the warm-up fit) and store the
# mean time per loop in slot 2 of the sleep row.
r = %timeit -o model.fit(ks, ts, seed=0, verbose=False)
res["sleep"][2] = r.average

1.77 s ± 11.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### ebmt

In [17]:
# Synthetic covariates: every year band and every age class is mapped to a
# fixed random 10-dimensional vector.
rng = np.random.default_rng(seed=0)

vecs_year = rng.normal(size=(3, 10))
year_map = {
    "1985-1989": vecs_year[0],
    "1990-1994": vecs_year[1],
    "1995-1998": vecs_year[2],
}

vecs_agecl = rng.normal(size=(3, 10))
agecl_map = {
    "<=20": vecs_agecl[0],
    "20-40": vecs_agecl[1],
    ">40": vecs_agecl[2],
}

# NOTE(review): this re-creates the generator with the same seed as above, so
# the per-subject noise drawn below presumably starts by repeating the values
# already used for `vecs_year` / `vecs_agecl`. It is kept as-is so the feature
# matrix is unchanged -- confirm whether a fresh, independent stream was meant.
rng = np.random.default_rng(seed=0)


def _close_sequence(seq, max_stop):
    """Pad `seq` with (last state, max_stop) if it ends before `max_stop`."""
    if seq[-1][1] < max_stop:
        seq.append((seq[-1][0], max_stop))
    return seq


seqs = list()
feats = list()

with open("../../data/ebmt.dat") as f:
    cur = None
    # Rows are expected to be grouped by subject id: a change of id starts a
    # new sequence and a new feature vector.
    for row in csv.DictReader(f, delimiter=" "):
        # State codes in the file are shifted down by one.
        src = int(row["from"]) - 1
        dst = int(row["to"]) - 1
        if row["id"] != cur:
            if cur is not None:
                seqs.append(_close_sequence(seq, max_stop))
            seq = list()
            cur = row["id"]
            seq.append((src, float(row["Tstart"])))
            # Features: year vector, age-class vector and 20 noise dimensions.
            feats.append(np.concatenate((
                year_map[row["year"]],
                agecl_map[row["agecl"]],
                rng.normal(size=(20,))
            )))
        # Only rows with status "1" add a transition to the sequence.
        if row["status"] == "1":
            seq.append((dst, float(row["Tstop"])))
        # Tstop of the most recent row seen for the current subject.
        max_stop = float(row["Tstop"])
    # The final subject must be closed exactly like all earlier ones; before,
    # its sequence was appended without the padding up to `max_stop`.
    # The guard also keeps an empty file from raising a NameError.
    if cur is not None:
        seqs.append(_close_sequence(seq, max_stop))

seqs = np.array(seqs, dtype=object)
xs = np.array(feats)

n = 6
ks, ts = summarize_sequences(seqs, n)
# Allow exactly the transitions that have a non-zero total in `ks`, but never
# self-transitions.
mask = np.sum(ks, axis=0).astype(bool)
np.fill_diagonal(mask, False)  # Remove self-transitions

In [18]:
# Fit the CTMC model once on the ebmt summaries, with the feature matrix `xs`
# and l2=3.0 (default verbosity), before timing it in the next cell.
model = CTMC(mask)
model.fit(ks, ts, xs=xs, l2=3.0)

Optimization terminated successfully.
         Current function value: 28580.651430
         Iterations: 22
         Function evaluations: 23
         Gradient evaluations: 23
         Hessian evaluations: 22


In [19]:
# Time the CTMC fit with features and store the mean time per loop in slot 0
# of the ebmt row.
r = %timeit -o model.fit(ks, ts, xs=xs, l2=3.0, verbose=False)
res["ebmt"][0] = r.average

8.6 s ± 156 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [20]:
# Fit the GamMixCTMC model once on the ebmt summaries, with the feature matrix
# `xs` and l2=3.0 (default verbosity), before timing it in the next cell.
model = GamMixCTMC(mask)
model.fit(ks, ts, xs=xs, l2=3.0)

Optimization terminated successfully.
         Current function value: 22434.976316
         Iterations: 20
         Function evaluations: 23
         Gradient evaluations: 23
         Hessian evaluations: 20


In [21]:
# Time the GamMixCTMC fit with features and store the mean time per loop in
# slot 1 of the ebmt row.
r = %timeit -o model.fit(ks, ts, xs=xs, l2=3.0, verbose=False)
res["ebmt"][1] = r.average

51.3 s ± 236 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
%%time
# The cell magic above must stay on the first line of the cell; it reports the
# time taken by this single fit.
# `l2` is read again by the %timeit cell that follows.
n_comps = 5
l2 = 3.0

# verbose=True prints a "cost / imp" progress line as the fit proceeds.
model = FiniteMixCTMC(mask, n_comps=n_comps)
model.fit(ks, ts, xs=xs, seed=0, l2=l2, verbose=True)

cost: 32421.902, imp: inf%
cost: 30868.488, imp: 5.03%
cost: 27874.749, imp: 10.74%
cost: 26276.768, imp: 6.08%
cost: 25430.502, imp: 3.33%
cost: 24926.268, imp: 2.02%
cost: 24588.689, imp: 1.37%
cost: 24331.315, imp: 1.06%
cost: 24118.985, imp: 0.88%
cost: 23912.336, imp: 0.86%
cost: 23741.616, imp: 0.72%
cost: 23609.615, imp: 0.56%
cost: 23476.748, imp: 0.57%
cost: 23355.689, imp: 0.52%
cost: 23249.634, imp: 0.46%
cost: 23115.833, imp: 0.58%
cost: 23011.176, imp: 0.45%
cost: 22935.398, imp: 0.33%
cost: 22885.504, imp: 0.22%
cost: 22829.855, imp: 0.24%
cost: 22782.599, imp: 0.21%
cost: 22739.307, imp: 0.19%
cost: 22706.725, imp: 0.14%
cost: 22680.251, imp: 0.12%
cost: 22654.010, imp: 0.12%
cost: 22610.662, imp: 0.19%
cost: 22548.547, imp: 0.28%
cost: 22474.167, imp: 0.33%
cost: 22410.182, imp: 0.29%
cost: 22338.028, imp: 0.32%
cost: 22282.656, imp: 0.25%
cost: 22233.409, imp: 0.22%
cost: 22195.006, imp: 0.17%
cost: 22166.163, imp: 0.13%
cost: 22129.088, imp: 0.17%
cost: 22082.137, imp

In [9]:
# Time the FiniteMixCTMC fit with features (same seed and `l2` as the fit in
# the preceding cell) and store the mean time per loop in slot 2 of the ebmt
# row. This is by far the slowest measurement in the notebook.
r = %timeit -o model.fit(ks, ts, xs=xs, seed=0, l2=l2, verbose=False)
res["ebmt"][2] = r.average

25min 50s ± 12 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [22]:
# Hard-coded mean fit time, in seconds, of FiniteMixCTMC on ebmt, so the very
# long %timeit cell does not have to be re-run. The recorded report reads
# "25min 50s ± 12 s per loop": the mean is 25 min 50 s, while the 12 s is the
# standard deviation and must not be used as the seconds part.
res["ebmt"][2] = 25.0 * 60 + 50.0

### customers

In [23]:
# Timings for the customers dataset are not measured in this notebook; they
# are entered by hand. Presumably in seconds and in the same slot order as the
# other rows (CTMC, GamMixCTMC, FiniteMixCTMC) -- TODO confirm the source.
customers_timings = (171.35907648, 721.5355354, 52533.0)
res["customers"] = np.array(customers_timings)

## Save results

In [24]:
# Persist the dataset -> timings dictionary as a pickle file.
results_path = "../../data/runningtime.pickle"
with open(results_path, "wb") as sink:
    pickle.dump(res, sink)