## Importing

In [13]:
import pandas as pd
import scipy as sp
import numpy as np

import emcee                     
from multiprocessing import Pool

import lymph

## Creating or Loading Model

In [14]:
# Toggle: True builds a fresh model from the graph below, False restores the
# one previously stored in the HDF5 file.
NEW_MODEL = True

filename = "./data/extended_system.hdf5"

if NEW_MODEL:
    # Keys are (node type, node name); each value lists the nodes that the
    # key node has an outgoing edge to.
    graph = {
        ('tumor', 'primary'): ['II', 'III', 'IV', 'VII'],
        ('lnl', 'I'): [],
        ('lnl', 'II'): ['I', 'III', 'V', 'VII'],
        ('lnl', 'III'): ['IV'],
        ('lnl', 'IV'): [],
        ('lnl', 'V'): [],
        ('lnl', 'VII'): [],
    }
    extended_systm = lymph.Unilateral(graph=graph)
else:
    extended_systm = lymph.utils.system_from_hdf(
        filename=filename,
        name="extended/model",
    )

print(extended_systm)

Unilateral lymphatic system with 1 tumor(s) and 6 LNL(s).
primary-0.0%->II primary-0.0%->III primary-0.0%->IV primary-0.0%->VII II-0.0%->I II-0.0%->III II-0.0%->V II-0.0%->VII III-0.0%->IV


## Modalities

In [15]:
if NEW_MODEL:
    # Per modality: [specificity, sensitivity]
    mri_and_pet_spsn = dict(
        MRI=[0.63, 0.81],
        PET=[0.86, 0.79],
    )
    extended_systm.modalities = mri_and_pet_spsn

## Data

In [18]:
# Always read the table so that `data` exists on both code paths; previously it
# was only bound when NEW_MODEL was True and `data.head()` raised a NameError
# for a model restored from HDF5.
# The CSV has a two-row header (modality / LNL), hence header=[0, 1].
data = pd.read_csv("./data/USZ_ipsi.csv", header=[0, 1])

if NEW_MODEL:
    # Only a freshly built model needs the data attached here.
    # NOTE(review): a model restored from HDF5 presumably already carries its
    # patient data - confirm against lymph.utils.system_from_hdf.
    extended_systm.patient_data = data

data.head()

Unnamed: 0_level_0,PET,PET,PET,PET,PET,PET,MRI,MRI,MRI,MRI,MRI,MRI,info
Unnamed: 0_level_1,I,II,III,IV,V,VII,I,II,III,IV,V,VII,t_stage
0,False,True,False,False,False,False,False,True,False,False,False,False,late
1,,,,,,,False,False,True,False,False,False,early
2,True,True,True,True,False,True,True,True,True,True,False,True,late
3,False,True,True,True,False,True,False,True,True,True,False,True,late
4,False,True,False,False,False,True,,,,,,,early


## Storage of model

In [19]:
if NEW_MODEL:
    # Persist the freshly built model under the key that the loading branch
    # (NEW_MODEL = False) reads it back from.
    extended_systm.to_hdf(filename=filename, name="extended/model")

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->MultiIndex([( 'PET',       'I'),
            ( 'PET',      'II'),
            ( 'PET',     'III'),
            ( 'PET',      'IV'),
            ( 'PET',       'V'),
            ( 'PET',     'VII'),
            ( 'MRI',       'I'),
            ( 'MRI',      'II'),
            ( 'MRI',     'III'),
            ( 'MRI',      'IV'),
            ( 'MRI',       'V'),
            ( 'MRI',     'VII'),
            ('info', 't_stage')],
           )]

  store.put(


## Likelihood

In [20]:
def llh(theta, sys, early_p=0.3, max_t=10):
    """Log-likelihood of the parameter vector ``theta`` for the model ``sys``.

    Parameters
    ----------
    theta : sequence
        All entries but the last are handed to the model as spread
        probabilities; the last entry is the probability parameter of the
        "late" T-stage time distribution.
    sys : object
        Model exposing ``marginal_log_likelihood``.
    early_p : float
        Probability parameter of the "early" T-stage time distribution.
    max_t : int
        Last time-step; the distributions are evaluated on ``0..max_t``.

    Returns
    -------
    ``-np.inf`` if the late parameter is below 0 or above 1, otherwise
    whatever ``sys.marginal_log_likelihood`` returns.
    """
    late_p = theta[-1]
    spread_probs = theta[:-1]

    # Reject late-stage parameters outside the unit interval right away.
    if late_p < 0. or late_p > 1.:
        return -np.inf

    time_steps = np.arange(max_t + 1)
    stage_params = {"early": early_p, "late": late_p}
    stage_time_dists = {
        stage: lymph.utils.fast_binomial_pmf(time_steps, max_t, prob)
        for stage, prob in stage_params.items()
    }

    return sys.marginal_log_likelihood(
        spread_probs,
        t_stages=["early", "late"],
        time_dists=stage_time_dists,
    )

## Sampling

In [21]:
if NEW_MODEL:
    # Settings for the sampler: one dimension per spread probability plus one
    # for the late T-stage parameter that `llh` reads from theta[-1].
    ndim = len(extended_systm.spread_probs) + 1
    nwalkers = 10 * ndim
    # NOTE(review): 2 steps is only enough for a smoke test, not for converged
    # samples - raise this for a real run.
    max_steps = 2

    # Prepare the backend that persists the chain next to the stored model.
    backend = emcee.backends.HDFBackend(
        filename=filename,
        name="extended/samples"
    )
    backend.reset(nwalkers, ndim)

    # The actual sampling round.
    with Pool() as pool:
        sampler = lymph.utils.EnsembleSampler(
            nwalkers, ndim,
            llh,
            kwargs={"sys": extended_systm},
            pool=pool,
            # Without this the backend above was created but never used, so
            # no samples reached the HDF5 file and the recovery branch below
            # had nothing to load.
            backend=backend,
        )
        acor_list = sampler.run_sampling(max_steps)
        print(acor_list)

    # Expose the chain under the same name as the recovery branch does.
    chain = sampler.get_chain()
else:
    # Read-only so that inspecting stored samples can never modify them.
    recover_backend = emcee.backends.HDFBackend(
        filename=filename,
        name="extended/samples",
        read_only=True,
    )
    chain = recover_backend.get_chain()
    # Show the dimensions rather than dumping the whole array.
    print(chain.shape)


Starting sampling


## Risk prediction

In [None]:
# Fixed seed so that the random thinning of the chain below is reproducible
# (the name was previously used without ever being defined).
SEED = 42

# Flattened posterior samples, one row per draw. Each row has the layout that
# `llh` unpacks: spread probabilities first, late T-stage parameter last.
# `acor_list` is what `run_sampling` returned (judging by its name, the
# autocorrelation record), not the samples, so it must not be used here.
if NEW_MODEL:
    samples_HMM = sampler.get_chain(flat=True)
else:
    samples_HMM = chain.reshape(-1, chain.shape[-1])

theta = np.mean(samples_HMM, axis=0)

extended_systm.spread_probs = theta[:-1]

# One entry per LNL in graph order (I, II, III, IV, V, VII): only level II is
# positive on PET.
# NOTE(review): the vector originally had 4 entries although the model has
# 6 LNLs; V and VII are assumed negative here - confirm the intended diagnosis.
diagnose = {"PET": np.array([0, 1, 0, 0, 0, 0])}
thin = 50
max_t = 10
t = np.arange(max_t + 1)
early_p = 0.3
late_p = theta[-1]

# Time distributions at the posterior-mean late parameter (kept for reference
# and for the fixed "early" distribution used in the loop).
time_dists = {
    "early": lymph.utils.fast_binomial_pmf(t, max_t, early_p),
    "late": lymph.utils.fast_binomial_pmf(t, max_t, late_p),
}

# `risk` takes a single time distribution, so the risk is computed separately
# for each T-stage; indexing the dict with ("early", "late") raised a KeyError.
risks = {"early": [], "late": []}
np.random.seed(SEED)
for sample in np.random.permutation(samples_HMM)[::thin]:
    # The last entry is the late T-stage parameter, not a spread probability.
    extended_systm.spread_probs = sample[:-1]
    sample_time_dists = {
        "early": time_dists["early"],
        # Use this sample's own late parameter, as the likelihood does.
        "late": lymph.utils.fast_binomial_pmf(t, max_t, sample[-1]),
    }
    for t_stage, time_dist in sample_time_dists.items():
        risks[t_stage].append(
            extended_systm.risk(
                diagnoses=diagnose,
                time_dist=time_dist,
                mode="HMM"
            )
        )