## Importing

In [None]:
import pandas as pd
import scipy as sp
import numpy as np

import emcee
from multiprocessing import Pool

import matplotlib.pyplot as plt
import matplotlib.gridspec as gs
from matplotlib import font_manager
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.colors import ListedColormap
from cycler import cycler
import corner
from typing import Any, Dict, List, Optional, Tuple, Union

import lymph

## Creating or Loading Model

In [3]:
# HDF5 file used by this notebook to store the model.
filename = "../data/extended_system.hdf5"

# Graph definition: each key is a (node type, node name) pair and its value
# lists the names of the nodes it is connected to.
graph = {
    ("tumor", "primary"): ["II", "III", "IV", "VII"],
    ("lnl", "I"): [],
    ("lnl", "II"): ["I", "III", "V", "VII"],
    ("lnl", "III"): ["IV"],
    ("lnl", "IV"): [],
    ("lnl", "V"): [],
    ("lnl", "VII"): [],
}
extended_systm = lymph.Unilateral(graph=graph)

# Show a summary of the freshly created model.
print(extended_systm)

NameError: name 'lymph' is not defined

## Modalities

In [None]:
# Diagnostic modalities; every entry is given as [specificity, sensitivity].
spsn = {
    "PET": [0.86, 0.79],
    "MRI": [0.63, 0.81],
    "diagnostic_consensus": [0.63, 0.81],
    "pathology": [1.0, 1.0],
}
extended_systm.modalities = spsn

## Data

In [None]:

# The CSV carries a three-row column header, read by pandas as a MultiIndex.
data = pd.read_csv("latest.csv", header=[0, 1, 2])

# Column 18 is used as the T-stage (presumably numeric T categories --
# TODO confirm against the CSV layout). Values <= 2 are labelled "early",
# everything else (including missing values) "late".
# A new Series is built instead of writing strings into the numeric column in
# place: that in-place write is a dtype-incompatible setitem (deprecated in
# recent pandas) and a chained assignment on a slice of `data`.
raw_t_stage = data.iloc[:, 18]
t_stage = pd.Series(
    np.where(raw_t_stage <= 2, "early", "late"),
    index=raw_t_stage.index,
    name=raw_t_stage.name,
)

# Restrict to columns 21..167 and keep only the "ipsi" entries of the second
# header level. The copy guarantees that adding a column below never writes
# into a view of `data`.
data = data.iloc[:, 21:168]
ipsi_data = data.xs("ipsi", level=1, axis=1).copy()

ipsi_data["info", "t_stage"] = t_stage
ipsi_data = ipsi_data.drop(["Ia", "Ib", "IIa", "IIb"], level=1, axis=1)
ipsi_data = ipsi_data.drop(["CT", "FNA", "pCT"], level=0, axis=1)

extended_systm.patient_data = ipsi_data


## Storage of model

In [None]:
# Persist the model in the HDF5 file under the "extended/model" group.
extended_systm.to_hdf(filename=filename, name="extended/model")

## Likelihood

In [None]:
# Settings for the binomial distributions over the time steps 0..max_t
early_p = 0.3
max_t = 10
t = np.arange(max_t + 1)


def llh(theta):
    """Return the marginal log-likelihood of the model for one sample.

    Parameters
    ----------
    theta : sequence of float
        All entries but the last are passed on as the spread probabilities;
        the last entry is ``late_p``, the binomial success probability used
        for the "late" time distribution.

    Returns
    -------
    float
        ``-np.inf`` if ``late_p`` is not inside ``[0, 1]`` (this includes
        NaN), otherwise whatever ``extended_systm.marginal_log_likelihood``
        returns for the given spread probabilities and time distributions.
    """
    spread_probs, late_p = theta[:-1], theta[-1]

    # Negated "inside the interval" test: unlike `late_p > 1 or late_p < 0`
    # this also rejects NaN, for which every comparison is False.
    if not 0.0 <= late_p <= 1.0:
        return -np.inf

    time_dists = {
        "early": sp.stats.binom.pmf(t, max_t, early_p),
        "late": sp.stats.binom.pmf(t, max_t, late_p),
    }

    return extended_systm.marginal_log_likelihood(
        spread_probs,
        t_stages=["early", "late"],
        time_dists=time_dists,
    )

## Sampling

In [None]:
# Settings for the sampler: one dimension per spread probability of the model
# plus one for the extra `late_p` parameter that `llh` reads from the end of
# the parameter vector.
ndim = len(extended_systm.spread_probs) + 1
nwalkers = 2 * ndim
nstep = 10
burnin = 5
moves = [(emcee.moves.DEMove(), 0.8), (emcee.moves.DESnookerMove(), 0.2)]


# Prepare the backend. The samples are written to the same HDF5 file the model
# is stored in (`filename`), under their own "extended/samples" group, instead
# of a second, identically named file in the current working directory.
backend = emcee.backends.HDFBackend(
    filename=filename,
    name="extended/samples",
)
backend.reset(nwalkers, ndim)


# Starting point: every walker starts at a uniformly random position in the
# unit hypercube.
initial_spread_probs = np.random.uniform(
    low=0.0, high=1.0, size=(nwalkers, ndim)
)


if __name__ == "__main__":
    sampler = emcee.EnsembleSampler(
        nwalkers,
        ndim,
        llh,
        moves=moves,
        backend=backend,
    )
    sampler.run_mcmc(initial_spread_probs, nstep, progress=True)

    # Flatten the chain over all walkers, dropping the first `burnin` steps.
    samples_HMM = sampler.get_chain(flat=True, discard=burnin)
    print(samples_HMM)


