In [1]:
import numpy as np
import pandas as pd
# Fixed seed for the notebook-wide random generator, so that every draw taken
# from `rng` is reproducible after a kernel restart.
SEED = 12345
rng = np.random.default_rng(SEED)

from lymph.models import Unilateral
from lymixture import LymphMixture
from lymixture.em import expectation, maximization
from lymixture.utils import binom_pmf, late_binomial, normalize
from fixtures import (
    get_graph,
    get_patient_data,
    MODALITIES,
)
# Two reference parameter sets, keyed by edge name ("<source>to<target>") plus
# the shared `late_p` entry. The C1/C2 suffix presumably refers to mixture
# component 1 and 2 -- TODO confirm.
# NOTE(review): neither dict is referenced in the cells visible here.
PARAMS_C1 = dict(
    TtoII=0.5,
    TtoIII=0.25,
    TtoIV=0.1,
    IItoIII=0.4,
    IIItoIV=0.3,
    late_p=0.5,
)
PARAMS_C2 = dict(
    TtoII=0.65,
    TtoIII=0.15,
    TtoIV=0.05,
    IItoIII=0.5,
    IIItoIV=0.4,
    late_p=0.5,
)

In [2]:
# Number of mixture components to fit.
num_components = 2

# Spread graph handed to every component model: edges T->II, T->III, II->III.
graph = {
    ("tumor", "T"): ["II", "III"],
    ("lnl", "II"): ["III"],
    ("lnl", "III"): [],
}

# NOTE(review): `patient_data` is not used in the cells visible here; the data
# actually loaded into the mixture comes from the CSV below.
patient_data = get_patient_data()

mixture = LymphMixture(
    model_cls=Unilateral,
    model_kwargs=dict(graph_dict=graph),
    num_components=num_components,
)

# The CSV carries a three-row column header, matching the three-level
# `split_by` key used to split the patients.
data = pd.read_csv("data/mixture.csv", header=[0, 1, 2])
mixture.load_patient_data(
    data,
    split_by=("tumor", "1", "subsite"),
    mapping=lambda x: x,
)

# Register the "path" modality with both numeric arguments set to 1.0
# (presumably specificity and sensitivity -- TODO confirm).
mixture.set_modality("path", 1.0, 1.0)

In [3]:
# "early" gets a fixed prior: `binom_pmf` evaluated on the support 0..10 with
# arguments 10 and 0.3. "late" is registered via the `late_binomial` object
# imported from lymixture.utils (presumably parametrised by `late_p` -- TODO
# confirm).
early_prior = binom_pmf(np.arange(11), 10, 0.3)
mixture.set_distribution("early", early_prior)
mixture.set_distribution("late", late_binomial)

mixture.get_all_distributions()

{'early': Distribution([0.0282475249, 0.121060821, 0.23347444050000002, 0.266827932, 0.20012094900000002, 0.1029193452, 0.03675690900000001, 0.009001692, 0.0014467005000000002, 0.00013778100000000004, 5.904900000000001e-06]),
 'late': Distribution([0.0009765625, 0.009765625, 0.0439453125, 0.1171875, 0.205078125, 0.24609375, 0.205078125, 0.1171875, 0.0439453125, 0.009765625, 0.0009765625])}

In [4]:
# Draw a random starting value in [0, 1) for every parameter the mixture
# exposes, push the values into the model, then let the mixture re-normalise
# its mixture coefficients.
params = {name: rng.uniform() for name in mixture.get_params()}
mixture.set_params(**params)
mixture.normalize_mixture_coefs()

# Initial responsibilities for EM: an all-ones array shaped like the mixture's
# current responsibilities, passed through `normalize` (presumably rescaling
# it to sum to one along the given axis -- TODO confirm), i.e. no component is
# preferred at the start.
latent = normalize(np.ones_like(mixture.get_resps()).T, axis=0).T

In [5]:
# Display the mixture coefficients as they currently stand in the model
# (after the random initialisation and `normalize_mixture_coefs()` call).
mixture.get_mixture_coefs()

Unnamed: 0,c1,c2,c3
0,0.417942,0.611725,0.259667
1,0.582058,0.388275,0.740333


In [6]:
# First M-step: compute new parameters from the initial responsibilities.
# NOTE(review): the recorded output of this cell looks like the tail of a
# warning raised at an `np.log(res)` call -- confirm whether zeros in `res`
# are expected at this point.
params = maximization(mixture, latent)

  return np.log(res) if log else res


In [7]:
# First E-step: recompute the responsibilities from the parameters produced
# by the preceding maximization call.
latent = expectation(mixture, params)

In [8]:
def to_numpy(params: dict[str, float]) -> np.ndarray:
    """Return the values of ``params`` as a 1D array, in the dict's iteration order.

    The keys are discarded, so two results are only comparable element-wise
    when both dicts list their parameters in the same order.
    """
    values = list(params.values())
    return np.array(values)

In [9]:
# Upper bound on EM iterations, so that a fit which never meets the tolerance
# below cannot keep the cell running forever.
MAX_EM_STEPS = 1_000

converged = False
count = 0

# Alternate M- and E-steps until two consecutive parameter vectors agree
# within `np.allclose`'s default tolerances (rtol=1e-05, atol=1e-08).
while not converged and count < MAX_EM_STEPS:
    old_params = params
    params = maximization(mixture, latent)
    latent = expectation(mixture, params)
    converged = np.allclose(to_numpy(params), to_numpy(old_params))
    count += 1

# Fail loudly instead of silently reporting parameters of an unfinished fit.
assert converged, f"EM did not converge within {MAX_EM_STEPS} iterations"

mixture.get_params()

{'0_TtoII_spread': 0.0890498387369957,
 '0_TtoIII_spread': 0.3365327913205429,
 '0_IItoIII_spread': 0.999950245757657,
 '0_c1_coef': 0.8486886120326795,
 '0_c2_coef': 6.610696135189607e-05,
 '0_c3_coef': 0.5266330398607697,
 '1_TtoII_spread': 0.2812826613804874,
 '1_TtoIII_spread': 0.08792314890704245,
 '1_IItoIII_spread': 0.26241678192160744,
 '1_c1_coef': 0.15131138796732047,
 '1_c2_coef': 0.9999338930386481,
 '1_c3_coef': 0.4733669601392303,
 'late_p': 0.5143081802000884}

In [10]:
count

23