## **Part 1: A Discrete-Time Model**

In [17]:
# Plotting
import plotly.graph_objects as go
import plotly.express as px
import plotly.subplots as sp
import plotly.io as pio
#pio.renderers.default = "notebook_connected"
#pio.templates.default = "plotly_dark"

# Utilities
import numpy as np
from scipy.stats import chi2
from scipy.special import kolmogorov
from tqdm import tqdm

### **Task 1**

In [18]:
P = np.array([
    [0.9915, 0.005, 0.0025, 0, 0.001],
    [0, 0.986, 0.005, 0.004, 0.005],
    [0, 0, 0.992, 0.003, 0.005],
    [0, 0, 0, 0.991, 0.009],
    [0, 0, 0, 0, 1]
])

In [19]:
def discrete_time_system(P, n, cond=lambda x: True, verbose=True):

    samples = []
    if verbose:
        pbar = tqdm(total=n, desc="Accepted samples")  # For tracking progress
    while len(samples) < n:

        # Simulate a sample
        x = 0
        states = []
        while x != 4:
            x = np.random.choice(range(P.shape[0]), p=P[x])
            states.append(x)
        
        # Check if the sample satisfies the condition (Rejection sampling)
        if cond(states):
            samples.append(states)
            if verbose:
                pbar.update(len(samples) - pbar.n)
    if verbose:
        pbar.close()

    # Compute survival times (time to death)
    survival_times = np.array([len(s) for s in samples])

    # Make all samples have the same length
    max_len = max(survival_times)
    samples = np.array([s + [P.shape[0]-1]*(max_len - len(s)) for s in samples])

    return samples, survival_times

In [20]:
num_samples = 1000
samples, survival_times = discrete_time_system(P, num_samples)

Accepted samples:   0%|          | 0/1000 [00:00<?, ?it/s]

Accepted samples: 100%|██████████| 1000/1000 [00:13<00:00, 76.23it/s]


In [21]:
# Check what samples entered state 2
np.histogram(samples[:, 1], bins=range(6))[0] / num_samples

state2_prop = np.mean(np.sum(samples == 1, axis=1) > 0)
print("Proportion of samples that entered state 2:", state2_prop)

Proportion of samples that entered state 2: 0.594


### **Task 2**

In [22]:
def pt(P, t):
    p0 = np.zeros(P.shape[0])
    p0[0] = 1
    return p0@np.linalg.matrix_power(P, t)

In [23]:
# Theoretical probability
p0 = np.zeros(P.shape[0])
p0[0] = 1
pt = p0@np.linalg.matrix_power(P, 120)

fig = go.Figure(go.Histogram(x=samples[:, 120]+1, histnorm="probability density", marker=dict(color='Blue')))
for i in range(5):
    fig.add_trace(go.Scatter(x=[i+0.55, i+1.45], y=[pt[i], pt[i]], mode="lines", marker=dict(color='Lime')))
fig.update_layout(title="Distribution of states at time 120", xaxis_title="State",
    yaxis_title="Density", bargap=0.1, width=500, height=400, showlegend=False)
fig.show()
#save fig
pio.write_image(fig, "plots/task_2.png", scale=2)

### **Task 3**

In [24]:
pi = p0[:-1]
Ps = P[:-1, :-1]
ps = P[:-1, -1]
dpt_pmf = lambda t: pi@(np.linalg.matrix_power(Ps, t)@ps)
mu = np.sum(pi@np.linalg.inv(np.eye(Ps.shape[0]) - Ps))

# Plot pdf on the histogram
fig = go.Figure(go.Histogram(x=survival_times, histnorm="probability density", marker=dict(color='Blue')))
t = np.arange(0, samples.shape[1])
pdf = [dpt_pmf(i) for i in t]
fig.add_trace(go.Scatter(x=t, y=pdf, mode="lines", marker=dict(color='Lime')))
fig.update_layout(title="Discrete Time System Simulation", xaxis_title="Survival time",
    yaxis_title="Density", bargap=0.1, width=800, height=500, showlegend=False)
fig.show()
#save fig
pio.write_image(fig, "plots/task_3.png", scale=2)

In [25]:
observed, _ = np.histogram(survival_times, bins=range(1, max(survival_times)+1))
expected = np.array([dpt_pmf(i) for i in range(1, max(survival_times))])
expected = expected / expected.sum() * num_samples

# Kolmogorov-Smirnov test
cdf = np.cumsum(expected) / num_samples
sim_cdf = np.cumsum(observed) / num_samples
Dn = np.max(np.abs(cdf - sim_cdf))
p_value = kolmogorov(Dn)
print("Kolmogorov-Smirnov test p-value:", p_value)

Kolmogorov-Smirnov test p-value: 1.0


### **Task 4**

In [26]:
idx_12months = survival_times > 12
idx_reappeared = np.any(np.isin(samples[:, :13], [1, 2, 3]), axis=1)
idx = idx_12months & idx_reappeared

print("Expected survival time directly from samples:", np.mean(survival_times[idx]))

Expected survival time directly from samples: 181.57954545454547


In [27]:
cond = lambda x: np.sum(np.isin(x[:13], [1, 2, 3])) > 0 and len(x) > 12
_, survival_times = discrete_time_system(P, num_samples, cond)
print("Expected survival time using rejection sampling:", np.mean(survival_times))

Accepted samples:   5%|▍         | 49/1000 [00:08<03:00,  5.26it/s]

KeyboardInterrupt: 

### **Task 5**

In [None]:
survive350 = np.zeros(100)
survive_mean = np.zeros(100)
for i in tqdm(range(100)):
    _, survival_times = discrete_time_system(P, 200, verbose=False)
    survive_mean[i] = np.mean(survival_times)
    survive350[i] = np.mean(survival_times > 350)

100%|██████████| 100/100 [03:48<00:00,  2.28s/it]


In [None]:
c = - np.cov(survive350, survive_mean)[0, 1] / np.var(survive_mean)
Z = survive350 + c * (survive_mean - mu)

In [None]:
print(f"Mean Monte Carlo: {np.mean(survive350):.4f}")
print(f"Variance Monte Carlo: {np.var(survive350):.4f}")
print(f"Mean Control Variate: {np.mean(Z):.4f}")
print(f"Variance Control Variate: {np.var(Z):.4f}")

Mean Monte Carlo: 0.2659
Variance Monte Carlo: 0.0009
Mean Control Variate: 0.2614
Variance Control Variate: 0.0003


### **Task 6**