In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from mcdrift import *

In this experiment, we compare the performance of P-CDM and NP-CDM in terms of their false negative rates. We use the following parameters:

1. $N = 5$ (number of states)

2. $T = 10{,}000$ (sequence length)

3. $t^* = 2{,}000$ (time at which the abrupt change occurs)

4. $L = 1{,}000$ (number of observations guaranteed to come from $P_0$)

5. $W \in \{2, 5, 10, 50, 100\}$ (subsequence length)

6. $K \in \{1, 2, 5, 10, 20, 50\}$ (detection threshold)

We generate $P_0$ and $P_1$ by drawing each entry independently from the standard uniform distribution and then normalizing each row so that it sums to 1. We set the initial state distribution to $\pi = (0.2, 0.2, 0.2, 0.2, 0.2)$.

Also, we estimate the time of abrupt change using the center of the predicted interval. For example, when $W = 10$ and the change is detected in $s_k$ with $91 \leq k \leq 100$, the estimated time is simply $(100+91)/2 = 95.5$.

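We repeat the experiment $500$ times and, for each pair $(W, K)$, report the false negative rate, i.e., the fraction of repetitions in which the method fails to detect the change.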

In [2]:
# Experiment parameters.
N = 5                        # number of states
T = 10000                    # sequence length
tstar = 2000                 # time at which the abrupt change occurs
L = 1000                     # number of observations guaranteed to come from P_0
Ws = [2, 5, 10, 50, 100]     # subsequence lengths to evaluate
Ks = [1, 2, 5, 10, 20, 50]   # detection thresholds to evaluate

# Uniform distribution over the N states. Derived from N so that the vector
# length cannot drift out of sync with the number of states.
pi = np.full(N, 1 / N)

# False negative rate per (W, K) pair, for P-CDM and NP-CDM respectively.
# Both start at zero and are filled in by the simulation cell.
result_dict_p = {(W, K): 0 for W in Ws for K in Ks}
result_dict_np = {(W, K): 0 for W in Ws for K in Ks}

seed = 2023    # seed for NumPy's global random generator
n_rep = 500    # number of independent repetitions

In [3]:
# Reseed on every execution so that re-running this cell reproduces the same
# transition matrices and sequences.
np.random.seed(seed)

# Integer miss counters, rebuilt on every execution. Counting in integers and
# dividing once at the end avoids the rounding error that builds up when
# 1/n_rep is added repeatedly, and keeps the cell idempotent: re-running it
# no longer stacks new rates on top of the previous run's results.
miss_counts_p = {(W, K): 0 for W in Ws for K in Ks}
miss_counts_np = {(W, K): 0 for W in Ws for K in Ks}

for _ in tqdm(range(n_rep)):
    # Random row-stochastic transition matrices before (p0) and after (p1)
    # the change: uniform entries, each row normalized to sum to 1.
    p0 = np.random.rand(N, N)
    p0 = p0 / p0.sum(axis=1, keepdims=True)
    p1 = np.random.rand(N, N)
    p1 = p1 / p1.sum(axis=1, keepdims=True)

    # First segment: tstar observations simulated under p0, started from pi.
    seq_sim = simulate_mc(pi, p0, tstar)

    # Second segment: simulated under p1, started deterministically from the
    # last state of the first segment (one-hot initial vector). Its first
    # element is dropped when concatenating below, which is why one extra
    # observation (T - tstar + 1) is requested.
    init_vec = np.zeros(N)
    init_vec[seq_sim[-1]] = 1
    seq_sim_2 = simulate_mc(init_vec, p1, T - tstar + 1)
    seq_comb = seq_sim + seq_sim_2[1:]

    for W in Ws:
        for K in Ks:
            pcdm_res = pcdm(seq_comb, W, p0, p1, K)
            npcdm_res = npcdm(seq_comb, W, N, L, K)
            # A string result is counted as a false negative.
            # NOTE(review): presumably the detectors return a message string
            # when no change is detected -- confirm against mcdrift.
            if isinstance(pcdm_res, str):
                miss_counts_p[(W, K)] += 1
            if isinstance(npcdm_res, str):
                miss_counts_np[(W, K)] += 1

# Convert counts to rates, overwriting (not adding to) any earlier results.
result_dict_p.update({pair: count / n_rep for pair, count in miss_counts_p.items()})
result_dict_np.update({pair: count / n_rep for pair, count in miss_counts_np.items()})

  ll += np.log(tm[seq[i-1], seq[i]])
  l = ll1 - ll0
100%|██████████| 500/500 [12:00<00:00,  1.44s/it]


In [8]:
# P-CDM false negative rates per (W, K), expressed as percentages.
{pair: 100 * rate for pair, rate in result_dict_p.items()}

{(2, 1): 0,
 (2, 2): 0,
 (2, 5): 0,
 (2, 10): 0,
 (2, 20): 0,
 (2, 50): 0.8,
 (5, 1): 0,
 (5, 2): 0,
 (5, 5): 0,
 (5, 10): 0,
 (5, 20): 0,
 (5, 50): 0,
 (10, 1): 0,
 (10, 2): 0,
 (10, 5): 0,
 (10, 10): 0,
 (10, 20): 0,
 (10, 50): 0,
 (50, 1): 0,
 (50, 2): 0,
 (50, 5): 0,
 (50, 10): 0,
 (50, 20): 0,
 (50, 50): 0,
 (100, 1): 0,
 (100, 2): 0,
 (100, 5): 0,
 (100, 10): 0,
 (100, 20): 0,
 (100, 50): 0}

In [7]:
# NP-CDM false negative rates per (W, K), expressed as percentages.
{pair: 100 * rate for pair, rate in result_dict_np.items()}

{(2, 1): 0,
 (2, 2): 0.2,
 (2, 5): 0.8,
 (2, 10): 3.2000000000000015,
 (2, 20): 7.400000000000005,
 (2, 50): 13.80000000000001,
 (5, 1): 0.4,
 (5, 2): 1.2,
 (5, 5): 4.200000000000002,
 (5, 10): 8.800000000000006,
 (5, 20): 18.000000000000014,
 (5, 50): 22.400000000000016,
 (10, 1): 0.8,
 (10, 2): 2.2000000000000006,
 (10, 5): 6.800000000000004,
 (10, 10): 15.20000000000001,
 (10, 20): 22.200000000000017,
 (10, 50): 25.000000000000018,
 (50, 1): 3.800000000000002,
 (50, 2): 7.800000000000005,
 (50, 5): 19.000000000000014,
 (50, 10): 26.000000000000018,
 (50, 20): 26.400000000000016,
 (50, 50): 26.60000000000002,
 (100, 1): 6.800000000000004,
 (100, 2): 14.20000000000001,
 (100, 5): 24.40000000000002,
 (100, 10): 26.400000000000016,
 (100, 20): 26.400000000000016,
 (100, 50): 26.60000000000002}