In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from mcdrift import *

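In this experiment, we compare the performance of P-CDM and NP-CDM in terms of false positive rates. Every sequence is generated entirely from $P_0$ (no change ever occurs), so any detection reported by either method counts as a false positive. We set the following parameters: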

1. $N = 5$ (number of states)

2. $T = 10,000$ (sequence length)

3. $L = 1,000$ (number of observations guaranteed to come from $P_0$)

4. $W \in \{2, 5, 10, 50, 100\}$ (subsequence length)

5. $K \in \{1, 2, 5, 10, 20, 50\}$ (detection threshold)

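We generate $P_0$ by drawing each entry independently from the standard uniform distribution and then normalizing so that each row sums to 1. The alternative transition matrix $P_1$ required by P-CDM is generated in the same way. We set $\pi = (0.2, 0.2, 0.2, 0.2, 0.2)$.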

Also, we estimate the time of abrupt change using the center of the predicted interval. For example, when $W = 10$ and the change is detected in $s_k$ with $91 \leq k \leq 100$, the estimated time is simply $(100+91)/2 = 95.5$.

We repeat the experiment $500$ times and, for each pair $(W, K)$, calculate the false positive rate as the fraction of repetitions in which a change is detected.

In [7]:
# --- Experiment configuration (see the parameter list in the text above) ---
N = 5                         # number of states
T = 10000                     # sequence length
tstar = 2000                  # not referenced by the cells shown here; presumably a change time -- TODO confirm
L = 1000                      # number of observations guaranteed to come from P_0
Ws = [2, 5, 10, 50, 100]      # subsequence lengths W to sweep
Ks = [1, 2, 5, 10, 20, 50]    # detection thresholds K to sweep
pi = np.full(N, 1 / N)        # pi = (0.2, 0.2, 0.2, 0.2, 0.2)

# One entry per (W, K) combination, initialised to 0; the simulation cell
# adds to these to obtain the detection rate of each method.
result_dict_p = {(W, K): 0 for W in Ws for K in Ks}
result_dict_np = {(W, K): 0 for W in Ws for K in Ks}

# Reproducibility and number of Monte Carlo repetitions.
n_rep = 500
seed = 2023

In [8]:
# False-positive experiment: each repetition simulates a sequence using p0
# only, runs P-CDM and NP-CDM for every (W, K) pair, and records how often
# each method reports a detection.
#
# Detections are tallied as integers in dictionaries created inside this
# cell and converted to rates once at the end. Compared with adding
# 1/n_rep to the shared result dictionaries on every hit, this
#   * makes the cell idempotent (re-running it overwrites the previous
#     rates instead of silently adding to them), and
#   * avoids floating-point drift such as 100.00000000000007.
np.random.seed(seed)  # seeds NumPy's global generator used by np.random.rand below

detections_p = {(W, K): 0 for W in Ws for K in Ks}
detections_np = {(W, K): 0 for W in Ws for K in Ks}

for _ in tqdm(range(n_rep)):
    # Random row-stochastic matrices: uniform entries, each row normalised to sum to 1.
    p0 = np.random.rand(N, N)
    p0 = p0 / p0.sum(axis=1, keepdims=True)
    p1 = np.random.rand(N, N)
    p1 = p1 / p1.sum(axis=1, keepdims=True)

    # Only p0 is passed to the simulator; p1 is used solely as an input to P-CDM.
    seq_comb = simulate_mc(pi, p0, T)

    for W in Ws:
        for K in Ks:
            pcdm_res = pcdm(seq_comb, W, p0, p1, K)
            npcdm_res = npcdm(seq_comb, W, N, L, K)
            # Any non-str result is counted as a detection (presumably the
            # detectors return a message string when nothing is detected --
            # confirm against mcdrift). The exact-type comparison is kept so
            # that str subclasses are classified exactly as before.
            if type(pcdm_res) is not str:
                detections_p[(W, K)] += 1
            if type(npcdm_res) is not str:
                detections_np[(W, K)] += 1

# Convert counts to rates (fraction of repetitions with a detection).
for key, count in detections_p.items():
    result_dict_p[key] = count / n_rep
for key, count in detections_np.items():
    result_dict_np[key] = count / n_rep

  l = ll1 - ll0
100%|██████████| 500/500 [19:57<00:00,  2.39s/it]


In [9]:
# Show the values stored in result_dict_p scaled by 100 (i.e. as percentages), keyed by (W, K).
{key: 100 * rate for key, rate in result_dict_p.items()}

{(2, 1): 100.00000000000007,
 (2, 2): 100.00000000000007,
 (2, 5): 97.00000000000007,
 (2, 10): 49.40000000000004,
 (2, 20): 7.800000000000005,
 (2, 50): 1.4000000000000001,
 (5, 1): 100.00000000000007,
 (5, 2): 100.00000000000007,
 (5, 5): 46.20000000000004,
 (5, 10): 0.8,
 (5, 20): 0,
 (5, 50): 0,
 (10, 1): 100.00000000000007,
 (10, 2): 95.40000000000008,
 (10, 5): 3.600000000000002,
 (10, 10): 0,
 (10, 20): 0,
 (10, 50): 0,
 (50, 1): 26.200000000000017,
 (50, 2): 0.6,
 (50, 5): 0,
 (50, 10): 0,
 (50, 20): 0,
 (50, 50): 0,
 (100, 1): 1.2,
 (100, 2): 0,
 (100, 5): 0,
 (100, 10): 0,
 (100, 20): 0,
 (100, 50): 0}

In [10]:
# Show the values stored in result_dict_np scaled by 100 (i.e. as percentages), keyed by (W, K).
{key: 100 * rate for key, rate in result_dict_np.items()}

{(2, 1): 99.80000000000008,
 (2, 2): 99.60000000000008,
 (2, 5): 99.20000000000007,
 (2, 10): 97.20000000000007,
 (2, 20): 94.20000000000007,
 (2, 50): 84.80000000000007,
 (5, 1): 99.40000000000008,
 (5, 2): 99.00000000000007,
 (5, 5): 96.80000000000007,
 (5, 10): 91.60000000000007,
 (5, 20): 84.80000000000007,
 (5, 50): 51.000000000000036,
 (10, 1): 99.00000000000007,
 (10, 2): 97.80000000000008,
 (10, 5): 93.00000000000007,
 (10, 10): 87.40000000000006,
 (10, 20): 71.40000000000005,
 (10, 50): 24.60000000000002,
 (50, 1): 96.00000000000007,
 (50, 2): 92.00000000000007,
 (50, 5): 84.60000000000007,
 (50, 10): 60.600000000000044,
 (50, 20): 22.200000000000017,
 (50, 50): 0.8,
 (100, 1): 92.00000000000007,
 (100, 2): 88.60000000000007,
 (100, 5): 74.80000000000005,
 (100, 10): 39.00000000000003,
 (100, 20): 8.200000000000006,
 (100, 50): 0}