In [17]:
import abc
import inspect
import gen_mod
import test_utils
import pandas as pd
from copy import deepcopy
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture

In [18]:
# Keys under which every benchmark result is stored in `res`.
# Apart from the four baselines, names follow the '<classifier or cost> <model>'
# pattern; the last entry is 'RBF GMM' so that it matches the key the RBF
# evaluation cell actually writes (otherwise `res` keeps a stale None placeholder).
models = ['Win', 'KL-CPD', 'TIRE', 'RuLSIF',
          'QDA Exam', 'QDA GMM', 'QDA VAE', 'QDA NF',
          'DT Exam', 'DT GMM',
          'RBF Exam', 'RBF GMM']

# One slot per model, initialised to None until the corresponding run finishes.
res = dict.fromkeys(models)

In [24]:
def _build_model(model, params):
    """Return the object to assign to ``tester.model`` for one dataset configuration."""
    # Strings are model identifiers and are handed to the tester unchanged.
    if isinstance(model, str):
        return model

    try:
        # `signature` also reports keyword-only parameters, which `getfullargspec(...).args` omits.
        accepted = set(inspect.signature(model).parameters)
    except (TypeError, ValueError):
        # Not introspectable (e.g. a plain, non-callable instance): fall back to copying it.
        accepted = set()

    if 'n_components' in accepted:
        return model(n_components=min(6, params['window_size']), covariance_type='spherical')
    if 'var_size' in accepted:
        return model(var_size=params['dim'])
    if 'in_dim' in accepted:
        return model(in_dim=params['dim'], out_dim=params['dim'])

    # Copy so that state is not shared between datasets.
    return deepcopy(model)


def full_test(model, clf='QDA', score_type='KL', cond_dgm=False, save_scores=False, model_name=None,
              datasets=None):
    """Run ``test_utils.Tester.cpd_test`` for one model on every configured dataset.

    Parameters
    ----------
    model : str, class or object
        A string is assigned to the tester as is. A callable whose signature accepts
        ``n_components``, ``var_size`` or ``in_dim`` (checked in that order) is instantiated
        once per dataset with values taken from the dataset configuration. Anything else is
        deep-copied per dataset.
    clf, score_type, cond_dgm, save_scores
        Forwarded unchanged to ``test_utils.Tester``.
    model_name : str, optional
        Forwarded to ``test_utils.Tester``. When omitted it is resolved *before* the tester is
        built: ``'Exam'`` becomes ``'Exam' + clf + score_type``, any other string is used as is,
        and a non-string model contributes its ``__name__`` (or its type's name).
    datasets : dict, optional
        Maps dataset name to a dict with the keys ``'window_size'``, ``'margin'``,
        ``'random_seeds'`` and ``'dim'``. Defaults to the table defined below.

    Returns
    -------
    pandas.DataFrame
        One row per dataset, built from whatever ``tester.cpd_test()`` returns.
    """
    if datasets is None:
        datasets = {
            # 'Mean':  {'window_size': 50, 'margin': 20, 'random_seeds': [2, 3, 5, 7, 9, 11, 13, 17, 19, 73], 'dim':1},
            # 'Var':   {'window_size': 50, 'margin': 20, 'random_seeds': [2, 3, 5, 7, 9, 11, 13, 17, 19, 73], 'dim':1},
            # 'Cov':   {'window_size': 50, 'margin': 20, 'random_seeds': [2, 3, 5, 7, 9, 11, 13, 17, 19, 73], 'dim':2},
            'bee':   {'window_size': 5,  'margin': 10, 'random_seeds': None, 'dim':3},
            # 'hasc':  {'window_size': 40, 'margin': 50, 'random_seeds': None, 'dim':1},
            # 'usc':   {'window_size': 70, 'margin': 20, 'random_seeds': None, 'dim':6},
            # 'wisdm': {'window_size': 60, 'margin': 20, 'random_seeds': None, 'dim':3}
        }

    # Resolve the default name up front; previously this happened after the
    # tester had already been constructed, so the default was never used.
    if model_name is None:
        if isinstance(model, str):
            model_name = model + clf + score_type if model == 'Exam' else model
        else:
            model_name = getattr(model, '__name__', type(model).__name__)

    res = {}

    for ds, params in datasets.items():

        tester = test_utils.Tester(clf=clf, score_type=score_type, cond_dgm=cond_dgm,
                                   save_scores=save_scores, model_name=model_name)

        print(ds)

        tester.model = _build_model(model, params)
        tester.window_size = params['window_size']
        tester.margin = params['margin']
        tester.dataset = ds
        tester.random_seeds = params['random_seeds']

        res[ds] = tester.cpd_test()

    return pd.DataFrame(res).T

In [20]:
# 'Win'     : full_test('Win')
# 'KL-CPD'  : full_test('KL-CPD')
# 'TIRE'    : full_test('TIRE')
# 'RuLSIF'  : full_test('RuLSIF')

# 'QDA Exam': full_test('Exam')
# 'GMM+QDA' : full_test(GaussianMixture, model_name='GMM+QDA')
# 'QDA VAE' : full_test(gen_mod.CPDVAE, model_name='DoubleVAE')

# 'DT Exam' : full_test('Exam', clf='DT')
# 'GMM+DT'  : full_test(GaussianMixture, clf='DT', model_name='GMM+DT')

# 'RBF Exam': full_test('Exam', score_type='RBF')
# 'GMM+RBF' : full_test(GaussianMixture, score_type='RBF', model_name='GMM+RBF')

# Baselines

In [21]:
# Run every baseline detector through the benchmark. Each one is identified
# by a plain string, which doubles as its key in `res` and as its model name.
for baseline in ('Win', 'KL-CPD', 'TIRE', 'RuLSIF'):
    res[baseline] = full_test(model=baseline, model_name=baseline)

bee
Fitting the model...
Dataset #1
Dataset #2
Dataset #3
Dataset #4
Dataset #5
Dataset #6
Computing metrics...
PR AUC: 0.336 ± 0.045
bee
Fitting the model...
Dataset #1


100%|██████████| 1/1 [00:09<00:00,  9.05s/it]


Dataset #2


100%|██████████| 1/1 [00:15<00:00, 15.00s/it]


Dataset #3


100%|██████████| 1/1 [00:07<00:00,  7.71s/it]


Dataset #4


100%|██████████| 1/1 [00:12<00:00, 12.04s/it]


Dataset #5


100%|██████████| 1/1 [00:07<00:00,  7.80s/it]


Dataset #6


100%|██████████| 1/1 [00:08<00:00,  8.64s/it]


Computing metrics...
PR AUC: 0.370 ± 0.095
bee
Fitting the model...
Dataset #1
Training autoencoder for original timeseries


Loss: 0.40: 100%|██████████| 200/200 [01:05<00:00,  3.06it/s]


Training autoencoder for FFT timeseries


Loss: 0.03: 100%|██████████| 200/200 [01:19<00:00,  2.52it/s]


Dataset #2
Training autoencoder for original timeseries


Loss: 0.64: 100%|██████████| 200/200 [01:01<00:00,  3.27it/s]


Training autoencoder for FFT timeseries


Loss: 0.03: 100%|██████████| 200/200 [00:50<00:00,  3.99it/s]


Dataset #3
Training autoencoder for original timeseries


Loss: 0.67: 100%|██████████| 200/200 [00:33<00:00,  6.04it/s]


Training autoencoder for FFT timeseries


Loss: 0.05: 100%|██████████| 200/200 [00:46<00:00,  4.27it/s]


Dataset #4
Training autoencoder for original timeseries


Loss: 0.60: 100%|██████████| 200/200 [00:58<00:00,  3.41it/s]


Training autoencoder for FFT timeseries


Loss: 0.03: 100%|██████████| 200/200 [00:46<00:00,  4.29it/s]


Dataset #5
Training autoencoder for original timeseries


Loss: 0.53: 100%|██████████| 200/200 [00:40<00:00,  4.93it/s]


Training autoencoder for FFT timeseries


Loss: 0.05: 100%|██████████| 200/200 [00:58<00:00,  3.43it/s]


Dataset #6
Training autoencoder for original timeseries


Loss: 0.65: 100%|██████████| 200/200 [00:27<00:00,  7.31it/s]


Training autoencoder for FFT timeseries


Loss: 0.03: 100%|██████████| 200/200 [00:35<00:00,  5.70it/s]


Computing metrics...
PR AUC: 0.486 ± 0.082
bee
Fitting the model...
Dataset #1
Dataset #2
Dataset #3
Dataset #4
Dataset #5
Dataset #6
Computing metrics...
PR AUC: 0.503 ± 0.092


# RBF Cost methods

In [25]:
# Benchmark the RBF score with both model choices; each pair is
# (model passed to full_test, key under which the result is stored).
for model, model_name in (('Exam', 'RBF Exam'), (GaussianMixture, 'RBF GMM')):
    res[model_name] = full_test(model, score_type='RBF', model_name=model_name)

bee
Fitting the model...
Dataset #1


100%|██████████| 1049/1049 [00:00<00:00, 3566.41it/s]


Dataset #2


100%|██████████| 1116/1116 [00:00<00:00, 3497.16it/s]


Dataset #3


100%|██████████| 594/594 [00:00<00:00, 2770.45it/s]


Dataset #4


100%|██████████| 748/748 [00:00<00:00, 3452.22it/s]


Dataset #5


100%|██████████| 805/805 [00:00<00:00, 1271.61it/s]


Dataset #6


100%|██████████| 600/600 [00:00<00:00, 1351.02it/s]


Computing metrics...
PR AUC: 0.336 ± 0.043
bee
Fitting the model...
Dataset #1


100%|██████████| 1049/1049 [03:34<00:00,  4.89it/s]


Dataset #2


100%|██████████| 1116/1116 [04:02<00:00,  4.59it/s]


Dataset #3


100%|██████████| 594/594 [02:49<00:00,  3.50it/s]


Dataset #4


100%|██████████| 748/748 [03:10<00:00,  3.93it/s]


Dataset #5


100%|██████████| 805/805 [04:28<00:00,  3.00it/s]


Dataset #6


100%|██████████| 600/600 [03:09<00:00,  3.16it/s]


Computing metrics...
PR AUC: 0.326 ± 0.050


# QDA and DT methods

In [23]:
# Explicit (classifier, model, result key) table, in execution order.
# This replaces naming that was inferred from each class's metaclass plus a
# mutable first-seen flag, which silently mislabels results if a class changes
# its metaclass or the list is reordered.
# The 'DT' classifier is only paired with 'Exam' and GaussianMixture.
qda_dt_runs = [
    ('QDA', 'Exam',          'QDA Exam'),
    ('QDA', GaussianMixture, 'QDA GMM'),
    ('QDA', gen_mod.CPDVAE,  'QDA VAE'),
    ('QDA', gen_mod.CPDNF,   'QDA NF'),
    ('DT',  'Exam',          'DT Exam'),
    ('DT',  GaussianMixture, 'DT GMM'),
]

for clf, model, model_name in qda_dt_runs:
    res[model_name] = full_test(model, clf=clf, model_name=model_name)

bee
Fitting the model...
Dataset #1


100%|██████████| 1049/1049 [00:07<00:00, 143.91it/s]


Dataset #2


100%|██████████| 1116/1116 [00:07<00:00, 144.13it/s]


Dataset #3


100%|██████████| 594/594 [00:04<00:00, 145.94it/s]


Dataset #4


100%|██████████| 748/748 [00:05<00:00, 146.39it/s]


Dataset #5


100%|██████████| 805/805 [00:05<00:00, 146.35it/s]


Dataset #6


100%|██████████| 600/600 [00:04<00:00, 144.50it/s]


Computing metrics...
PR AUC: 0.378 ± 0.088
bee
Fitting the model...
Dataset #1


100%|██████████| 1049/1049 [03:18<00:00,  5.29it/s]


Dataset #2


100%|██████████| 1116/1116 [03:26<00:00,  5.40it/s]


Dataset #3


100%|██████████| 594/594 [01:59<00:00,  4.97it/s]


Dataset #4


100%|██████████| 748/748 [02:40<00:00,  4.66it/s]


Dataset #5


100%|██████████| 805/805 [03:04<00:00,  4.35it/s]


Dataset #6


100%|██████████| 600/600 [02:04<00:00,  4.80it/s]


Computing metrics...
PR AUC: 0.657 ± 0.037
bee
Fitting the model...
Dataset #1


100%|██████████| 1049/1049 [04:34<00:00,  3.82it/s]


Dataset #2


100%|██████████| 1116/1116 [05:56<00:00,  3.13it/s]


Dataset #3


100%|██████████| 594/594 [03:02<00:00,  3.26it/s]


Dataset #4


100%|██████████| 748/748 [03:10<00:00,  3.93it/s]


Dataset #5


100%|██████████| 805/805 [04:49<00:00,  2.78it/s]


Dataset #6


100%|██████████| 600/600 [02:28<00:00,  4.04it/s]


Computing metrics...
PR AUC: 0.630 ± 0.029
bee
Fitting the model...
Dataset #1


100%|██████████| 1049/1049 [04:21<00:00,  4.01it/s]


Dataset #2


100%|██████████| 1116/1116 [04:28<00:00,  4.16it/s]


Dataset #3


100%|██████████| 594/594 [02:22<00:00,  4.16it/s]


Dataset #4


100%|██████████| 748/748 [03:57<00:00,  3.15it/s]


Dataset #5


 44%|████▍     | 358/805 [02:10<02:43,  2.74it/s]
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-23-61ab3e836ee0>", line 16, in <module>
    res[model_name] = full_test(model, clf=clf, model_name=model_name)
  File "<ipython-input-19-4ecc64886d4e>", line 43, in full_test
    res[ds] = tester.cpd_test()
  File "/Users/armenramazan/Desktop/CPD CMAME/code/test_utils.py", line 176, in cpd_test
    score = detector.predict(X)
  File "/Users/armenramazan/Desktop/CPD CMAME/code/cpd.py", line 132, in predict
    ascore = self.one_step_predict(ref[i], test[i])
  File "/Users/armenramazan/Desktop/CPD CMAME/code/cpd.py", line 243, in one_step_predict
    self.GenMod_test.fit(X_test)
  File "/Users/armenramazan/Desktop/CPD CMAME/code/gen_mod.py", line 568, in fit
    loss = -self.nf.log_prob(X_batch)
  File "/Users/armenramazan/Desktop/CPD C

TypeError: object of type 'NoneType' has no len()

# Show results

In [None]:
# Display the collected results: each key maps to the DataFrame returned by
# full_test for that run, or to None if no result has been stored for it yet.
res