# Semantic Segmentation (regimes) with arc curves # 
An “arc curve” annotates the raw time series with information about the likelihood of a regime change.

https://stumpy.readthedocs.io/en/latest/Tutorial_Semantic_Segmentation.html

https://sites.google.com/site/onlinesemanticsegmentation/

In [1]:
# SETUP
from tssb.utils import load_time_series_segmentation_datasets
from tssb.evaluation import covering
import pandas as pd
import stumpy

import core.utils as utils
import core.calculate as calculate

import matplotlib.pyplot as plt
from tssb.utils import visualize_time_series

## Time Series Segmentation Benchmark (TSSB) ##

https://github.com/ermshaua/time-series-segmentation-benchmark

In [2]:
# Load the benchmark datasets. The result is iterated row-wise further down via
# `.iterrows()`, where each row is unpacked into four fields (named there as
# series name, window size, change points and the series itself).
tssb = load_time_series_segmentation_datasets()

In [3]:
def segmentation_fluss_known_cps(T, T_name, cps, ds, L, n_regimes, target_w, m):
    """Run FLUSS for several dilation sizes and keep the best-scoring one.

    For every dilation ``d`` in ``ds`` a matrix profile of ``T`` is computed
    (``stumpy.stump`` for ``d == 1``, ``stumpy.stump_dil`` otherwise) and its
    second column is passed to ``stumpy.fluss``. The dilation whose found
    change points have the smallest summed arc-curve value is selected, and
    the covering score of its change points against ``cps`` is reported.
    Ties keep the dilation that appears first in ``ds``.

    Parameters
    ----------
    T : array-like
        Time series; must expose ``shape[0]`` once a dilation is evaluated.
    T_name : str
        Name used only in the printed summary line.
    cps : sequence
        Ground-truth change points handed to ``covering``.
    ds : iterable of int
        Candidate dilation sizes.
    L : int
        Forwarded to ``stumpy.fluss`` as ``L``.
    n_regimes : int
        Forwarded to ``stumpy.fluss`` as ``n_regimes``.
    target_w : int or None
        Target window span. When given, the subsequence length is derived per
        dilation as ``round((target_w - 1) / d) + 1``.
    m : int or None
        Fixed subsequence length used for every dilation.

    Returns
    -------
    tuple
        ``(score, best_dilation)`` for the selected dilation.

    Raises
    ------
    ValueError
        If not exactly one of ``target_w`` / ``m`` is given, if ``ds`` is
        empty, or if no dilation yields a comparable score.
    """
    # Explicit validation instead of `assert`, which is stripped under -O.
    if (target_w is None) == (m is None):
        raise ValueError("exactly one of 'target_w' and 'm' must be provided")

    # Materialise once so generators and arrays can be checked for emptiness.
    ds = list(ds)
    if not ds:
        raise ValueError("'ds' must contain at least one dilation size")

    derive_m = target_w is not None

    best_dilation = None
    best_cac_sum = float("inf")  # no magic upper bound on the summed score
    score = None
    for d in ds:
        # Either derive the subsequence length from the target span or use
        # the fixed one; the caller's `m` is never overwritten.
        window_m = round((target_w - 1) / d) + 1 if derive_m else m

        if d == 1:
            mp = stumpy.stump(T, m=window_m)
        else:
            mp = stumpy.stump_dil(T, m=window_m, d=d)
        cac, found_cps = stumpy.fluss(mp[:, 1], L=L, n_regimes=n_regimes)

        cac_sum = sum(cac[cp] for cp in found_cps)
        if cac_sum < best_cac_sum:
            best_dilation = d
            best_cac_sum = cac_sum
            score = covering({0: cps}, found_cps, T.shape[0])

    if best_dilation is None:
        # Only reachable when every summed score is incomparable (e.g. NaN).
        raise ValueError("no dilation size produced a comparable arc-curve score")

    print(f"Time Series: {T_name}, Learned Dilation size: {best_dilation}, Score: {score}")
    return score, best_dilation

In [4]:
# Candidate dilation sizes evaluated for every benchmark series.
dilation_sizes = [1, 2, 3, 4, 5]

col_names = ["flussEnsemble", "dilationSize"]

# Collect one [score, dilation] row per series and build the DataFrame once,
# instead of growing it row by row through `.loc`.
result_rows = []
for _, (ts_name, window_size, cps, ts) in tssb.iterrows():
    # The benchmark window size is used both as the FLUSS `L` argument and as
    # the fixed subsequence length `m`.
    L = window_size
    # One more regime than there are change points.
    n_regimes = len(cps) + 1
    score, current_best_dilation = segmentation_fluss_known_cps(
        ts,
        ts_name,
        cps,
        dilation_sizes,
        L,
        n_regimes,
        target_w=None,
        m=window_size,
    )
    result_rows.append([score, current_best_dilation])

df_my_results = pd.DataFrame(result_rows, columns=col_names)

Time Series: Adiac, Learned Dilation size: 4, Score: 0.8736568918880732
Time Series: ArrowHead, Learned Dilation size: 4, Score: 0.985496930254549
Time Series: Beef, Learned Dilation size: 4, Score: 0.7127727768685216
Time Series: BeetleFly, Learned Dilation size: 5, Score: 0.9692234848484848
Time Series: BirdChicken, Learned Dilation size: 3, Score: 0.9806558309386972
Time Series: Car, Learned Dilation size: 2, Score: 0.7364715422298941
Time Series: CBF, Learned Dilation size: 3, Score: 0.6867847481802085
Time Series: Chinatown, Learned Dilation size: 1, Score: 1.0
Time Series: ChlorineConcentration, Learned Dilation size: 1, Score: 0.9997436461895858
Time Series: CinCECGTorso, Learned Dilation size: 5, Score: 0.6076885270406197
Time Series: Coffee, Learned Dilation size: 2, Score: 0.9880711462450592
Time Series: Computers, Learned Dilation size: 1, Score: 0.4935159817351598
Time Series: CricketX, Learned Dilation size: 1, Score: 0.7874002518254477
Time Series: CricketY, Learned Dilat

In [5]:
# Read the existing score table, append the learned-dilation results as extra
# columns (aligned on the row index), and write the combined table to a new CSV.
df_scores = pd.read_csv(
    "../results/segmentation/segmentation_covering_known_stumpy_m_ensemble.csv"
)
result = pd.concat((df_scores, df_my_results), axis="columns")
result.to_csv(
    "../results/segmentation/segmentation_covering_known_stumpy_m_ensemble_learned.csv",
    index=False,
)