# Semantic Segmentation (regimes) with arc curves # 
An “arc curve” annotates the raw time series with information about the likelihood of a regime change at each location.

https://stumpy.readthedocs.io/en/latest/Tutorial_Semantic_Segmentation.html

https://sites.google.com/site/onlinesemanticsegmentation/

In [2]:
# SETUP
from tssb.utils import load_time_series_segmentation_datasets
from tssb.evaluation import covering
import pandas as pd
import stumpy

import core.utils as utils
import core.calculate as calculate

import matplotlib.pyplot as plt
from tssb.utils import visualize_time_series

## Time Series Segmentation Benchmark (TSSB) ##

https://github.com/ermshaua/time-series-segmentation-benchmark

In [3]:
# Datasets
# Load the Time Series Segmentation Benchmark (TSSB) collection once.
# The benchmark cells in this notebook iterate it with `tssb.iterrows()` and
# unpack every row as (ts_name, window_size, cps, ts).
tssb = load_time_series_segmentation_datasets()

### Benchmark FLUSS Ensemble - known number of change points ###


In [4]:
# Known number of change points; both the target range w and the window size m
# are set to the window size stored with each benchmark series.
dilation_sizes = list(range(1, 6))  # dilation sizes handed to the ensemble routine

col_names = ["flussEnsembleMinALL"]
df_my_results = pd.DataFrame(columns=col_names)

for _, row in tssb.iterrows():
    ts_name, window_size, cps, ts = row
    score = calculate.segmentation_fluss_known_cps_ensemble_min_BOTHWINDOWSETTINGS(
        ts,
        ts_name,
        cps,
        dilation_sizes,
        window_size,   # L
        len(cps) + 1,  # n_regimes: one more regime than annotated change points
        target_w=window_size,
        m=window_size,
    )
    # Append the score as the next row (new label == current number of rows).
    df_my_results.loc[df_my_results.shape[0]] = score

# Put the new column next to the scores read from the existing CSV and save
# the combined table under a new file name.
df_scores = pd.read_csv("../results/segmentation/segmentation_covering_known_stumpy_target_w_fluss.csv")
result = pd.concat([df_scores, df_my_results], axis=1)
result.to_csv(
    "../results/segmentation/segmentation_covering_known_stumpy_target_w_and_m_fluss_min.csv",
    index=False,
)


Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [553, 998, 1188], CAC values: [0.7171529920060912, 0.7622397661135115, 0.7961758171047336], Score: 0.9003097290781114
Time Series: ArrowHead: True Change Points: [753], Found Change Points: [742], CAC values: [0.9545456071394108], Score: 0.985496930254549
Time Series: Beef: True Change Points: [705], Found Change Points: [474], CAC values: [0.6446692799813584], Score: 0.7127727768685216
Time Series: BeetleFly: True Change Points: [1280], Found Change Points: [1240], CAC values: [0.9944125554420816], Score: 0.9692234848484848
Time Series: BirdChicken: True Change Points: [1280], Found Change Points: [1255], CAC values: [0.7055296512300501], Score: 0.9806558309386972
Time Series: Car: True Change Points: [ 577 1154 1550], Found Change Points: [1133, 1237, 541], CAC values: [0.7781675932592419, 0.7909544013745363, 0.8555433657283508], Score: 0.7364715422298941
Time Series: CBF: True Change Points: [384 704], Fo

### Benchmark FLUSS - known number of change points - fixed target range w ###
Hyperparameters:
- excl_factor (default: 5)
- L
- n_regimes

In [None]:
# Known number of change points with a fixed target range w: target_w is the
# window size stored with each series, m is left as None.
dilation_sizes = [1, 2, 3, 4, 5]
col_names = ["flussEnsembleMin"]
df_my_results = pd.DataFrame(columns=col_names)

for _, record in tssb.iterrows():
    ts_name, window_size, cps, ts = record
    score = calculate.segmentation_fluss_known_cps_ensemble_min(
        ts, ts_name, cps, dilation_sizes,
        window_size,   # L
        len(cps) + 1,  # n_regimes: one more regime than annotated change points
        target_w=window_size,
        m=None,
    )
    # Store the score in a new row at the end of the frame.
    df_my_results.loc[len(df_my_results)] = score

# Merge with the scores already on disk (column-wise) and write the result.
df_scores = pd.read_csv(
    "../results/segmentation/segmentation_covering_known_stumpy_target_w_fluss.csv"
)
result = pd.concat([df_scores, df_my_results], axis=1)
result.to_csv(
    "../results/segmentation/segmentation_covering_known_stumpy_target_w_fluss_min.csv",
    index=False,
)


Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [565, 1006, 615], CAC values: [0.6927344655357065, 0.7528625638330528, 0.7291390125553036], Score: 0.8192135911352328
Time Series: ArrowHead: True Change Points: [753], Found Change Points: [734], CAC values: [0.9418086590967288], Score: 0.9750780986589048
Time Series: Beef: True Change Points: [705], Found Change Points: [661], CAC values: [0.3109873008526919], Score: 0.9394218295789184
Time Series: BeetleFly: True Change Points: [1280], Found Change Points: [1812], CAC values: [1.0], Score: 0.6453883830022076
Time Series: BirdChicken: True Change Points: [1280], Found Change Points: [2303], CAC values: [0.615227878161689], Score: 0.47770308089991315
Time Series: Car: True Change Points: [ 577 1154 1550], Found Change Points: [1135, 556, 1235], CAC values: [0.775569445209137, 0.861784286936893, 0.7923699997165544], Score: 0.7503261800597397
Time Series: CBF: True Change Points: [384 704], Found Change Point

### Benchmark FLUSS - known number of change points - fixed window size m (number of values) ###
Hyperparameters:
- excl_factor (default: 5)
- L
- n_regimes

In [15]:
# Known number of change points with a fixed window size m: m is the window
# size stored with each series, target_w is left as None.
dilation_sizes = [1, 2, 3, 4, 5]

col_names = ["flussEnsembleMin"]
df_my_results = pd.DataFrame(columns=col_names)

# `position` counts processed series; because exactly one row is appended per
# iteration it always equals the current length of df_my_results.
for position, (_, series_row) in enumerate(tssb.iterrows()):
    ts_name, window_size, cps, ts = series_row
    regime_count = len(cps) + 1  # one more regime than annotated change points
    score = calculate.segmentation_fluss_known_cps_ensemble_min(
        ts, ts_name, cps, dilation_sizes, window_size, regime_count,
        target_w=None, m=window_size,
    )
    df_my_results.loc[position] = score

# Column-wise merge with the scores read from the existing CSV, then save.
df_scores = pd.read_csv("../results/segmentation/segmentation_covering_known_stumpy_m_fluss.csv")
result = pd.concat([df_scores, df_my_results], axis=1)
result.to_csv(
    "../results/segmentation/segmentation_covering_known_stumpy_m_fluss_min.csv",
    index=False,
)


Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [563, 1008, 1224], Score: 0.9707547520353909 for d=1, m=10, w=10
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [558, 998, 1185], Score: 0.9037065613418561 for d=2, m=10, w=19
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [553, 989, 604], Score: 0.8007397211976567 for d=3, m=10, w=28
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [545, 984, 1188], Score: 0.8736568918880732 for d=4, m=10, w=37
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [541, 1187, 591], Score: 0.7403611864035667 for d=5, m=10, w=46
Time Series: ArrowHead: True Change Points: [753], Found Change Points: [749], Score: 0.9947019495772963 for d=1, m=10, w=10
Time Series: ArrowHead: True Change Points: [753], Found Change Points: [738], Score: 0.9802742156374502 for d=2, m=10, w=19
Time Series: ArrowHead: True Change

### Benchmark FLUSS - unknown number of change points - fixed target range w ###
Hyperparameters:
- excl_factor (default: 5)
- L
- threshold

In [4]:
# Unknown number of change points with a fixed target range w: target_w is the
# window size stored with each series, m is left as None.
dilation_sizes = [1, 2, 3, 4, 5]
# NOTE(review): presumably the cut-off applied to the arc curve to accept a
# change point; confirm against core.calculate.
threshold = 0.45

col_names = ["flussEnsembleMin"]

# Collect one score per benchmark series. The number of regimes is NOT computed
# here: this variant receives `threshold` instead of `n_regimes`, so the former
# `n_regimes = len(cps)+1` assignment was dead code.
scores = []
for _, (ts_name, window_size, cps, ts) in tssb.iterrows():
    L = window_size
    scores.append(
        calculate.segmentation_fluss_unknown_cps_ensemble_min(
            ts, ts_name, cps, dilation_sizes, L, threshold,
            target_w=window_size, m=None,
        )
    )

# Build the result frame once instead of enlarging it row by row with `.loc`.
df_my_results = pd.DataFrame(scores, columns=col_names)

# merge both dataframes
df_scores = pd.read_csv("../results/segmentation/segmentation_covering_unknown_stumpy_target_w_fluss.csv")
result = pd.concat([df_scores, df_my_results], axis=1)
result.to_csv("../results/segmentation/segmentation_covering_unknown_stumpy_target_w_fluss_min.csv", index=False)


### Benchmark FLUSS - unknown number of change points - fixed window size m (number of values) ###
Hyperparameters:
- excl_factor (default: 5)
- L
- threshold

In [4]:
# Unknown number of change points with a fixed window size m (number of values).
dilation_sizes = [1,2,3,4,5]
# NOTE(review): presumably the cut-off applied to the arc curve to accept a
# change point; confirm against core.calculate.
threshold = 0.45

col_names = ["flussEnsembleMin"]
df_my_results = pd.DataFrame(columns=col_names)
for _, (ts_name, window_size, cps, ts) in tssb.iterrows():
    L = window_size
    # Fixed-m setting: pass the series' window size as m and leave target_w
    # unset, mirroring the known-change-point fixed-m benchmark. The previous
    # call passed target_w=window_size, m=None, which is the fixed-target-w
    # configuration and contradicts both this section's heading and the
    # "*_m_fluss*" result files read and written below.
    # (The unused `n_regimes` assignment was dropped; this variant takes
    # `threshold` instead.)
    score = calculate.segmentation_fluss_unknown_cps_ensemble_min(ts, ts_name, cps, dilation_sizes, L, threshold, target_w=None, m=window_size)
    df_my_results.loc[len(df_my_results)] = score

# merge both dataframes
df_scores = pd.read_csv("../results/segmentation/segmentation_covering_unknown_stumpy_m_fluss.csv")
result = pd.concat([df_scores, df_my_results], axis=1)
result.to_csv("../results/segmentation/segmentation_covering_unknown_stumpy_m_fluss_min.csv", index=False)

Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [349, 439, 512, 563, 613, 663, 713, 780, 833, 889, 957, 1008, 1058, 1122, 1173, 1224, 1283], Score: 0.4303977272727273 for d=1, m=10, w=10
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [328, 378, 432, 507, 558, 608, 658, 708, 788, 884, 947, 998, 1048, 1126, 1185, 1238], Score: 0.4772727272727273 for d=2, m=10, w=19
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [340, 425, 502, 553, 604, 654, 704, 792, 887, 938, 989, 1039, 1112, 1164, 1215, 1266], Score: 0.4616477272727273 for d=3, m=10, w=28
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [295, 346, 427, 494, 545, 595, 645, 695, 787, 887, 984, 1035, 1085, 1137, 1188, 1239], Score: 0.4375 for d=4, m=10, w=37
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [288, 339, 438, 490, 541, 591, 641, 691, 783, 858, 930, 981, 1031, 1084, 1136, 1