# Semantic Segmentation (regimes) with arc curves # 
An “arc curve” (computed here with FLUSS) annotates the raw time series with information about the likelihood of a regime change.

https://stumpy.readthedocs.io/en/latest/Tutorial_Semantic_Segmentation.html

https://sites.google.com/site/onlinesemanticsegmentation/

In [1]:
# SETUP
from tssb.utils import load_time_series_segmentation_datasets
from tssb.evaluation import covering
import pandas as pd
import stumpy

import core.utils as utils
import core.calculate as calculate

import matplotlib.pyplot as plt
from tssb.utils import visualize_time_series

## Time Series Segmentation Benchmark (TSSB) ##

https://github.com/ermshaua/time-series-segmentation-benchmark

In [14]:
# Datasets
# Load the Time Series Segmentation Benchmark (TSSB). The benchmark cells below
# iterate over it with `tssb.iterrows()` and unpack every row as
# (ts_name, window_size, cps, ts).
tssb = load_time_series_segmentation_datasets()

### Benchmark FLUSS - known number of change points - fixed target range w ###
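Hyperparameters:
- excl_factor (default: 5)
- L (set to the window size provided with each TSSB time series)
- n_regimes (number of true change points + 1)

In this run the window size is passed as the target range w. As the printed output shows, the number of values m shrinks with the dilation size d so that the covered range w = (m - 1) * d + 1 stays close to the window size.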

In [13]:
# Benchmark FLUSS with a known number of change points and a fixed target range:
# the dataset's window size is passed as `target_w` and `m` is left as None.
dilation_sizes = [1, 2, 3, 4, 5]

# One result column per dilation size: "fluss1" ... "fluss5".
col_names = ["fluss" + str(d) for d in dilation_sizes]

# Collect the scores of every time series in a plain list and build the frame
# once after the loop; enlarging a DataFrame row by row via `.loc` re-allocates
# on every insert.
score_rows = []
for _, (ts_name, window_size, cps, ts) in tssb.iterrows():
    L = window_size
    n_regimes = len(cps) + 1  # one regime more than there are true change points
    # NOTE(review): assumes the helper returns one score per entry of
    # `dilation_sizes` (i.e. one value per result column) - confirm in core.calculate.
    scores = calculate.segmentation_fluss_known_cps(ts, ts_name, cps, dilation_sizes, L, n_regimes, target_w=window_size, m=None)
    score_rows.append(scores)
df_my_results = pd.DataFrame(score_rows, columns=col_names)

# merge both dataframes: the new FLUSS columns are appended to the right of the
# previously stored scores; rows are matched on the index
df_scores = pd.read_csv("../results/segmentation/segmentation_covering_known.csv")
result = pd.concat([df_scores, df_my_results], axis=1)
result.to_csv("../results/segmentation/segmentation_covering_known_stumpy_target_w.csv", index=False)


Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [563, 1008, 1224], Score: 0.9707547520353909 for d=1, m=10, w=10
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [565, 1006, 615], Score: 0.8192135911352328 for d=2, m=5, w=9
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [565, 615, 1009], Score: 0.8223986813624972 for d=3, m=4, w=10
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [565, 635, 343], Score: 0.45746213991037216 for d=4, m=3, w=9
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [563, 624, 1017], Score: 0.8134285513327476 for d=5, m=3, w=11
Time Series: ArrowHead: True Change Points: [753], Found Change Points: [749], Score: 0.9947019495772963 for d=1, m=10, w=10
Time Series: ArrowHead: True Change Points: [753], Found Change Points: [734], Score: 0.9750780986589048 for d=2, m=5, w=9
Time Series: ArrowHead: True Change Points

### Benchmark FLUSS - known number of change points - fixed window size m (number of values) ###
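Hyperparameters:
- excl_factor (default: 5)
- L (set to the window size provided with each TSSB time series)
- n_regimes (number of true change points + 1)

In this run the number of values m is fixed to the window size, so the covered range grows with the dilation size d: w = (m - 1) * d + 1 (e.g. m = 10 and d = 2 give w = 19).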

In [15]:
# Benchmark FLUSS with a known number of change points and a fixed number of
# values per window: the dataset's window size is passed as `m` and `target_w`
# is left as None.
dilation_sizes = [1, 2, 3, 4, 5]

# One result column per dilation size: "fluss1" ... "fluss5".
col_names = ["fluss" + str(d) for d in dilation_sizes]

# Collect the scores of every time series in a plain list and build the frame
# once after the loop; enlarging a DataFrame row by row via `.loc` re-allocates
# on every insert.
score_rows = []
for _, (ts_name, window_size, cps, ts) in tssb.iterrows():
    L = window_size
    n_regimes = len(cps) + 1  # one regime more than there are true change points
    # NOTE(review): assumes the helper returns one score per entry of
    # `dilation_sizes` (i.e. one value per result column) - confirm in core.calculate.
    scores = calculate.segmentation_fluss_known_cps(ts, ts_name, cps, dilation_sizes, L, n_regimes, target_w=None, m=window_size)
    score_rows.append(scores)
df_my_results = pd.DataFrame(score_rows, columns=col_names)

# merge both dataframes: the new FLUSS columns are appended to the right of the
# previously stored scores; rows are matched on the index
df_scores = pd.read_csv("../results/segmentation/segmentation_covering_known.csv")
result = pd.concat([df_scores, df_my_results], axis=1)
result.to_csv("../results/segmentation/segmentation_covering_known_stumpy_m.csv", index=False)


Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [563, 1008, 1224], Score: 0.9707547520353909 for d=1, m=10, w=10
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [558, 998, 1185], Score: 0.9037065613418561 for d=2, m=10, w=19
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [553, 989, 604], Score: 0.8007397211976567 for d=3, m=10, w=28
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [545, 984, 1188], Score: 0.8736568918880732 for d=4, m=10, w=37
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [541, 1187, 591], Score: 0.7403611864035667 for d=5, m=10, w=46
Time Series: ArrowHead: True Change Points: [753], Found Change Points: [749], Score: 0.9947019495772963 for d=1, m=10, w=10
Time Series: ArrowHead: True Change Points: [753], Found Change Points: [738], Score: 0.9802742156374502 for d=2, m=10, w=19
Time Series: ArrowHead: True Change

### Benchmark FLUSS - unknown number of change points - fixed target range w ###
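Hyperparameters:
- excl_factor (default: 5)
- L (set to the window size provided with each TSSB time series)
- threshold (set to 0.45 in the cell below)

The number of change points is not passed to the segmentation here. The window size is used as the target range w.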

In [3]:
# Benchmark FLUSS with an unknown number of change points and a fixed target
# range: the dataset's window size is passed as `target_w` and `m` is left as
# None. `threshold` is handed to the helper in place of a regime count.
dilation_sizes = [1, 2, 3, 4, 5]
threshold = 0.45

# One result column per dilation size: "fluss1" ... "fluss5".
col_names = ["fluss" + str(d) for d in dilation_sizes]

# Collect the scores of every time series in a plain list and build the frame
# once after the loop; enlarging a DataFrame row by row via `.loc` re-allocates
# on every insert.
score_rows = []
for _, (ts_name, window_size, cps, ts) in tssb.iterrows():
    L = window_size
    # The regime count is deliberately not computed here: this variant is not
    # given the number of change points (the old `n_regimes` local was unused).
    # NOTE(review): assumes the helper returns one score per entry of
    # `dilation_sizes` (i.e. one value per result column) - confirm in core.calculate.
    scores = calculate.segmentation_fluss_unknown_cps(ts, ts_name, cps, dilation_sizes, L, threshold, target_w=window_size, m=None)
    score_rows.append(scores)
df_my_results = pd.DataFrame(score_rows, columns=col_names)

# merge both dataframes: the new FLUSS columns are appended to the right of the
# previously stored scores; rows are matched on the index
df_scores = pd.read_csv("../results/segmentation/segmentation_covering_unknown.csv")
result = pd.concat([df_scores, df_my_results], axis=1)
result.to_csv("../results/segmentation/segmentation_covering_unknown_stumpy_target_w.csv", index=False)

Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [349, 439, 512, 563, 613, 663, 713, 780, 833, 889, 957, 1008, 1058, 1122, 1173, 1224, 1283], Score: 0.4303977272727273 for d=1, m=10, w=10
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [349, 439, 514, 565, 615, 665, 715, 791, 841, 894, 955, 1006, 1056, 1106, 1189, 1241], Score: 0.4794034090909091 for d=2, m=5, w=9
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [344, 439, 514, 565, 615, 665, 715, 889, 958, 1009, 1097, 1190, 1243], Score: 0.5511363636363636 for d=3, m=4, w=10
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [], Score: 0.302734375 for d=4, m=3, w=9
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [], Score: 0.302734375 for d=5, m=3, w=11
Time Series: ArrowHead: True Change Points: [753], Found Change Points: [749], Score: 0.9947019495772963 for d=1, m=10, w=10
Time Series

### Benchmark FLUSS - unknown number of change points - fixed window size m (number of values) ###
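Hyperparameters:
- excl_factor (default: 5)
- L (set to the window size provided with each TSSB time series)
- threshold (set to 0.45 in the cell below)

The number of change points is not passed to the segmentation here. The number of values m is fixed to the window size, so the covered range grows with the dilation size d: w = (m - 1) * d + 1.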

In [4]:
# Benchmark FLUSS with an unknown number of change points and a fixed number of
# values per window: the dataset's window size is passed as `m` and `target_w`
# is left as None. `threshold` is handed to the helper in place of a regime count.
dilation_sizes = [1, 2, 3, 4, 5]
threshold = 0.45

# One result column per dilation size: "fluss1" ... "fluss5".
col_names = ["fluss" + str(d) for d in dilation_sizes]

# Collect the scores of every time series in a plain list and build the frame
# once after the loop; enlarging a DataFrame row by row via `.loc` re-allocates
# on every insert.
score_rows = []
for _, (ts_name, window_size, cps, ts) in tssb.iterrows():
    L = window_size
    # The regime count is deliberately not computed here: this variant is not
    # given the number of change points (the old `n_regimes` local was unused).
    # NOTE(review): assumes the helper returns one score per entry of
    # `dilation_sizes` (i.e. one value per result column) - confirm in core.calculate.
    scores = calculate.segmentation_fluss_unknown_cps(ts, ts_name, cps, dilation_sizes, L, threshold, target_w=None, m=window_size)
    score_rows.append(scores)
df_my_results = pd.DataFrame(score_rows, columns=col_names)

# merge both dataframes: the new FLUSS columns are appended to the right of the
# previously stored scores; rows are matched on the index
df_scores = pd.read_csv("../results/segmentation/segmentation_covering_unknown.csv")
result = pd.concat([df_scores, df_my_results], axis=1)
result.to_csv("../results/segmentation/segmentation_covering_unknown_stumpy_m.csv", index=False)

Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [349, 439, 512, 563, 613, 663, 713, 780, 833, 889, 957, 1008, 1058, 1122, 1173, 1224, 1283], Score: 0.4303977272727273 for d=1, m=10, w=10
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [328, 378, 432, 507, 558, 608, 658, 708, 788, 884, 947, 998, 1048, 1126, 1185, 1238], Score: 0.4772727272727273 for d=2, m=10, w=19
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [340, 425, 502, 553, 604, 654, 704, 792, 887, 938, 989, 1039, 1112, 1164, 1215, 1266], Score: 0.4616477272727273 for d=3, m=10, w=28
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [295, 346, 427, 494, 545, 595, 645, 695, 787, 887, 984, 1035, 1085, 1137, 1188, 1239], Score: 0.4375 for d=4, m=10, w=37
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [288, 339, 438, 490, 541, 591, 641, 691, 783, 858, 930, 981, 1031, 1084, 1136, 1