# Semantic Segmentation (regimes) with arc curves # 
An “arc curve” annotates the raw time series with information about the likelihood of a regime change at each location.

https://stumpy.readthedocs.io/en/latest/Tutorial_Semantic_Segmentation.html

https://sites.google.com/site/onlinesemanticsegmentation/

In [1]:
# SETUP
from tssb.utils import load_time_series_segmentation_datasets
from tssb.evaluation import covering
import pandas as pd
import stumpy

import helper.utils as utils
import helper.calculate as calculate

import matplotlib.pyplot as plt
from tssb.utils import visualize_time_series

## Time Series Segmentation Benchmark (TSSB) ##

https://github.com/ermshaua/time-series-segmentation-benchmark

In [2]:
# Datasets: load the Time Series Segmentation Benchmark (TSSB).
# The benchmark cells iterate over it with `.iterrows()` and unpack each row
# as (ts_name, window_size, cps, ts).
tssb = load_time_series_segmentation_datasets()

### Benchmark FLUSS - known number of change points ###
Hyperparameters:
- excl_factor (default: 5)
- L (set to the window size annotated for each dataset)
- n_regimes (number of true change points + 1)

In [4]:
# Benchmark FLUSS when the number of change points is known.
# Each dilation size d produces one result column named "fluss<d>".
dilation_sizes = [1, 2, 3, 4, 5]  # TODO: determine dilation sizes automatically?

col_names = ["fluss" + str(d) for d in dilation_sizes]

# Collect one row of scores per time series and build the DataFrame once at
# the end, instead of enlarging an empty frame row by row inside the loop.
score_rows = []
for _, (ts_name, window_size, cps, ts) in tssb.iterrows():
    L = window_size
    n_regimes = len(cps) + 1  # known here: one regime more than change points
    target_w = window_size
    scores = calculate.segmentation_fluss_known_cps(ts, ts_name, cps, dilation_sizes, target_w, L, n_regimes)
    score_rows.append(scores)
df_my_results = pd.DataFrame(score_rows, columns=col_names)

# Merge the baseline scores with the new FLUSS columns. The concat is purely
# positional (row i of both frames must describe the same time series), so a
# row-count mismatch would silently misalign or NaN-pad the merged table.
df_scores = pd.read_csv("../results/segmentation/segmentation_covering_known.csv")
assert len(df_scores) == len(df_my_results), (
    "baseline CSV and FLUSS results have different row counts: "
    f"{len(df_scores)} vs {len(df_my_results)}"
)
result = pd.concat([df_scores, df_my_results], axis=1)
result.to_csv("../results/segmentation/segmentation_covering_known_stumpy.csv", index=False)


Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [563, 1008, 1224], Score: 0.9707547520353909 for d=1, m=10, w=10
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [565, 1006, 615], Score: 0.8192135911352328 for d=2, m=5, w=9
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [565, 615, 1009], Score: 0.8223986813624972 for d=3, m=4, w=10
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [565, 635, 343], Score: 0.45746213991037216 for d=4, m=3, w=9
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [563, 624, 1017], Score: 0.8134285513327476 for d=5, m=3, w=11
Time Series: ArrowHead: True Change Points: [753], Found Change Points: [749], Score: 0.9947019495772963 for d=1, m=10, w=10
Time Series: ArrowHead: True Change Points: [753], Found Change Points: [734], Score: 0.9750780986589048 for d=2, m=5, w=9
Time Series: ArrowHead: True Change Points

### Benchmark FLUSS - unknown number of change points ###
Hyperparameters:
- excl_factor (default: 5)
- L (set to the window size annotated for each dataset)
- threshold (set to 0.45 in the cell below)

In [3]:
# Benchmark FLUSS when the number of change points is unknown.
# Each dilation size d produces one result column named "fluss<d>".
dilation_sizes = [1, 2, 3, 4, 5]  # TODO: determine dilation sizes automatically?
threshold = 0.45

col_names = ["fluss" + str(d) for d in dilation_sizes]

# Collect one row of scores per time series and build the DataFrame once at
# the end, instead of enlarging an empty frame row by row inside the loop.
score_rows = []
for _, (ts_name, window_size, cps, ts) in tssb.iterrows():
    L = window_size
    target_w = window_size
    # The true number of regimes is deliberately not derived from `cps` here:
    # this variant is driven by `threshold` instead. `cps` is still passed on
    # to the helper, presumably for scoring - confirm in helper.calculate.
    scores = calculate.segmentation_fluss_unknown_cps(ts, ts_name, cps, dilation_sizes, target_w, L, threshold)
    score_rows.append(scores)
df_my_results = pd.DataFrame(score_rows, columns=col_names)

# Merge the baseline scores with the new FLUSS columns. The concat is purely
# positional (row i of both frames must describe the same time series), so a
# row-count mismatch would silently misalign or NaN-pad the merged table.
df_scores = pd.read_csv("../results/segmentation/segmentation_covering_unknown.csv")
assert len(df_scores) == len(df_my_results), (
    "baseline CSV and FLUSS results have different row counts: "
    f"{len(df_scores)} vs {len(df_my_results)}"
)
result = pd.concat([df_scores, df_my_results], axis=1)
result.to_csv("../results/segmentation/segmentation_covering_unknown_stumpy.csv", index=False)

Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [349, 439, 512, 563, 613, 663, 713, 780, 833, 889, 957, 1008, 1058, 1122, 1173, 1224, 1283], Score: 0.4303977272727273 for d=1, m=10, w=10
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [349, 439, 514, 565, 615, 665, 715, 791, 841, 894, 955, 1006, 1056, 1106, 1189, 1241], Score: 0.4794034090909091 for d=2, m=5, w=9
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [344, 439, 514, 565, 615, 665, 715, 889, 958, 1009, 1097, 1190, 1243], Score: 0.5511363636363636 for d=3, m=4, w=10
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [], Score: 0.302734375 for d=4, m=3, w=9
Time Series: Adiac: True Change Points: [ 572 1012 1232], Found Change Points: [], Score: 0.302734375 for d=5, m=3, w=11
Time Series: ArrowHead: True Change Points: [753], Found Change Points: [749], Score: 0.9947019495772963 for d=1, m=10, w=10
Time Series

In [7]:
# # visualize:
# NOTE(review): disabled cell. `found_cps` is not assigned in any visible cell
# of this notebook, and `ts`, `ts_name`, `cps` would be whatever the last
# benchmark loop iteration left behind - define them explicitly before
# re-enabling this cell.
# fig, ax = visualize_time_series(ts, ts_name, cps, found_cps)
# plt.show()