# Change Point Detection

- Binary segmentation
- Bottom up segmentation
- Pruned Exact Linear Time (PELT)

The data used for these models is the closeness centrality (CC) of 2003 (June - August).

In [1]:
# Load libraries
import netCDF4 as nc
import xarray as xr
import glob
import pandas as pd
import os
# from itertools import zip_longest
import numpy as np
# import seaborn as sns
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import cartopy.feature as cf
import ruptures as rpt
# from datetime import datetime
import pymannkendall as mk

ERROR 1: PROJ: proj_create_from_database: Open of /opt/conda/share/proj failed


## Data preparation

In [5]:
# CC data 2003
file_pattern = '../../private/complex_network_coefficients/2000-2009_run_20240105_1808/rasterfiles/Europe/2003/CN_Europe_0.25x0.25deg_CC_2003-*.nc'
file_paths = glob.glob(file_pattern)

# Open and concatenate files
cc_2003 = xr.open_mfdataset(file_paths)

# Rename coefficient
cc_2003 = cc_2003.rename({'coefficient': 'CC'})

# Save the merged data to a new NetCDF file
if os.path.exists("../data/cc_2003.nc"):
    os.remove("../data/cc_2003.nc")
    cc_2003.to_netcdf("../data/cc_2003.nc")
else:
    cc_2003.to_netcdf("../data/cc_2003.nc")

In [2]:
# Flatten 3D array of Closeness Centrality into 1D array
def flatten(data):
    flat_data = []
    for i in range(data.shape[2]):
        for j in range(data.shape[1]):
            node = []
            for k in range(data.shape[0]):
                node.append(data[k][j][i])
            flat_data.append(node)
    return flat_data

In [3]:
# Load your NetCDF file and variable
data = nc.Dataset('../data/cc_2003.nc')
variable = data['CC']
ds = xr.open_dataset('../data/cc_2003.nc')
input = flatten(np.array(variable))

In [8]:
# input_list = input.tolist()
df_input = pd.DataFrame(input)
df_input.to_csv("../data/df_input.csv", index=False)

## Coordinate extraction

In [4]:
# Get the 1D index of the change points
def get_1d_index(input, change_points):
    flat_index_guide = np.array_split(np.array(range(len(input[0])*len(input))), len(input))
    flat_index = []
    for i in range(len(change_points)):
        for j in range(len(change_points[i])-1):
            index = flat_index_guide[i][change_points[i][j]]
            flat_index.append(index)
    return flat_index

# Get the 3D index of the change points
def get_3d_index(arr, var_shape):
    index = []
    for i in range(len(arr)):
        ind = np.unravel_index(arr[i], var_shape)
        index.append(ind)
    return index

# Return the time, lat, and lon values related to the change points
def get_coords(index, ds):
    time = []
    lat = []
    lon = []
    for i in range(len(index)):
        t = ds['time'].values[index[i][0]]
        t = t.astype('datetime64[D]').astype(str)
        la = ds['lat'].values[index[i][1]]
        lo = ds['lon'].values[index[i][2]]
        time.append(t)
        lat.append(la)
        lon.append(lo)
    return time, lat, lon

## Source/Sink extraction

In [5]:
def sourcesink(input, change_points, jp):
    source_sink = []
    for i in range(len(change_points)):
        for j in range(len(change_points[i])-1):
            if (input[i][change_points[i][j] - jp] == 0.0) & (input[i][change_points[i][j] + jp] != 0.0):
                source_sink.append("increase")
            elif (input[i][change_points[i][j] - jp] != 0.0) & (input[i][change_points[i][j] + jp] == 0.0):
                source_sink.append("decrease")
            else: source_sink.append("none")  
    return source_sink

## Variable value

In [6]:
def variable_value(input, change_points):
    value = []
    for i in range(len(change_points)):
        for j in range(len(change_points[i])-1):
            value.append(input[i][change_points[i][j]])
    return value

## Models

In [20]:
def binseg(input, mod, jp):
    my_bkps = rpt.Binseg(model=mod, jump=jp).fit_predict(input, pen=1)
    return my_bkps

def bottomup(input, mod, jp):
    my_bkps = rpt.BottomUp(model=mod, jump=jp).fit_predict(input, pen=1)
    return my_bkps

def pelt(input, mod, jp):
    my_bkps = rpt.Pelt(model=mod, jump=jp).fit_predict(input, pen=1)
    return my_bkps

In [21]:
def changepoints(type, input, mod, jp):
    bkps = []
    if type == "binseg":
        for i in range(len(input)):
            bkps.append(binseg(np.array(input[i]), mod, jp))
    elif type == "bottomup":
        for i in range(len(input)):
            bkps.append(bottomup(np.array(input[i]), mod, jp))
    elif type == "pelt":
        for i in range(len(input)):
            bkps.append(pelt(np.array(input[i]), mod, jp))
    else: print("Mehtod does not exist")
    return bkps

### Binary segmentation

In [22]:
bkps_l1 = changepoints("binseg", input, "l1", 2)

flat_index = get_1d_index(input, bkps_l1)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_l1, 2)
value = variable_value(input, bkps_l1)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'SourceSink': source_sink, 'CC': value}
df_bs_l1 = pd.DataFrame(change_point_data)
print(df_bs_l1)

             Time   Latitude  Longitude SourceSink        CC
0      2003-06-04  40.577145  -3.164794   increase  1.000000
1      2003-06-04  40.577145  -1.157303   decrease  0.000000
2      2003-06-04  53.148571  19.419476   increase  1.000000
3      2003-06-04  53.148571  21.426966   decrease  0.000000
4      2003-06-04  65.720001  -5.172285   increase  0.877397
...           ...        ...        ...        ...       ...
11168  2003-09-01  46.360001  -0.655431   decrease  0.000000
11169  2003-09-01  49.377144 -16.464420   increase  0.974926
11170  2003-09-01  49.377144   7.374532   increase  0.974926
11171  2003-09-01  61.948570   5.617978   increase  1.000000
11172  2003-09-01  61.948570  29.456928   increase  1.000000

[11173 rows x 5 columns]


In [23]:
bkps_l2 = changepoints("binseg", input, "l2", 2)

flat_index = get_1d_index(input, bkps_l2)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_l2, 2)
value = variable_value(input, bkps_l2)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'SourceSink': source_sink, 'CC': value}
df_bs_l2 = pd.DataFrame(change_point_data)
print(df_bs_l2)

             Time   Latitude  Longitude SourceSink   CC
0      2003-06-04  40.577145 -18.722847   increase  1.0
1      2003-06-04  40.577145 -16.213484   decrease  0.0
2      2003-06-04  40.577145  -6.176030   increase  0.0
3      2003-06-04  40.577145  -4.670412   decrease  0.0
4      2003-06-04  40.577145  -3.164794   increase  1.0
...           ...        ...        ...        ...  ...
41347  2003-09-01  69.742859  29.456928   decrease  0.0
41348  2003-09-01  69.742859  33.471909   increase  1.0
41349  2003-09-01  69.742859  36.985020   decrease  0.0
41350  2003-09-01  69.994286  -8.936330   increase  0.0
41351  2003-09-01  69.994286  -6.426966   decrease  0.0

[41352 rows x 5 columns]


In [24]:
bkps_rbf = changepoints("binseg", input, "rbf", 2)

flat_index = get_1d_index(input, bkps_rbf)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_rbf, 2)
value = variable_value(input, bkps_rbf)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'SourceSink': source_sink, 'CC': value}
df_bs_rbf = pd.DataFrame(change_point_data)
print(df_bs_rbf)

             Time   Latitude  Longitude SourceSink   CC
0      2003-06-04  40.577145 -18.722847   increase  1.0
1      2003-06-04  40.577145 -16.213484   decrease  0.0
2      2003-06-04  40.577145  -6.176030   increase  0.0
3      2003-06-04  40.577145  -4.670412   decrease  0.0
4      2003-06-04  40.577145  -3.164794   increase  1.0
...           ...        ...        ...        ...  ...
56696  2003-09-01  69.742859  36.985020   decrease  0.0
56697  2003-09-01  69.994286  -8.936330   increase  0.0
56698  2003-09-01  69.994286  -6.426966   decrease  0.0
56699  2003-09-01  69.994286  -2.913858   increase  1.0
56700  2003-09-01  69.994286  -1.408240   decrease  0.0

[56701 rows x 5 columns]


In [25]:
bkps_lin = changepoints("binseg", input, "linear", 2)

flat_index = get_1d_index(input, bkps_lin)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_lin, 2)
value = variable_value(input, bkps_lin)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'SourceSink': source_sink, 'CC': value}
df_bs_lin = pd.DataFrame(change_point_data)
print(df_bs_lin)

AssertionError: Not enough dimensions

In [26]:
bkps_nor = changepoints("binseg", input, "normal", 2)

flat_index = get_1d_index(input, bkps_nor)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_nor, 2)
value = variable_value(input, bkps_nor)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'SourceSink': source_sink, 'CC': value}
df_bs_nor = pd.DataFrame(change_point_data)
print(df_bs_nor)



              Time   Latitude  Longitude SourceSink        CC
0       2003-06-04  40.577145 -18.722847   increase  1.000000
1       2003-06-04  40.577145 -17.719101       none  1.000000
2       2003-06-04  40.577145 -17.217228       none  0.993076
3       2003-06-04  40.577145 -16.715355   decrease  0.974960
4       2003-06-04  40.577145 -16.213484   decrease  0.000000
...            ...        ...        ...        ...       ...
186469  2003-09-01  69.994286  -6.426966   decrease  0.000000
186470  2003-09-01  69.994286  -2.913858   increase  1.000000
186471  2003-09-01  69.994286  -2.411985       none  0.877397
186472  2003-09-01  69.994286  -1.910112   decrease  0.993286
186473  2003-09-01  69.994286  -1.408240   decrease  0.000000

[186474 rows x 5 columns]


In [27]:
bkps_ar = changepoints("binseg", input, "ar", 2)

flat_index = get_1d_index(input, bkps_ar)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_ar, 2)
value = variable_value(input, bkps_ar)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'SourceSink': source_sink, 'CC': value}
df_bs_ar = pd.DataFrame(change_point_data)
print(df_bs_ar)

             Time   Latitude  Longitude SourceSink        CC
0      2003-06-04  53.148571 -17.970037   decrease  0.974960
1      2003-06-04  65.720001 -19.224720   decrease  0.974960
2      2003-06-04  65.720001  -3.666667   decrease  0.000000
3      2003-06-04  65.971428 -23.239700   decrease  0.000000
4      2003-06-05  42.588570 -16.213484   increase  1.000000
...           ...        ...        ...        ...       ...
21070  2003-09-01  69.742859 -18.722847   decrease  0.998870
21071  2003-09-01  69.742859   5.116105   decrease  0.998870
21072  2003-09-01  69.742859  28.955055   decrease  0.998870
21073  2003-09-01  69.994286  -7.932584       none  0.983945
21074  2003-09-01  69.994286  -6.426966   decrease  0.000000

[21075 rows x 5 columns]


### Bottom up segmentation

In [9]:
bkps_l1 = changepoints("bottomup", input, "l1", 2)

flat_index = get_1d_index(input, bkps_l1)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_l1, 2)
value = variable_value(input, bkps_l1)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'SourceSink': source_sink, 'CC': value}
df_bu_l1 = pd.DataFrame(change_point_data)
print(df_bu_l1)

             Time   Latitude  Longitude SourceSink        CC
0      2003-06-04  40.577145 -18.722847   increase  1.000000
1      2003-06-04  40.577145 -16.715355   decrease  0.974960
2      2003-06-04  40.577145  -5.674157   increase  0.998813
3      2003-06-04  40.577145  -4.670412   decrease  0.000000
4      2003-06-04  40.577145  -3.164794   increase  1.000000
...           ...        ...        ...        ...       ...
72517  2003-09-01  69.742859  36.985020   decrease  0.000000
72518  2003-09-01  69.994286  -8.434457   increase  0.986869
72519  2003-09-01  69.994286  -6.426966   decrease  0.000000
72520  2003-09-01  69.994286  -2.913858   increase  1.000000
72521  2003-09-01  69.994286  -1.408240   decrease  0.000000

[72522 rows x 5 columns]


In [32]:
# dataset = df_bu_l1.set_index(['Time', 'Latitude', 'Longitude'], inplace = True, append = True, drop = False).to_xarray()
# test = dataset.to_netcdf("test.nc")

In [None]:
# projection = ccrs.Mercator()
# crs = ccrs.PlateCarree()

# # Now we will create axes object having specific projection 
# plt.figure(dpi=150)
# ax = plt.axes(projection=projection, frameon=True)

# # Draw gridlines in degrees over Mercator map
# gl = ax.gridlines(crs=crs, draw_labels=True,
#                   linewidth=.6, color='gray', alpha=0.5, linestyle='-.')
# gl.xlabel_style = {"size" : 7}
# gl.ylabel_style = {"size" : 7}

# # To plot borders and coastlines, we can use cartopy feature
# ax.add_feature(cf.COASTLINE.with_scale("50m"), lw=0.5)
# ax.add_feature(cf.BORDERS.with_scale("50m"), lw=0.3)

# dataset = nc.Dataset("test.nc")

# sst = dataset.variables['CC'][1,:] #52
# lats = dataset.variables['Latitude'][:]
# lons = dataset.variables['Longitude'][:]

# plt.contourf(lons, lats, sst, 1,
#              transform=ccrs.PlateCarree())

# plt.show()

In [249]:
bkps_l2 = changepoints("bottomup", input, "l2", 2)

flat_index = get_1d_index(input, bkps_l2)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_l2, 2)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'Source/Sink': source_sink}
df_bu_l2 = pd.DataFrame(change_point_data)
print(df_bu_l2)

             Time   Latitude  Longitude Source/Sink
0      2003-06-04  40.577145 -18.722847    increase
1      2003-06-04  40.577145 -16.213484    decrease
2      2003-06-04  40.577145  -6.176030    increase
3      2003-06-04  40.577145  -4.670412    decrease
4      2003-06-04  40.577145  -3.164794    increase
...           ...        ...        ...         ...
67865  2003-09-01  69.742859  36.985020    decrease
67866  2003-09-01  69.994286  -8.936330    increase
67867  2003-09-01  69.994286  -6.426966    decrease
67868  2003-09-01  69.994286  -2.913858    increase
67869  2003-09-01  69.994286  -1.408240    decrease

[67870 rows x 4 columns]


In [250]:
bkps_rbf = changepoints("bottomup", input, "rbf", 2)

flat_index = get_1d_index(input, bkps_rbf)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_rbf, 2)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'Source/Sink': source_sink}
df_bu_rbf = pd.DataFrame(change_point_data)
print(df_bu_rbf)

             Time   Latitude  Longitude Source/Sink
0      2003-06-04  40.577145 -18.722847    increase
1      2003-06-04  40.577145 -16.213484    decrease
2      2003-06-04  40.577145  -6.176030    increase
3      2003-06-04  40.577145  -4.670412    decrease
4      2003-06-04  40.577145  -3.164794    increase
...           ...        ...        ...         ...
78280  2003-09-01  69.742859  36.985020    decrease
78281  2003-09-01  69.994286  -8.936330    increase
78282  2003-09-01  69.994286  -6.426966    decrease
78283  2003-09-01  69.994286  -2.913858    increase
78284  2003-09-01  69.994286  -1.408240    decrease

[78285 rows x 4 columns]


In [251]:
bkps_lin = changepoints("bottomup", input, "linear", 2)

flat_index = get_1d_index(input, bkps_lin)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_lin, 2)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'Source/Sink': source_sink}
df_bu_lin = pd.DataFrame(change_point_data)
print(df_bu_lin)

AssertionError: Not enough dimensions

In [252]:
bkps_nor = changepoints("bottomup", input, "normal", 2)

flat_index = get_1d_index(input, bkps_nor)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_nor, 2)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'Source/Sink': source_sink}
df_bu_nor = pd.DataFrame(change_point_data)
print(df_bu_nor)



              Time   Latitude  Longitude Source/Sink
0       2003-06-04  40.577145 -18.722847    increase
1       2003-06-04  40.577145 -17.719101        none
2       2003-06-04  40.577145 -17.217228        none
3       2003-06-04  40.577145 -16.715355    decrease
4       2003-06-04  40.577145 -16.213484    decrease
...            ...        ...        ...         ...
201827  2003-09-01  69.994286  -6.426966    decrease
201828  2003-09-01  69.994286  -2.913858    increase
201829  2003-09-01  69.994286  -2.411985        none
201830  2003-09-01  69.994286  -1.910112    decrease
201831  2003-09-01  69.994286  -1.408240    decrease

[201832 rows x 4 columns]


In [253]:
bkps_ar = changepoints("bottomup", input, "ar", 2)

flat_index = get_1d_index(input, bkps_ar)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_ar, 2)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'Source/Sink': source_sink}
df_bu_ar = pd.DataFrame(change_point_data)
print(df_bu_ar)

             Time   Latitude  Longitude Source/Sink
0      2003-06-04  40.577145 -17.719101        none
1      2003-06-04  40.577145 -16.213484    decrease
2      2003-06-04  40.577145  -5.674157    increase
3      2003-06-04  53.148571 -17.468164    decrease
4      2003-06-04  53.148571  -6.928839    increase
...           ...        ...        ...         ...
31228  2003-09-01  69.742859  33.471909    increase
31229  2003-09-01  69.742859  34.977528    increase
31230  2003-09-01  69.742859  36.483147    decrease
31231  2003-09-01  69.994286  -8.434457    increase
31232  2003-09-01  69.994286  -6.928839    decrease

[31233 rows x 4 columns]


### PELT

In [28]:
bkps_l1 = changepoints("pelt", input, "l1", 2)

flat_index = get_1d_index(input, bkps_l1)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_l1, 2)
value = variable_value(input, bkps_l1)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'SourceSink': source_sink, 'CC': value}
df_p_l1 = pd.DataFrame(change_point_data)
print(df_p_l1)

             Time   Latitude  Longitude SourceSink        CC
0      2003-06-04  40.577145 -18.722847   increase  1.000000
1      2003-06-04  40.577145 -16.715355   decrease  0.974960
2      2003-06-04  40.577145  -6.176030   increase  0.000000
3      2003-06-04  40.577145  -4.670412   decrease  0.000000
4      2003-06-04  40.577145  -3.164794   increase  1.000000
...           ...        ...        ...        ...       ...
59427  2003-09-01  69.742859  36.483147   decrease  0.992906
59428  2003-09-01  69.994286  -8.434457   increase  0.986869
59429  2003-09-01  69.994286  -6.928839   decrease  0.992906
59430  2003-09-01  69.994286  -2.913858   increase  1.000000
59431  2003-09-01  69.994286  -1.910112   decrease  0.993286

[59432 rows x 5 columns]


In [29]:
bkps_l2 = changepoints("binseg", input, "l2", 2)

flat_index = get_1d_index(input, bkps_l2)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_l2, 2)
value = variable_value(input, bkps_l2)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'SourceSink': source_sink, 'CC': value}
df_p_l2 = pd.DataFrame(change_point_data)
print(df_p_l2)

             Time   Latitude  Longitude SourceSink   CC
0      2003-06-04  40.577145 -18.722847   increase  1.0
1      2003-06-04  40.577145 -16.213484   decrease  0.0
2      2003-06-04  40.577145  -6.176030   increase  0.0
3      2003-06-04  40.577145  -4.670412   decrease  0.0
4      2003-06-04  40.577145  -3.164794   increase  1.0
...           ...        ...        ...        ...  ...
41347  2003-09-01  69.742859  29.456928   decrease  0.0
41348  2003-09-01  69.742859  33.471909   increase  1.0
41349  2003-09-01  69.742859  36.985020   decrease  0.0
41350  2003-09-01  69.994286  -8.936330   increase  0.0
41351  2003-09-01  69.994286  -6.426966   decrease  0.0

[41352 rows x 5 columns]


In [30]:
bkps_rbf = changepoints("binseg", input, "rbf", 2)

flat_index = get_1d_index(input, bkps_rbf)
index = get_3d_index(flat_index, variable.shape)
time, lat, lon = get_coords(index, ds)
source_sink = sourcesink(input, bkps_rbf, 2)
value = variable_value(input, bkps_rbf)
change_point_data = {'Time': time, 'Latitude': lat, 'Longitude': lon, 'SourceSink': source_sink, 'CC': value}
df_p_rbf = pd.DataFrame(change_point_data)
print(df_p_rbf)

             Time   Latitude  Longitude SourceSink   CC
0      2003-06-04  40.577145 -18.722847   increase  1.0
1      2003-06-04  40.577145 -16.213484   decrease  0.0
2      2003-06-04  40.577145  -6.176030   increase  0.0
3      2003-06-04  40.577145  -4.670412   decrease  0.0
4      2003-06-04  40.577145  -3.164794   increase  1.0
...           ...        ...        ...        ...  ...
56696  2003-09-01  69.742859  36.985020   decrease  0.0
56697  2003-09-01  69.994286  -8.936330   increase  0.0
56698  2003-09-01  69.994286  -6.426966   decrease  0.0
56699  2003-09-01  69.994286  -2.913858   increase  1.0
56700  2003-09-01  69.994286  -1.408240   decrease  0.0

[56701 rows x 5 columns]


## Reduced dataset

Reduce the dataset to the daily CC average over the entire grid. Next, perform change point detection on the reduced dataset to identify change days. Next the complete CC dataset for 2003 can be reduced to the days found in the previous step. On this data change point detection can again be performed to identify change points (locations).

In [31]:
daily_average = []

for i in range(ds.sizes['time']):
    # Select data for the chosen day
    daily_data = ds.isel(time=i)

    # Calculate the average across all latitudes and longitudes
    average_value = daily_data.mean(dim=("lat", "lon"))

    value = average_value.CC.values
    
    daily_average.append(float(value))

### Binary segmentation

In [None]:
bkps_l1 = rpt.Binseg('l1').fit_predict(np.array(daily_average), n_bkps=4)
print(bkps_l1)

bkps_l2 = rpt.Binseg('l2').fit_predict(np.array(daily_average), n_bkps=4)
print(bkps_l2)

bkps_rbf = rpt.Binseg('rbf').fit_predict(np.array(daily_average), n_bkps=4)
print(bkps_rbf)

bkps_nor = rpt.Binseg('normal').fit_predict(np.array(daily_average), n_bkps=4)
print(bkps_nor)

bkps_ar = rpt.Binseg('ar').fit_predict(np.array(daily_average), n_bkps=4)
print(bkps_ar)

### Bottom up segmentation

In [2]:
bkps_l1 = rpt.BottomUp('l1').fit_predict(np.array(daily_average), n_bkps=4)
print(bkps_l1)

bkps_l2 = rpt.BottomUp('l2').fit_predict(np.array(daily_average), n_bkps=4)
print(bkps_l2)

bkps_rbf = rpt.BottomUp('rbf').fit_predict(np.array(daily_average), n_bkps=4)
print(bkps_rbf)

### PELT

In [None]:
bkps_l1 = rpt.Pelt('l1').fit_predict(np.array(daily_average), pen=4)
print(bkps_l1)

bkps_l2 = rpt.Pelt('l2').fit_predict(np.array(daily_average), pen=4)
print(bkps_l2)

bkps_rbf = rpt.Pelt('rbf').fit_predict(np.array(daily_average), pen=4)
print(bkps_rbf)

### Visualize results

In [None]:
plt.plot(daily_average)
plt.axvline(5, color='red')
plt.axvline(10, color='red')
plt.axvline(15, color='red')
plt.axvline(30, color='red')
plt.axvline(35, color='red')
plt.axvline(45, color='red')
plt.axvline(60, color='red')
plt.axvline(65, color='red')
plt.axvline(75, color='red')
plt.axvline(80, color='red')
plt.axvline(85, color='red')