In [11]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, BoxAnnotation, Span
from bokeh.io import output_notebook, reset_output
from bokeh.layouts import grid
import numpy as np

# Import from stumpy
# Documentation: https://stumpy.readthedocs.io/en/latest/index.html
import stumpy


In [12]:
# Load the sample series: the file is read as one floating-point value per line.
# Blank or whitespace-only lines (e.g. a trailing newline at the end of the file)
# are skipped, because float() raises ValueError on them.
with open("sample_2465_2524.txt", "r") as sample_file:
    sample_data = [float(line) for line in sample_file if line.strip()]


In [13]:
# Matrix profile
# normalize=True z-normalizes every subsequence before it is compared.
# The original matrix profile formulation is the z-normalized one, i.e. normalize=True.
SUB_SEQUENCE_LENGTH = 60

# Calculate the matrix profile using STUMP.
# The series is handed over as a float64 NumPy array (the input type stumpy
# documents) instead of a plain Python list; sample_data itself stays a list.
mp = stumpy.stump(np.asarray(sample_data, dtype=np.float64), m=SUB_SEQUENCE_LENGTH, normalize=True)

In [14]:
# Clear any earlier Bokeh output configuration, then render inline in the notebook
reset_output()
output_notebook()

# Hover-tool fields; passed to both figure() calls in this notebook
TOOLTIPS = [("index", "$index"), ("(x,y)", "$x, $y")]

# Start and end x positions of the transient pattern
starting_point_tran, ending_point_tran = 2465, 2524

# Shaded band spanning the transient pattern
answer_box_annotation = BoxAnnotation(
    left=starting_point_tran,
    right=ending_point_tran,
    fill_alpha=0.2,
    fill_color="#FF5733",
)

# Dashed vertical lines at the two edges of the transient pattern
starting_span, ending_span = (
    Span(location=boundary, dimension='height', line_color="#C70039", line_dash='4 4', line_width=2)
    for boundary in (starting_point_tran, ending_point_tran)
)


In [15]:
# Raw data plot
raw_plot = figure(
    tooltips=TOOLTIPS,  # hover tool fields
    title="Raw data",
    x_axis_label="time",
    y_axis_label="Flux",
)

# One x position per sample: 0 .. len(sample_data) - 1
x_axis_data = list(range(len(sample_data)))

# A single data source feeds both the line and the markers
raw_datasource = ColumnDataSource(data=dict(x=x_axis_data, y=sample_data))

# Semi-transparent line through the raw samples
raw_plot.line('x', 'y', source=raw_datasource, line_alpha=0.5, color="black", line_width=2, legend_label="Raw")

# Small circle marker on every sample.
# scatter() is used because circle() with a `size` argument is deprecated since Bokeh 3.4.
raw_plot.scatter('x', 'y', source=raw_datasource, marker="circle", color="black", fill_alpha=0.6,
                 legend_label="Raw", size=2)

# Mark the transient pattern: shaded box plus a dashed line at each edge
raw_plot.add_layout(answer_box_annotation)
raw_plot.add_layout(starting_span)
raw_plot.add_layout(ending_span)

# Legend appearance; clicking a legend entry hides the corresponding glyphs.
# The border alpha is 0.0, so the border width and color set here are not visible.
raw_plot.legend.border_line_width = 3
raw_plot.legend.click_policy = "hide"
raw_plot.legend.border_line_color = "navy"
raw_plot.legend.border_line_alpha = 0.0


In [16]:
# Matrix profile plot
matrix_plot = figure(
    tooltips=TOOLTIPS,  # hover tool fields
    title="Matrix Profile",
    x_axis_label="Time",
    y_axis_label="",
)

# Plot the matrix profile values
# Note : the first column of the stump output holds the matrix profile
# REF : https://stumpy.readthedocs.io/en/latest/api.html#stumpy.stump
matrix_profile = mp[:, 0]

# One x position per matrix profile entry: 0 .. len(matrix_profile) - 1
x_axis_mp = list(range(len(matrix_profile)))
matrix_plot.line(x_axis_mp, matrix_profile, line_alpha=0.5, color="black", line_width=2, legend_label="Matrix Profile")

# scatter() is used because circle() with a `size` argument is deprecated since Bokeh 3.4.
matrix_plot.scatter(x_axis_mp, matrix_profile, marker="circle", color="black", fill_alpha=0.6,
                    legend_label="Matrix Profile", size=2)

# Mark the transient pattern on the matrix profile plot: shaded box plus a dashed line at each edge
matrix_plot.add_layout(answer_box_annotation)
matrix_plot.add_layout(starting_span)
matrix_plot.add_layout(ending_span)

# Share the x-range object with the raw data plot so both figures pan and zoom together
matrix_plot.x_range = raw_plot.x_range


In [17]:
# Discord discovery: pick the top-k discords from the matrix profile
def pop_next_index(idx, exclusion_zone_list):
    """Return the entries of ``idx`` that do not appear in ``exclusion_zone_list``.

    Args:
        idx: Iterable of candidate indices; their relative order is preserved.
        exclusion_zone_list: Iterable of hashable indices to drop.

    Returns:
        A new list with the surviving entries of ``idx``; ``idx`` is not modified.
    """
    excluded = frozenset(exclusion_zone_list)
    remaining = []
    for candidate in idx:
        if candidate in excluded:
            continue
        remaining.append(candidate)
    return remaining

TOP_K = 1

# Start indices within EXCLUSION_ZONE of an already selected discord are discarded
# before the next pick. Integer ceiling division is used instead of round(), which
# rounds halves to the nearest even number and is therefore inconsistent for odd
# window lengths.
EXCLUSION_ZONE = (SUB_SEQUENCE_LENGTH + 1) // 2

# Entries whose matrix profile value is inf/NaN are never treated as discords.
# NOTE(review): presumably such values mark windows touching missing/invalid samples — confirm.
# The cast is needed because np.isfinite does not accept an object-dtype column.
profile_is_finite = np.isfinite(mp[:, 0].astype(np.float64))

# Candidate start indices ordered by matrix profile value, largest first, as plain ints
discord_idx_list = [int(i) for i in np.argsort(mp[:, 0])[::-1] if profile_is_finite[i]]
top_discord_idx_list = []

# Find the top-k discord indices
for _ in range(TOP_K):
    if not discord_idx_list:
        # Fewer than TOP_K non-overlapping discords are available; stop instead of raising IndexError
        break

    # The first remaining candidate has the largest matrix profile value
    discord_start = discord_idx_list[0]
    top_discord_idx_list.append(range(discord_start, discord_start + SUB_SEQUENCE_LENGTH))

    # Drop every candidate inside the exclusion zone around the chosen discord
    start_zone = discord_start - EXCLUSION_ZONE
    end_zone = discord_start + EXCLUSION_ZONE + 1
    exclusion_zone_list = range(start_zone, end_zone)
    discord_idx_list = pop_next_index(discord_idx_list, exclusion_zone_list)


In [18]:
# Overlay every selected discord on both figures
for top_discord_idx_range in top_discord_idx_list:
    # The first index of the range is where the discord subsequence starts
    matrix_profile_idx = top_discord_idx_range[0]

    # Blue dashed vertical line at the discord start on the matrix profile figure
    top_discord_idx_span = Span(
        location=matrix_profile_idx,
        dimension='height',
        line_color="blue",
        line_dash='4 4',
        line_width=2,
    )
    matrix_plot.add_layout(top_discord_idx_span)

    # x positions and raw values of the discord subsequence
    discord_subsequence_idx = list(top_discord_idx_range)
    discord_subsequence_end = matrix_profile_idx + SUB_SEQUENCE_LENGTH
    discord_subsequence_value = sample_data[matrix_profile_idx:discord_subsequence_end]

    # Data source holding only the discord subsequence
    discord_datasource = ColumnDataSource(
        data={"x": discord_subsequence_idx, "y": discord_subsequence_value}
    )

    # Draw the discord subsequence in blue on the raw data figure
    raw_plot.line(
        'x', 'y',
        source=discord_datasource,
        line_alpha=0.8,
        color="blue",
        line_width=2,
        legend_label="Discord",
    )


In [19]:
# Combine the raw data figure and the matrix profile figure into one grid layout
plots_in_layout = [raw_plot, matrix_plot]
grid_layout = grid(plots_in_layout)

# Render the combined layout
show(grid_layout)
