In [1]:
import numpy as np

import plotly.graph_objs as go

In [2]:
nr_points = 50
interval_start = 0
interval_end = 20

t = np.linspace(interval_start, interval_end, nr_points)

# Generate two similar shaped time series
series1 = 0.8 + 0.9 * np.sin(0.9 + 0.9 * t) + 0.03 * np.random.randn(nr_points) - 0.2 * (t / (interval_end - interval_start))
series2 = np.sin(1.05 * t) + 0.15 * np.random.randn(nr_points)

In [3]:
def plot_time_series_plotly(t, series1, series2, title):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=t, y=series1, mode='lines+markers', name='Series 1'))
    fig.add_trace(go.Scatter(x=t, y=series2, mode='lines+markers', name='Series 2'))
    fig.update_layout(
        title=title,
        xaxis_title='t',
        yaxis_title='Value',
        legend_title='Series'
    )
    return fig

In [4]:
plot_time_series_plotly(t, series1, series2, 'Two Similar Time Series')

### Euclidean Distance
The Euclidean distance is the most straightforward similarity measure. It calculates the straight-line distance between two points.
For time series data, this means comparing the values at each time point directly.

Having two time series $X = (x_1, x_2, \ldots, x_n)$ and $Y = (y_1, y_2, \ldots, y_n)$ of equal length $n$, the Euclidean distance $d$ between them is calculated as:
$$d(X, Y) = \sqrt{\sum_{i=1}^{n} (x_i - y_i)^2}$$

In [5]:
#| code-fold: true

def plot_euclidean_distance(series1, series2, t):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=t, y=series1, mode='lines+markers', name='Series 1'))
    fig.add_trace(go.Scatter(x=t, y=series2, mode='lines+markers', name='Series 2'))

    # Draw lines showing the Euclidean distance at each point (every 5th for clarity)
    for i, v in enumerate(t):
        fig.add_trace(go.Scatter(
            x=[v, v],
            y=[series1[i], series2[i]],
            mode='lines',
            line=dict(color='gray', dash='dash', width=1),
            showlegend=False
        ))

    fig.update_layout(
        title='Euclidean Distance Visualization Between Series 1 and Series 2',
        xaxis_title='t',
        yaxis_title='Value'
    )
    
    return fig

In [6]:
plot_euclidean_distance(series1, series2, t)

In [7]:
def euclidean_distance(series1, series2):
    return np.sqrt(np.sum((series1 - series2) ** 2))

d_euclidean = euclidean_distance(series1, series2)
print(f'Euclidean Distance: {d_euclidean:.2f}')

Euclidean Distance: 6.43


### Dynamic Time Warping
**Dynamic Time Warping (DTW)** is a more advanced similarity measure that accounts for shifts and distortions in the time axis.
It finds the optimal alignment between two time series by warping the time dimension, allowing for comparisons even when the series are out of phase or have different lengths.

In [8]:
from scipy.spatial.distance import cdist

def dtw_distance(s1, s2):
    n, m = len(s1), len(s2)

    # initializing cost matrix
    cost = np.full((n + 1, m + 1), np.inf)
    cost[0, 0] = 0

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            dist = abs(s1[i - 1] - s2[j - 1])
            cost[i, j] = dist + min(cost[i - 1, j],     # insertion
                                    cost[i, j - 1],     # deletion
                                    cost[i - 1, j - 1]) # match
            
    # backtracking path (just for visualization)
    path = []

    i, j = n, m
    
    while i > 0 and j > 0:
        path.append((i - 1, j - 1))
        steps = [(i - 1, j), (i, j - 1), (i - 1, j - 1)]
        costs = [cost[s] if s[0] >= 0 and s[1] >= 0 else np.inf for s in steps]
        min_step = steps[np.argmin(costs)]
        i, j = min_step

    path = path[::-1]

    return cost[n, m], path

d_dtw, path = dtw_distance(series1, series2)

print(f'Dynamic Time Warping Distance: {d_dtw:.2f}')

Dynamic Time Warping Distance: 21.82


Note that this DTW implementation is using a very naive approach computing the full cost matrix.
Many different variants exist that are more efficient and/or add constraints to the warping path.
@lahreche2021 provides a good overview.

In [9]:
def plot_dtw_alignment(s1, s2, t, path):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=t, y=s1, mode='lines+markers', name='Series 1'))
    fig.add_trace(go.Scatter(x=t, y=s2, mode='lines+markers', name='Series 2'))

    # Draw alignment lines
    for (i, j) in path:
        fig.add_trace(go.Scatter(
            x=[t[i], t[j]],
            y=[s1[i], s2[j]],
            mode='lines',
            line=dict(color='gray', width=1, dash='dot'),
            showlegend=False
        ))

    fig.update_layout(
        title='Dynamic Time Warping Alignment Between Series 1 and Series 2',
        xaxis_title='t',
        yaxis_title='Value'
    )
    return fig


plot_dtw_alignment(series1, series2, t, path)

### Preprocessing
In the previous plots we observe that noise, outliers and positioning of the time series can have a significant impact on the similarity measures.
To mitigate these effects, we can apply various preprocessing techniques such as:
- **Smoothing**: Applying filters (e.g., moving average, Gaussian) to reducce noise.
- **Normalization**: Scaling the time series to a common range to ensure that differences in amplitude do not dominate the similarity measure.
- **Detrending**: Removing trends to focus on the fluctuations around a mean level.

Note that the applied preprocessing techniques should be chosen based on the specific characteristics of the data and the analysis goals.

Here, we first smooth the time series using a simple moving average filter.
This can easily be realized by convolution with a specific kernel (a vector consisting of equal weights summing to 1, where the length of the vector is equal to the desired window size).

In [10]:
def moving_average(series, window_size=3):
    kernel = np.ones(window_size) / window_size
    return np.convolve(series, kernel, mode='same')

series1_smooth = moving_average(series1)
series2_smooth = moving_average(series2)

In [11]:
plot_time_series_plotly(t, series1_smooth, series2_smooth, 'Two Similar Time Series Smoothed')

In [12]:
def normalize(series):
    return (series - np.min(series)) / (np.max(series) - np.min(series))

series1_smooth_norm = normalize(series1_smooth)
series2_smooth_norm = normalize(series2_smooth)

In [13]:
plot_time_series_plotly(t, series1_smooth_norm, series2_smooth_norm, 'Two Similar Time Series Smoothed and Normalized')

In [14]:
d_euclidean_sn = euclidean_distance(series1_smooth_norm, series2_smooth_norm)
print(f'Euclidean Distance: {d_euclidean_sn:.2f}')

plot_euclidean_distance(series1_smooth_norm, series2_smooth_norm, t)

Euclidean Distance: 2.29


In [15]:
d_dtw_sn, path_sn = dtw_distance(series1_smooth_norm, series2_smooth_norm)

print(f'Dynamic Time Warping Distance: {d_dtw_sn:.2f}')

plot_dtw_alignment(series1_smooth_norm, series2_smooth_norm, t, path_sn)

Dynamic Time Warping Distance: 5.07
