In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tslearn.metrics import dtw
from tslearn.preprocessing import TimeSeriesScalerMinMax
from tslearn.shapelets import LearningShapelets, \
    grabocka_params_to_shapelet_size_dict

In [2]:
# Read the OSULeaf train/test splits from whitespace-delimited text files
# into NumPy arrays (one file per split, paths relative to this notebook).
train_data, test_data = (
    np.genfromtxt(split_path)
    for split_path in ('../data/OSULeaf_TRAIN.txt', '../data/OSULeaf_TEST.txt')
)

In [3]:
# Column 0 is used as the target vector; every remaining column is a feature
# (the time-series values of that row).
y_train, X_train = train_data[:, 0], train_data[:, 1:]

In [4]:
# Same layout as the training split: first column -> targets, rest -> series.
y_test, X_test = test_data[:, 0], test_data[:, 1:]

In [28]:
# Pick the first test series as the query that is compared against the
# training series with DTW.
query_idx = 0
test_seq = X_test[query_idx]

In [34]:
# For each class label 1..6, gather the DTW distance between the query series
# and every training series carrying that label. Keys are 'Class_<label>',
# values are plain lists of distances in training-set order.
dtw_dict = {
    f'Class_{label}': [
        dtw(test_seq, candidate)
        for candidate in X_train[y_train == label]
    ]
    for label in range(1, 7)
}

In [36]:
# Collapse each class's list of DTW distances to its arithmetic mean,
# keeping the same keys and key order as dtw_dict.
dtw_mean = {
    class_name: np.mean(class_scores)
    for class_name, class_scores in dtw_dict.items()
}

In [37]:
# Display the per-class mean DTW distance between the query series and the
# training series of each class.
dtw_mean

{'Class_1': np.float64(7.757273008634903),
 'Class_2': np.float64(9.985754209117044),
 'Class_3': np.float64(8.20279804586223),
 'Class_4': np.float64(9.955665610746955),
 'Class_5': np.float64(6.604638446592319),
 'Class_6': np.float64(8.86511791959894)}

In [8]:
# Display the label stored for the first test row (the same row index that
# test_seq is taken from), for comparison with the per-class DTW means.
y_test[0]

np.float64(5.0)

In [None]:
# Learn a single shapelet on the min-max scaled training set, then plot it
# aligned on its best-matching training series together with the
# sliding-window distance profile between that series and the shapelet.

# NOTE: this rebinds X_train, so any cell executed afterwards sees the scaled
# array rather than the raw slice of train_data.
X_train = TimeSeriesScalerMinMax().fit_transform(X_train)

# Get statistics of the dataset
n_ts, ts_sz = X_train.shape[:2]
n_classes = len(set(y_train))

# We will extract 1 shapelet (of length 20) and align it with a time series
shapelet_sizes = {20: 1}

# Define the model and fit it using the training data.
# The optimizer is given by its Keras string identifier because no `Adam`
# class is imported anywhere in this notebook (the previous `Adam(lr=0.01)`
# raised NameError). The string form uses Keras' default learning rate; to
# get a learning rate of 0.01 back, import Adam from Keras in the imports
# cell and pass `optimizer=Adam(learning_rate=0.01)` instead.
shp_clf = LearningShapelets(n_shapelets_per_size=shapelet_sizes,
                            weight_regularizer=0.001,
                            optimizer="adam",
                            max_iter=250,
                            verbose=0,
                            scale=False,
                            random_state=42)
shp_clf.fit(X_train, y_train)

# Get the number of extracted shapelets, the (minimal) distances from
# each of the timeseries to each of the shapelets, and the corresponding
# locations (index) where the minimal distance was found
n_shapelets = sum(shapelet_sizes.values())
shapelet_distances = shp_clf.transform(X_train)
predicted_locations = shp_clf.locate(X_train)

f, ax = plt.subplots(2, 1, sharex=True)

# Plot the shapelet and align it on the best matched time series. The optimizer
# will often enlarge the shapelet to create a larger gap between the distances
# of both classes. We therefore normalize the shapelet again before plotting.
test_ts_id = np.argmin(np.sum(shapelet_distances, axis=1))
shap = shp_clf.shapelets_[0]
shap = TimeSeriesScalerMinMax().fit_transform(shap.reshape(1, -1, 1)).flatten()
pos = predicted_locations[test_ts_id, 0]
ax[0].plot(X_train[test_ts_id].ravel())
ax[0].plot(np.arange(pos, pos + len(shap)), shap, linewidth=2)
ax[0].axvline(pos, color='k', linestyle='--', alpha=0.25)
ax[0].set_title("The aligned extracted shapelet")

# We calculate the distances from the shapelet to the timeseries ourselves.
# A series of length n holds n - len(shap) + 1 windows of the shapelet's
# length; the "+ 1" keeps the last window, which ends on the final sample.
time_series = X_train[test_ts_id].ravel()
n_windows = len(time_series) - len(shap) + 1
distances = [
    np.linalg.norm(time_series[start:start + len(shap)] - shap)
    for start in range(n_windows)
]
ax[1].plot(distances)
ax[1].axvline(np.argmin(distances), color='k', linestyle='--', alpha=0.25)
ax[1].set_title('The distances between the time series and the shapelet')

plt.tight_layout()
plt.show()