# Signature Time Series Classification

In [4]:
# Native libraries
import os
import math

# Essential Libraries
import matplotlib.pyplot as plt
import numpy as np

# Preprocessing
from tslearn.svm import TimeSeriesSVC
from tslearn.preprocessing import TimeSeriesScalerMinMax, TimeSeriesResampler
from sklearn.model_selection import train_test_split

# Algorithms


from sklearn.decomposition import PCA

In [5]:
from tqdm import tqdm

## Load time series data from CSV files

In [6]:
# (name, label) pairs in load order, and a lookup from name to its series.
my_signatures = []
signature_2_series = {}

# Root folder of the CSV files, relative to the working directory.
# Layout read below: <series_directory>/<sign id>/<sample index>.csv
series_directory = "time_series_data"
sign_ids = list(range(1, 56))
count_per_sign = 24

# The context manager closes the progress bar even if a file fails to load.
with tqdm(total=len(sign_ids) * count_per_sign) as pbar:
    for i in sign_ids:
        for j in range(1, count_per_sign + 1):
            csv_path = os.path.join(series_directory, str(i), f"{j}.csv")
            with open(csv_path) as f:
                # Only the first line of each file is used.
                line = f.readline().strip()
            # readline() returns "" (never None) at end of file, so test for
            # actual content; an empty file is skipped instead of failing in int("").
            if line:
                series = [int(y) for y in line.split(',')]
                label = i
                name = f"signature_{i}_{j}"
                my_signatures.append((name, label))
                signature_2_series[name] = np.array(series)
            pbar.update(1)

  9%|▉         | 125/1320 [00:13<02:07,  9.39it/s]
100%|██████████| 1320/1320 [00:09<00:00, 139.91it/s]


In [7]:
from numpy.random import default_rng

## Preprocessing

In [8]:
# Gather the distinct series lengths; a single value means every series has the same length.
series_lengths = set(map(len, signature_2_series.values()))
print(series_lengths)

{640}


In [9]:
# Split the (name, label) pairs, holding out 20% for testing; the fixed seed keeps the split repeatable.
train_name, test_name = train_test_split(
    my_signatures,
    test_size=0.2,
    random_state=1234,
)

In [10]:
# Resolve every (name, label) pair to its series so that features and labels
# stay aligned by position within each split.
X_train = np.array([signature_2_series[sig_name] for sig_name, _ in train_name])
y_train = np.array([int(sig_label) for _, sig_label in train_name])

X_test = np.array([signature_2_series[sig_name] for sig_name, _ in test_name])
y_test = np.array([int(sig_label) for _, sig_label in test_name])

In [11]:
# Resample both splits to the same number of time steps, using a fresh resampler for each.
target_length = 320
X_train, X_test = (
    TimeSeriesResampler(sz=target_length).fit_transform(split)
    for split in (X_train, X_test)
)

for caption, split in (("new train shape: ", X_train), ("new testshape: ", X_test)):
    print(caption, split.shape)

new train shape:  (1056, 320, 1)
new testshape:  (264, 320, 1)


In [None]:
# Min-max scale each split with its own scaler instance (default settings),
# so nothing fitted on one split is reused on the other.
train_scaler, test_scaler = TimeSeriesScalerMinMax(), TimeSeriesScalerMinMax()
X_train = train_scaler.fit_transform(X_train)
X_test = test_scaler.fit_transform(X_test)

## Time Series Classification

In [None]:
# Time-series SVM classifier configured with the "gak" kernel and gamma = 0.1.
svc_options = {"kernel": "gak", "gamma": 0.1}
clf = TimeSeriesSVC(**svc_options)
clf.fit(X_train, y_train)

## Evaluation

In [None]:
# Score the fitted classifier on the held-out split and report the result.
test_score = clf.score(X_test, y_test)
print("Correct classification rate:", test_score)

In [None]:
# Sorted unique labels. support_vectors_ is indexed by position below, so the
# labels need a deterministic order; iterating a plain set does not guarantee one.
# NOTE(review): assumes support_vectors_ holds one entry per class in sorted
# label order (scikit-learn's class ordering) — confirm against the tslearn docs.
class_labels = np.unique(y_train)
n_classes = len(class_labels)

support_vectors = clf.support_vectors_

# One subplot row per class: grow the figure height with the class count so
# the panels remain legible instead of being squeezed into the default size.
plt.figure(figsize=(8, 1.5 * n_classes))
for i, cl in enumerate(class_labels):
    plt.subplot(n_classes, 1, i + 1)
    plt.title("Support vectors for class %d" % cl)
    for ts in support_vectors[i]:
        # Each support vector is flattened to 1-D before plotting.
        plt.plot(ts.ravel())

plt.tight_layout()
plt.show()