In [1]:
import sys
import os
# Put the absolute path of the local `codes` directory on sys.path so that
# modules stored there can be imported by the cells below (presumably utils,
# shapelet_classifier and DDPS -- confirm against the repository layout).
# The glob-looking file name is never expanded: only its directory part is
# used, resolved against the current working directory.
file_path = 'codes/*.py'
directory = os.path.dirname(os.path.abspath(file_path))
# Guard the append so that re-running this cell does not pile up duplicate
# sys.path entries.
if directory not in sys.path:
    sys.path.append(directory)

import numpy as np
import pandas as pd
import pickle
import utils
import shapelet_classifier
import time
from sklearn.metrics import classification_report


  'GPU support will not work. You must pip install mass-ts[gpu].')


## 1. Loading dataset

In [23]:
import sktime
from sktime.datasets import load_from_tsfile

# Folder with the example datasets, located relative to the installed sktime package.
DATA_PATH = os.path.join(os.path.dirname(sktime.__file__), "datasets/data")

# ItalyPowerDemand train / test splits in .ts format.
train_file = os.path.join(DATA_PATH, "ItalyPowerDemand/ItalyPowerDemand_TRAIN.ts")
test_file = os.path.join(DATA_PATH, "ItalyPowerDemand/ItalyPowerDemand_TEST.ts")

X_train, y_train = load_from_tsfile(train_file, return_data_type="numpy2d")
X_test, y_test = load_from_tsfile(test_file, return_data_type="numpy2d")

# Work with integer class labels from here on.
y_train = y_train.astype('int')
y_test = y_test.astype('int')

# Distinct labels seen in the training split (sorted, as returned by np.unique).
classes = np.unique(y_train)

## 2. Shapelet discovery with DDPS

In [21]:
import DDPS
# Discover shapelet candidates on the training split with DDPS, using the
# 'loose' overlap setting. Both returned objects are passed on to
# utils.keep_shapelets in the selection step.
sorted_candidates, sorted_cand_idx = DDPS.DDP_candidates(
    X_train,
    y_train,
    overlap='loose',
)

yes
loose
loose


## 3. Shapelet selection using SScv and SSpi

In [29]:
# Both SScv and SSpi pick their shapelets from the head of the sorted candidate
# list: at most 30 candidates, or fewer when some entry of sorted_candidates
# holds fewer than that.
min_n_candidates = min(len(sorted_candidates[key]) for key in sorted_candidates)
n_shapelets = min(min_n_candidates, 30)
cand_keep, idx_keep = utils.keep_shapelets(
    classes, sorted_candidates, sorted_cand_idx, n_shapelets
)

# Transform the training set once with every kept candidate. The
# cross-validation experiments then only slice this result, which is faster
# than re-transforming the data for each different set of shapelets.
transformed_X_train = utils.transform(X_train, cand_keep)

# Cross-validate with the first 1, 2, ..., n_shapelets shapelets of every class.
# shapelet_scores[k] holds the score obtained with k + 1 shapelets per class.
shapelet_scores = []
# Start of each class's slot of n_shapelets consecutive entries in
# transformed_X_train (assumes the transform keeps cand_keep's class-by-class
# order -- confirm in utils.transform).
class_offsets = [i * n_shapelets for i in range(len(classes))]
for k in range(n_shapelets):
    # Selecting these entries is equivalent to transforming the data with only
    # the first k + 1 shapelets of each class.
    indices = np.array(
        [range(offset, offset + k + 1) for offset in class_offsets]
    ).reshape(-1)
    X_train_hat = transformed_X_train[indices].transpose()
    # The helper returns the cross-validation score together with a classifier.
    score, clf = shapelet_classifier.classifier(X_train_hat, y_train)
    shapelet_scores.append(score)

# SScv: keep the number of shapelets per class that achieved the best
# cross-validation score.
# shapelet_scores[k] was measured with k + 1 shapelets per class, so the
# 0-based argmax position has to be shifted by one to become a shapelet count.
# Without the shift one shapelet too few is kept -- and none at all when a
# single shapelet scores best.
best_k = int(np.argmax(shapelet_scores)) + 1
best_shapelets, best_s_idx = utils.keep_shapelets(classes, sorted_candidates, sorted_cand_idx, best_k)

# Shapelet-transform both splits with the selected shapelets; the transpose
# matches the layout fed to the classifier during cross-validation.
SScv_X_train = utils.transform(X_train, best_shapelets).transpose()
SScv_X_test = utils.transform(X_test, best_shapelets).transpose()

# Only the classifier object is needed here; the returned score is discarded.
_, clf = shapelet_classifier.classifier(SScv_X_train, y_train)
SScv_model = clf.fit(SScv_X_train, y_train)  # train on the full training split

# Plain-text report (output_dict=False) of the predictions on the test split.
SScv_test_result = classification_report(y_test, SScv_model.predict(SScv_X_test), output_dict=False)

# SSpi: select shapelets through the ranking that shapelet_classifier.get_srank
# derives from the cross-validation scores (how the ranking is computed is
# defined in that helper, not here).
rank = shapelet_classifier.get_srank(shapelet_scores)
# Apply the same rank positions inside every class's slot of n_shapelets
# consecutive entries of cand_keep (assumes cand_keep is laid out class by
# class -- confirm in utils.keep_shapelets).
per_class_positions = [rank + i * n_shapelets for i in range(len(classes))]
indices = np.array(per_class_positions).reshape(-1)
best_shapelets = [cand_keep[pos] for pos in indices]

# Shapelet-transform both splits with the selected shapelets.
SSpi_X_train = utils.transform(X_train, best_shapelets).transpose()
SSpi_X_test = utils.transform(X_test, best_shapelets).transpose()

# Only the classifier object is needed here; the returned score is discarded.
_, clf = shapelet_classifier.classifier(SSpi_X_train, y_train)
SSpi_model = clf.fit(SSpi_X_train, y_train)  # train on the full training split

# Plain-text report (output_dict=False) of the predictions on the test split.
SSpi_predictions = SSpi_model.predict(SSpi_X_test)
SSpi_test_result = classification_report(y_test, SSpi_predictions, output_dict=False)

# Show the two test-set classification reports one after the other.
for label, report in (
    ('SScv results:', SScv_test_result),
    ('SSpi results:', SSpi_test_result),
):
    print(label, report)
