In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import pyplot

from pyts.datasets import load_gunpoint
from pyts.transformation import ShapeletTransform
from pyts.classification import LearningShapelets

import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn import metrics

import time

In [2]:
def normalization_1(data):
    """Standardize every column of ``data`` (z-score along axis 0).

    The mean and the sample standard deviation (``ddof=1``) are computed
    per column. Columns whose standard deviation is exactly zero are
    divided by 1 instead of 0, so they come out as zeros rather than
    NaN/inf.

    Parameters
    ----------
    data : array-like
        Values to standardize; statistics are taken along axis 0.
        (The caller in this notebook passes a 2-D array with one column
        per process variable.)

    Returns
    -------
    data_ : standardized values, same shape as ``data``.
    data_mean : per-column mean.
    data_std : per-column sample standard deviation as computed
        (still 0 for constant columns, so ``data_ * data_std + data_mean``
        reproduces the input).
    """
    data_mean = np.mean(data, 0)
    data_std = np.std(data, 0, ddof=1)

    # Avoid 0/0 on constant columns: divide those by 1 so they map to 0.
    safe_std = np.where(data_std == 0, 1.0, data_std)
    data_ = (data - data_mean) / safe_std

    return data_, data_mean, data_std

In [3]:
def transpose_for_shapelet(x, n_variables=52, window_length=300):
    """Turn multivariate windows into univariate series for shapelet fitting.

    ``x`` is flattened to rows of ``n_variables`` values, standardized
    column-wise with ``normalization_1`` and cut into consecutive blocks
    of ``window_length`` rows. Each block is transposed, so every variable
    of every block becomes one row of length ``window_length``. Rows that
    do not fill a complete block at the end are dropped.

    Parameters
    ----------
    x : ndarray
        Data whose total size is a multiple of ``n_variables``.
    n_variables : int, default 52
        Number of values per time step.
    window_length : int, default 300
        Number of time steps per block.

    Returns
    -------
    train_feature_T : ndarray, shape (n_blocks * n_variables, window_length)
        One univariate series per (block, variable) pair, block-major.
    train_lable_T : ndarray of float, shape (n_blocks * n_variables,)
        Zero-based block index of each row.
    """
    x = x.reshape([-1, n_variables])
    x, _, _ = normalization_1(x)

    n_blocks = x.shape[0] // window_length

    # Keep only complete blocks, then swap the time and variable axes so
    # that each variable's trajectory becomes a row.
    blocks = x[:n_blocks * window_length].reshape([n_blocks, window_length, n_variables])
    train_feature_T = blocks.transpose(0, 2, 1).reshape([-1, window_length])

    # Every row inherits the index of the block it came from (float labels,
    # as produced by the previous np.ones(...) * i construction).
    train_lable_T = np.repeat(np.arange(n_blocks, dtype=float), n_variables)

    return train_feature_T, train_lable_T

In [4]:
# Load the same fixed window from every TEP fault file into one 3-D array.
filedir = "tep_test"

# Fault numbers to load (d01 ... d21). d09 is not in the file list this
# notebook has always used.
# NOTE(review): presumably excluded on purpose -- confirm.
fault_ids = [k for k in range(1, 22) if k != 9]

# os.path.join keeps the paths portable instead of hard-coding "\\".
filenames = [os.path.join(filedir, "d{:02d}_te.dat".format(k)) for k in fault_ids]

# Rows [160, 460) of each file are kept.
window_start = 160
window_length = 300

# One (window_length, n_variables) slice per file, stacked along a new
# leading axis -> (n_files, window_length, n_variables).
train_feature = np.stack(
    [np.genfromtxt(path)[window_start:window_start + window_length] for path in filenames]
)

TEPs_fault = train_feature[:,:,:]

故障類別數，某一故障+正常，序列長度，變量數:  (20, 300, 52)
故障數據---故障類別，序列長度，變量數:  (20, 300, 52)


In [6]:
# Build the shapelet training set: each row of train_feature_T is one
# variable's series from one loaded file, and train_lable_T holds the
# zero-based position of that file in the loaded stack.
train_feature_T, train_lable_T = transpose_for_shapelet(train_feature)


(1040, 300)
(1040,)


In [7]:
def st_fitting(x, y, n_shapelets_, window_sizes_):
    """Fit a pyts ``ShapeletTransform`` and return the fitted estimator.

    Parameters
    ----------
    x : array-like
        Training series, one per row.
    y : array-like
        Class label of each row of ``x``.
    n_shapelets_ : int
        Forwarded as ``n_shapelets``.
    window_sizes_ : sequence
        Forwarded as ``window_sizes``.

    Returns
    -------
    ShapeletTransform
        The fitted transformer (callers read e.g. ``indices_`` from it).
    """
    st = ShapeletTransform(
        n_shapelets=n_shapelets_,
        window_sizes=window_sizes_,
        random_state=42,  # fixed seed for reproducible shapelet selection
        sort=True,
        n_jobs=-1,
        remove_similar=True,
    )
    # Only the fitted estimator is returned, so fit() is sufficient; the
    # transformed training matrix was previously computed and discarded.
    st.fit(x, y)

    return st

In [8]:
# Shapelet window length: 90% of the 300-step series (= 270). Used as the
# only window size for the shapelet fit and as the length of the window
# cut out by index_selection.
selected_length = int(300*0.9)

In [9]:
# Fit the shapelet transform (requesting 10000 shapelets of length
# selected_length) and report the wall-clock time the fit took.
time_start = time.time()
train_st = st_fitting(train_feature_T, train_lable_T, 10000, [selected_length])
time_end = time.time()
print('time cost',time_end-time_start,'s')

time cost 289.9020872116089 s


In [10]:
# Output directory for the fitted shapelet index table.
folder = "TEP_shapelet_preprocess_ValSet_AllFault_win0.9-1/"

# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of isdir() followed by mkdir().
os.makedirs(folder, exist_ok=True)

In [11]:
# Persist the shapelet location table of the fitted transformer so that
# later cells can reload it from disk.
folder = "TEP_shapelet_preprocess_ValSet_AllFault_win0.9-1/"
val_index_path = os.path.join(folder, 'val_index')  # np.save appends ".npy"
np.save(val_index_path, train_st.indices_)

In [12]:
# Reload the shapelet index table written by the previous step.
filedir = "TEP_shapelet_preprocess_ValSet_AllFault_win0.9-1"

# os.listdir returns entries in arbitrary order; sort for a stable listing.
filenames = sorted(os.path.join(filedir, name) for name in os.listdir(filedir))

# NOTE(review): this resets train_feature, which the data-loading cell
# filled with the stacked fault windows; kept for compatibility, but it
# looks like a copy-paste leftover -- confirm nothing needs the array later.
train_feature = []

print(filenames)

# Load the table by name instead of relying on it being the first (or
# only) entry of the directory listing.
fault_index = np.load(os.path.join(filedir, 'val_index.npy'))
print(fault_index.shape)

['TEP_shapelet_preprocess_ValSet_AllFault_win0.9-1\\val_index.npy']
(1040, 3)


In [13]:
# Inspect the loaded shapelet index table. Later cells read column 0 as the
# row index into the shapelet training matrix and column 1 as the shapelet's
# start position; column 2 looks like the matching end position
# (start + window length) -- TODO confirm against the pyts documentation.
fault_index

array([[315,   1, 271],
       [344,   2, 272],
       [476,  25, 295],
       ...,
       [ 98,  20, 290],
       [309,  24, 294],
       [297,   1, 271]], dtype=int64)

In [14]:
def _group_indices_by_fault(indices, n_faults, n_variables):
    """Split shapelet index rows into one array per fault.

    Column 0 of ``indices`` is a row number in the shapelet training
    matrix, whose rows are laid out fault-major with ``n_variables`` rows
    per fault, so integer division by ``n_variables`` recovers the fault
    position. Row order inside each group is preserved. Rows that map to a
    position >= ``n_faults`` are ignored.

    Returns a list of ``n_faults`` arrays, each of shape (n_rows_k, 3).
    """
    indices = np.asarray(indices)
    fault_of_row = indices[:, 0] // n_variables
    return [indices[fault_of_row == k] for k in range(n_faults)]


# TEPs_fault is (n_faults, sequence_length, n_variables); take the group
# count and rows-per-fault from it instead of hard-coding 20 and 52.
n_faults, _, n_variables = TEPs_fault.shape

# fault_index_all[k] holds the shapelet rows that belong to the k-th loaded
# fault file (this replaces the separate fault_index_1 ... fault_index_20).
fault_index_all = _group_indices_by_fault(fault_index, n_faults, n_variables)

In [15]:
def np_unranked_unique(nparray):
    """Return the distinct values of ``nparray`` in order of first appearance.

    Unlike ``np.unique`` the result is not sorted. The previous
    implementation pre-filled its result with zeros and used ``in`` to test
    membership, so a genuine 0 in the input was treated as already seen and
    ended up in the wrong position; it was also quadratic in the input size.

    Parameters
    ----------
    nparray : array-like
        Values to de-duplicate (flattened before processing).

    Returns
    -------
    ndarray of float64
        Unique values ordered by first occurrence. The float dtype is kept
        for backward compatibility with existing callers.
    """
    values = np.asarray(nparray).ravel()
    # return_index gives the position of each unique value's first
    # occurrence; sorting those positions restores the appearance order.
    _, first_positions = np.unique(values, return_index=True)
    return values[np.sort(first_positions)].astype(float)

In [16]:
def index_selection(indices_input, window_length=None):
    """Pick the time window given by the first shapelet in ``indices_input``.

    The window starts at the start position (column 1) of the first row
    and spans ``window_length`` time steps. Reading the first row directly
    is equivalent to taking the first element of the order-preserving
    unique start positions, but it also works when that start is 0 and
    avoids de-duplicating the whole column.

    Parameters
    ----------
    indices_input : array-like, shape (n_shapelets, 3)
        Shapelet index rows; only column 1 of the first row is read.
    window_length : int, optional
        Length of the selected window. Defaults to the notebook-level
        ``selected_length``.

    Returns
    -------
    (time_point_select_start, time_point_select_end) : tuple of int
        Start (inclusive) and end (exclusive) of the window.

    Raises
    ------
    IndexError
        If ``indices_input`` contains no rows.
    """
    if window_length is None:
        window_length = selected_length  # notebook-level default

    indices = np.asarray(indices_input)
    if indices.ndim != 2 or indices.shape[0] == 0:
        raise IndexError("index_selection needs at least one shapelet index row")

    time_point_select_start = int(indices[0, 1])
    time_point_select_end = int(time_point_select_start + window_length)

    return time_point_select_start, time_point_select_end

In [17]:
def tep_fault_selection(tep_fault, time_point_select_start, time_point_select_end):
    """Cut a time window out of one fault's data and add a leading axis.

    Parameters
    ----------
    tep_fault : ndarray, shape (sequence_length, n_variables)
        Data of a single fault.
    time_point_select_start : int
        First time step of the window (inclusive).
    time_point_select_end : int
        End of the window (exclusive).

    Returns
    -------
    ndarray, shape (1, window_length, n_variables)
        A copy of the selected rows.
    """
    tep_fault = np.asarray(tep_fault)
    window = tep_fault[time_point_select_start: time_point_select_end]

    # The variable count comes from the argument itself rather than from
    # the notebook-level TEPs_fault array, so the function is self-contained.
    selected_tep_fault = np.array(window).reshape([1, -1, tep_fault.shape[-1]])

    return selected_tep_fault

In [18]:
# Output directory for the per-fault selected windows.
folder = 'TEP_shapelet_preprocess_ValSet_AllFault_win0.9-2/'

# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of isdir() followed by mkdir().
os.makedirs(folder, exist_ok=True)

In [19]:
# Sanity check: number of per-fault shapelet index groups.
len(fault_index_all)

20

In [20]:
# NOTE(review): this list is initialised but never filled in this cell.
selected_TEPs_fault = []

# For every fault: locate the window given by its shapelet indices, cut it
# out of that fault's data and save it as its own .npy file in `folder`.
for i, indices in enumerate(fault_index_all):
    tep_fault = TEPs_fault[i]

    time_point_select_start, time_point_select_end = index_selection(indices)
    selected_tep_fault = tep_fault_selection(
        tep_fault, time_point_select_start, time_point_select_end
    )
    print(selected_tep_fault.shape)

    # Two-digit, zero-padded, zero-based position (00, 01, ..., 19).
    save_name = folder + 'val_selected_tep_fault' + str(i).zfill(2)

    np.save(save_name, selected_tep_fault)

(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
(1, 270, 52)
