In [2]:
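# Intended environment
#   Environment name: Dl_DANET
#   Python: 3.6
#   PyTorch: 1.8.2+cu111
#   Extra packages: einops, seaborn, sklearn (scikit-learn)
# NOTE: the `pip show torch` output recorded below reports torch 1.8.1 installed under
#       Python 3.9, so the kernel used for this run differs from the environment listed here.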

In [3]:
# Print the installed torch package metadata (version, install location, dependencies).
# NOTE(review): a bare `pip ...` line only works through IPython's automagic; `%pip show torch`
# is the explicit form — confirm automagic is enabled wherever this notebook runs.
pip show torch

Name: torch
Version: 1.8.1
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: packages@pytorch.org
License: BSD-3
Location: /Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages
Requires: numpy, typing-extensions
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [5]:
from __future__ import division
from __future__ import print_function

import math
import sys
import time
import argparse

In [6]:
import torch.optim as optim
from TAPNet.models import TapNet
from TAPNet.utils import *
import torch.nn as nn
import torch.nn.functional as F

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.model_selection import cross_val_predict, KFold
import sklearn.preprocessing

import time
import sklearn
import numpy as np
import os
import sys
from joblib import parallel_backend

In [8]:
# Experiment configuration, expressed as an argparse parser so the notebook shares
# its option names with a command-line script.
# Help texts use argparse's ``%(default)s`` placeholder so the documented default
# always matches the real one (several hand-written "Default:" notes had gone stale).
parser = argparse.ArgumentParser()

# dataset settings
parser.add_argument('--data_path', type=str, default="../datasets",
                    help='the path of data. Default: %(default)s')
# parser.add_argument('--dataset', type=str, default="NATOPS", #NATOPS
#                     help='time series dataset. Options: See the datasets list')

# cuda settings
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
#parser.add_argument('--seed', type=int, default=42, help='Random seed.')

# Training parameter settings
parser.add_argument('--epochs', type=int, default=3000,
                    help='Number of epochs to train. Default: %(default)s')
parser.add_argument('--lr', type=float, default=1e-5,
                    help='Initial learning rate. Default: %(default)s')
parser.add_argument('--wd', type=float, default=1e-3,
                    help='Weight decay (L2 loss on parameters). Default: %(default)s')
parser.add_argument('--stop_thres', type=float, default=1e-9,
                    help='The stop threshold for the training error. If the difference between training losses '
                         'between epochs is less than the threshold, the training will be stopped. '
                         'Default: %(default)s')

# Model parameters
parser.add_argument('--use_cnn', type=boolean_string, default=True,
                    help='whether to use CNN for feature extraction. Default: %(default)s')
parser.add_argument('--use_lstm', type=boolean_string, default=True,
                    help='whether to use LSTM for feature extraction. Default: %(default)s')
parser.add_argument('--use_rp', type=boolean_string, default=True,
                    help='Whether to use random projection. Default: %(default)s')
parser.add_argument('--rp_params', type=str, default='-1,3',
                    help='Parameters for random projection: number of random projection, '
                         'sub-dimension for each random projection. Default: %(default)s')
parser.add_argument('--use_metric', action='store_true', default=False,
                    help='whether to use the metric learning for class representation. Default: %(default)s')
parser.add_argument('--metric_param', type=float, default=0.01,
                    help='Metric parameter for prototype distances between classes. Default: %(default)s')
parser.add_argument('--filters', type=str, default="256,256,128",
                    help='filters used for convolutional network. Default: %(default)s')
parser.add_argument('--kernels', type=str, default="8,5,3",
                    help='kernels used for convolutional network. Default: %(default)s')
parser.add_argument('--dilation', type=int, default=1,
                    help='the dilation used for the first convolutional layer. '
                         'If set to -1, use the automatic number. Default: %(default)s')
parser.add_argument('--layers', type=str, default="500,300",
                    help='layer settings of mapping function. Default: %(default)s')
parser.add_argument('--dropout', type=float, default=0,
                    help='Dropout rate (1 - keep probability). Default: %(default)s')
parser.add_argument('--lstm_dim', type=int, default=128,
                    help='Dimension of LSTM Embedding. Default: %(default)s')

# Parse an explicit empty argument list: every option takes its default and
# argparse does not look at the kernel process's own sys.argv.
args = parser.parse_args([])
# CUDA is used only when it was not disabled and a device is actually available.
args.cuda = not args.no_cuda and torch.cuda.is_available()

In [9]:
# np.random.seed(args.seed)
# torch.manual_seed(args.seed)
# if args.cuda:
#     torch.cuda.manual_seed(args.seed)


def _as_number_list(value, cast):
    """Turn a comma-separated option string into a list of ``cast`` values.

    Values that are already a sequence are re-cast element by element, so this
    cell can be executed more than once without failing on ``.split``.
    """
    items = value.split(",") if isinstance(value, str) else value
    return [cast(item) for item in items]


args.sparse = True
args.layers = _as_number_list(args.layers, int)
args.kernels = _as_number_list(args.kernels, int)
args.filters = _as_number_list(args.filters, int)
args.rp_params = _as_number_list(args.rp_params, float)

if not args.use_lstm and not args.use_cnn:
    # Raise instead of calling exit(): inside a notebook exit() tears down the
    # kernel, whereas an exception stops the run and keeps the session inspectable.
    raise ValueError("Must specify one encoding method: --use_lstm or --use_cnn")

# Echo the effective configuration, sorted by option name.
print("\nParameters:")
for attr, value in sorted(args.__dict__.items()):
    print("\t{}={}".format(attr.upper(), value))



Parameters:
	CUDA=False
	DATA_PATH=../datasets
	DILATION=1
	DROPOUT=0
	EPOCHS=3000
	FILTERS=[256, 256, 128]
	KERNELS=[8, 5, 3]
	LAYERS=[500, 300]
	LR=1e-05
	LSTM_DIM=128
	METRIC_PARAM=0.01
	NO_CUDA=False
	RP_PARAMS=[-1.0, 3.0]
	SPARSE=True
	STOP_THRES=1e-09
	USE_CNN=True
	USE_LSTM=True
	USE_METRIC=False
	USE_RP=True
	WD=0.001


In [10]:
# training function
def _accuracy_value(metric):
    """Return the accuracy component of an ``accuracy(...)`` result.

    ``test()`` unpacks ``accuracy(...)`` as ``(acc, f1)`` whereas the training
    loop only needs the accuracy, so both a bare value and a tuple/list whose
    first element is the accuracy are accepted.
    """
    if isinstance(metric, (tuple, list)):
        return metric[0]
    return metric


def train():
    """Run full-batch training of the global ``model`` for up to ``args.epochs`` epochs.

    Reads the notebook globals ``model``, ``optimizer``, ``input``, ``labels``,
    ``idx_train``, ``idx_val`` and ``args``. Each epoch does one forward pass,
    computes the cross-entropy loss on the ``idx_train`` rows (plus the
    prototype-distance term when ``args.use_metric`` is set) and takes one
    optimizer step. Training stops early when the loss changes by less than
    ``args.stop_thres`` or increases compared with the previous epoch.

    Prints per-epoch metrics, then the validation accuracy observed at the
    lowest training loss and the best validation accuracy seen. Returns None.
    """
    loss_list = [sys.maxsize]
    test_best_possible, best_so_far = 0.0, sys.maxsize
    # Defined up front so the final report cannot hit an unbound name when no
    # epoch records an improvement (zero epochs, or a NaN loss).
    test_acc = 0.0
    for epoch in range(args.epochs):

        t = time.time()
        model.train()
        optimizer.zero_grad()

        output, proto_dist = model(input)

        loss_train = F.cross_entropy(output[idx_train], torch.squeeze(labels[idx_train]))
        if args.use_metric:
            loss_train = loss_train + args.metric_param * proto_dist

        # Early stopping: the loss has plateaued or got worse. The check runs
        # before backward(), so the epoch that triggers it applies no update.
        if abs(loss_train.item() - loss_list[-1]) < args.stop_thres \
                or loss_train.item() > loss_list[-1]:
            break
        else:
            loss_list.append(loss_train.item())

        acc_train = _accuracy_value(accuracy(output[idx_train], labels[idx_train]))
        loss_train.backward()
        optimizer.step()

        # Validation metrics reuse the forward pass made before this epoch's step.
        loss_val = F.cross_entropy(output[idx_val], torch.squeeze(labels[idx_val]))
        acc_val = _accuracy_value(accuracy(output[idx_val], labels[idx_val]))

        print('Epoch: {:04d}'.format(epoch + 1),
              'loss_train: {:.8f}'.format(loss_train.item()),
              'acc_train: {:.4f}'.format(acc_train.item()),
              'loss_val: {:.4f}'.format(loss_val.item()),
              'acc_val: {:.4f}'.format(acc_val.item()),
              'time: {:.4f}s'.format(time.time() - t))

        if acc_val.item() > test_best_possible:
            test_best_possible = acc_val.item()
        # Remember the validation accuracy at the lowest training loss so far.
        if best_so_far > loss_train.item():
            best_so_far = loss_train.item()
            test_acc = acc_val.item()
    print("test_acc: " + str(test_acc))
    print("best possible: " + str(test_best_possible))

In [9]:
# test function
def test():
    output, proto_dist = model(input)
    loss_test = F.cross_entropy(output[idx_test], torch.squeeze(labels[idx_test]))
    if args.use_metric:
        loss_test = loss_test - args.metric_param * proto_dist

    acc_test, f1 = accuracy(output[idx_test], labels[idx_test])
    print(acc_test)
    
    print(Dataset_name, "Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()),
          "F1-score= {:.4f}".format(f1.item()))
    
    accuracy_scores.append(acc_test.item())
    f1_scores.append(f1.item())

    # save the output to a text file
    with open(f'{results_path}/dataset_{dataset_name}_TAPNet_fold_{fold+1}.txt', 'w') as f:
        f.write(f'Accuracy: {acc_test.item()}\n')
        f.write(f'F1 Score: {f1.item()}\n')

In [11]:
# Names of the datasets to evaluate and the folder holding their .npy files.
datasets = ["PHM2022_Multivar", "PHM2022_Univar_PDIN"]
datasets_path = "../datasets"

# Sanity check: load each dataset and its labels once and print the data shape.
# After the loop the globals refer to the last dataset in the list.
for dataset in datasets:
    Dataset_name = "{}_Dataset".format(dataset)
    Dataset = np.load("{}/{}.npy".format(datasets_path, Dataset_name))
    print(Dataset.shape)

    Labels_name = "{}_Labels".format(dataset)
    Labels = np.load("{}/{}.npy".format(datasets_path, Labels_name))

(53785, 749, 3)
(53785, 749, 1)


In [12]:
# change this directory for your machine
root_dir = './'

# define the number of folds
n_folds = 5

# Seed for the K-fold shuffle so fold membership is repeatable between runs.
# (Model weight initialisation is not seeded here.)
cv_seed = 42

# Snapshot the user-supplied settings once. The per-fold values below are derived
# from these snapshots and kept in local variables, so one fold's derived value is
# never fed back in as the next fold's (or next dataset's) input.
rp_spec = list(args.rp_params)
dilation_spec = args.dilation

# perform cross-validation for each dataset and algorithm combination
for dataset in datasets:
    Dataset_name = dataset + "_Dataset"
    Dataset = np.load(datasets_path + "/" + Dataset_name + ".npy")

    Labels_name = dataset + "_Labels"
    Labels = np.load(datasets_path + "/" + Labels_name + ".npy")

    # Create a folder for results. exist_ok tolerates a directory that already
    # exists (or is created concurrently) without hiding genuine errors.
    results_path = root_dir + "Results/" + Dataset_name
    os.makedirs(results_path, exist_ok=True)

    t_total = time.time()  ##Start timing

    print("Loading dataset", Dataset_name, "...")
    print(f"\n The dataset shape is:{Dataset.shape}")
    print(f"\n The number of data samples (N) is:{Dataset.shape[0]}")
    print(f"\n The number of TS length (T) is:{Dataset.shape[1]}")
    print(f"\n The number of TS dimention (M) is:{Dataset.shape[2]}")

    # Model and optimizer
    model_type = "TapNet"

    kf = KFold(n_splits=n_folds, shuffle=True, random_state=cv_seed)
    # Per-fold results; test() appends to these lists.
    accuracy_scores = []
    f1_scores = []
    confusion_matrices = []
    report_list = []
    for fold, (train_idx, test_idx) in enumerate(kf.split(Dataset)):
        # split the data into training and testing sets
        X_train, X_test = Dataset[train_idx], Dataset[test_idx]
        y_train, y_test = Labels[train_idx], Labels[test_idx]

        if model_type == "TapNet":

            # NOTE(review): load_raw_ts is defined outside this notebook; the printed
            # "Data shape" suggests features come back as (samples, channels, length) — confirm.
            features, labels, idx_train, idx_val, idx_test, nclass = load_raw_ts(X_train, X_test, y_train, y_test)

            # resolve the random projection parameters for this fold
            dim = features.shape[1]
            if rp_spec[0] < 0:
                rp_params = [3, math.floor(dim / (3 / 2))]
            else:
                rp_params = [int(rp_spec[0]), math.floor(dim / rp_spec[1])]
            print("rp_params:", rp_params)

            # resolve the dilation parameter (-1 means: derive it from the series length)
            dilation = dilation_spec
            if dilation == -1:
                dilation = math.floor(features.shape[2] / 64)

            print("Data shape:", features.size())
            model = TapNet(nfeat=features.shape[1],
                           len_ts=features.shape[2],
                           layers=args.layers,
                           nclass=nclass,
                           dropout=args.dropout,
                           use_lstm=args.use_lstm,
                           use_cnn=args.use_cnn,
                           filters=args.filters,
                           dilation=dilation,
                           kernels=args.kernels,
                           use_metric=args.use_metric,
                           use_rp=args.use_rp,
                           rp_params=rp_params,
                           lstm_dim=args.lstm_dim
                           )

            # cuda
            if args.cuda:
                #model = nn.DataParallel(model) Used when you have more than one GPU. Sometimes work but not stable
                model.cuda()
                features, labels, idx_train = features.cuda(), labels.cuda(), idx_train.cuda()
            # train() and test() read this global by name, so the name `input`
            # (which shadows the builtin) has to stay.
            input = (features, labels, idx_train, idx_val, idx_test)

        # init the optimizer
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr, weight_decay=args.wd)

        train()
        print("Optimization Finished!")
        # Testing
        test()

    # Cross-fold summary. The notebook's global is `Dataset_name`; the lowercase
    # `dataset_name` used previously was never defined.
    with open(f'{results_path}/dataset_{Dataset_name}_TAPNet.txt', 'w') as f:
        f.write("Mean accuracy: {:.3f} (std={:.3f})\n".format(np.mean(accuracy_scores), np.std(accuracy_scores)))
        f.write("Mean F1 score: {:.3f} (std={:.3f})\n".format(np.mean(f1_scores), np.std(f1_scores)))
        if confusion_matrices:
            f.write("Mean confusion matrix:\n{}\n".format(np.array2string(np.mean(confusion_matrices, axis=0))))
        else:
            # Nothing was recorded for this dataset; say so instead of writing a mean of an empty list.
            f.write("Mean confusion matrix: not recorded\n")
        f.write("Total time elapsed: {:.4f}s".format(time.time() - t_total))

    print(" Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

    


Loading dataset PHM2022_Multivar_Dataset ...

 The dataset shape is:(53785, 749, 3)

 The number of data samples (N) is:53785

 The number of TS length (T) is:749

 The number of TS dimention (M) is:3
rp_params: [3, 2]
Data shape: torch.Size([53785, 3, 749])
Layers [512, 500, 300]


: 

: 

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
import pandas as pd

def calculate_metrics(y_true, y_pred, duration, y_true_val=None, y_pred_val=None):
    """Collect classification metrics into a one-row DataFrame.

    Parameters
    ----------
    y_true, y_pred : true and predicted labels, passed to the sklearn metric functions.
    duration : value stored unchanged in the ``duration`` column.
    y_true_val, y_pred_val : optional validation labels and predictions; when
        ``y_true_val`` is given an ``accuracy_val`` column is added.

    Returns
    -------
    pandas.DataFrame with index ``[0]`` and columns ``precision``, ``accuracy``,
    ``recall``, ``duration`` (plus ``accuracy_val`` when validation data is given).
    Precision and recall are macro-averaged.
    """
    # Builtin float replaces the np.float alias, which newer NumPy releases no longer provide.
    res = pd.DataFrame(data=np.zeros((1, 4), dtype=float), index=[0],
                       columns=['precision', 'accuracy', 'recall', 'duration'])
    res['precision'] = precision_score(y_true, y_pred, average='macro')
    res['accuracy'] = accuracy_score(y_true, y_pred)

    if y_true_val is not None:
        # this is useful when transfer learning is used with cross validation
        res['accuracy_val'] = accuracy_score(y_true_val, y_pred_val)

    res['recall'] = recall_score(y_true, y_pred, average='macro')
    res['duration'] = duration
    return res

In [None]:
# Compute the metrics table with a placeholder duration of 0.0.
# NOTE(review): `y_true` and `y_pred` are not assigned in any cell visible in this notebook;
# presumably they came from an earlier session — confirm before running.
df_metrics = calculate_metrics(y_true, y_pred, 0.0)

In [None]:
# NOTE(review): `classifier` and `y_true` are not defined in any cell visible in this notebook,
# and `X_train`/`X_test`/`y_train`/`y_test` hold whatever the last cross-validation fold left
# behind — confirm where these are meant to come from before running.
classifier.predict(X_test, y_true,X_train,y_train,y_test,return_df_metrics = True)

In [None]:
import h5py
# Open a saved HDF5 file in read-only mode; the next cell inspects it through `f`.
# NOTE(review): no cell visible here closes this handle — call f.close() once inspection is done.
f = h5py.File('./Results/PHM2022_Multivar_Datasetbest_model.hdf5', 'r')

In [None]:
# List the keys of the HDF5 file opened in the previous cell (requires `f` to still be open).
list(f.keys())