In [1]:
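# Environment used to run this notebook:
#   conda environment name: Dl_DANET
#   Python 3.6
#   PyTorch 1.8.2+cu111
# Additional packages required:
#   einops
#   seaborn
#   scikit-learn (imported as `sklearn`)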

In [2]:
pip show torch

/bin/bash: /home/ma00048/miniconda3/envs/DL/lib/libtinfo.so.6: no version information available (required by /bin/bash)
Name: torch
Version: 1.8.2
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: packages@pytorch.org
License: BSD-3
Location: /home/ma00048/miniconda3/envs/Dl_DANET/lib/python3.6/site-packages
Requires: typing_extensions, numpy, dataclasses
Required-by: torchvision, torchaudio
Note: you may need to restart the kernel to use updated packages.


In [3]:
import torch
import os
import argparse
from torch.autograd import Variable
from DANet.DA_Net import DA_Net
from DANet.util import compute_F1_score, exponential_decay, save_result, plot_roc, random_seed
from DANet.read_data import load_UEA

In [4]:
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.model_selection import cross_val_predict, KFold
import sklearn.preprocessing

import time
import sklearn
import numpy as np
import os
import sys
from joblib import parallel_backend

In [5]:
# ---------------------------------------------------------------------------
# Run configuration.
# argparse is used purely as a settings container: an empty argument list is
# parsed, so every option takes its default value.
# ---------------------------------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

parser = argparse.ArgumentParser(description='DA-Net for MTSC')

# Disabled options kept for reference:
#   --model        (str, default 'DA-Net')
#   --writer_path  (str, default 'runs/exp', TensorBoard path)
#   --seed         (int, default 1, random seed)
_OPTIONS = (
    ('--device', dict(type=str, default=device.type)),
    ('--length', dict(type=int, default=1000, help='Embedding length')),
    ('--data_path', dict(type=str, default="../datasets")),
    ('--dropout', dict(type=float, default=0.05, help='attention dropout rate')),
    ('--batch_size', dict(type=int, default=16)),
    ('--n_epochs', dict(type=int, default=2)),
    ('--cache_path', dict(type=str, default='./DANet/cache')),
    ('--window', dict(type=int, default=64)),  # candidate values: [32,48,64,80,96]
    ('--M_name', dict(type=str, default='DA-Net')),
)
for _flag, _spec in _OPTIONS:
    parser.add_argument(_flag, **_spec)

args = parser.parse_args([])
M_name = args.M_name

# Module-level `length` is what the visible cells pass to the network as
# `down_dim` and embed in the checkpoint file name.
# NOTE(review): it is independent of `args.length` (default 1000) — confirm which
# of the two is meant to be authoritative.
length = 2 * 1536


In [6]:
def GetDataAndNet(archive_path, archive_name, wa, prob, mask=1):
    """Build the data loaders and a DA-Net model for the current split.

    The split itself is not passed in: the function reads the module-level
    names ``X_train``, ``X_test``, ``y_train``, ``y_test`` and ``args`` (handed
    to ``load_UEA``), plus ``length`` and ``device``.

    Args:
        archive_path: Unused by this function; kept so existing calls still work.
        archive_name: Unused by this function; kept so existing calls still work.
        wa: Forwarded unchanged to ``DA_Net``.
        prob: Forwarded unchanged to ``DA_Net``.
        mask: Forwarded unchanged to ``DA_Net`` (default 1).

    Returns:
        Tuple ``(train_loader, test_loader, net, num_class)``. ``net`` is moved to
        ``device`` and wrapped in ``torch.nn.DataParallel`` when more than one
        CUDA device is visible.
    """
    train_loader, test_loader, num_class = load_UEA(X_train, X_test, y_train, y_test, args)

    # Peek at ONE batch to read the input dimensions. The built-in next(iter(...))
    # works on every PyTorch version, whereas the iterator's `.next()` method only
    # exists on older releases.
    first_inputs = next(iter(train_loader))[0]
    in_channel = first_inputs.shape[1]  # channel count of the time series
    time_stmp = first_inputs.shape[2]   # length of the time series

    net = DA_Net(
        t=time_stmp,
        down_dim=length,
        hidden_dim=(96, 192, 62),
        layers=(2, 2, 6, 2),

        heads=(3, 6, 12,24),
        channels=in_channel,
        num_classes=num_class,
        head_dim=32,
        window_size=args.window,
        downscaling_factors=(4, 2, 2,2),  # how long a span of time is treated as one feature

        relative_pos_embedding=True,
        wa=wa,
        prob=prob,
        mask=mask,).to(device)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        net = torch.nn.DataParallel(net)
    return train_loader, test_loader, net, num_class

In [7]:
def test(epoch):
    """Evaluate the module-level ``net`` on the module-level ``test_loader``.

    Also reads the module-level names ``device`` and ``loss_func``.

    Args:
        epoch: Index of the current epoch. Not used by the evaluation itself;
            kept so existing callers keep working.

    Returns:
        Tuple ``(total_test_acc, f1, precision, recall, inference_time, test_loss)``:
        ``total_test_acc`` is the NUMBER of correctly classified samples (callers
        divide by the dataset size); ``f1``/``precision``/``recall`` come from
        ``compute_F1_score`` over all batches; ``inference_time`` and ``test_loss``
        are those of the LAST batch only (``test_loss`` is a tensor).
    """
    total_pred = torch.tensor([], dtype=torch.int64).to(device)
    total_true = torch.tensor([], dtype=torch.int64).to(device)
    # Raw scores / labels, collected for the (currently disabled) ROC plot below.
    score_list = []
    label_list = []
    total_test_acc = 0

    # Switch to evaluation mode once, and disable autograd for the whole pass so
    # no computation graph is built (less memory, faster inference).
    net.eval()
    with torch.no_grad():
        # for batch_id, (x, y) in tqdm(enumerate(test_loader), total=len(test_loader)):
        for batch_id, (x, y) in enumerate(test_loader):
            x = x.float().to(device)
            y = y.to(device)

            start_time = time.time()
            embedding, encoder, output, pred_y = net(x)
            inference_time = time.time() - start_time

            _, y_pred = torch.max(pred_y, -1)
            total_test_acc += (y_pred.cpu() == y.cpu()).sum().item()

            total_pred = torch.cat([total_pred, y_pred], dim=0)
            total_true = torch.cat([total_true, y], dim=0)

            test_loss = loss_func(pred_y, y.to(torch.long))

            score_list.extend(pred_y.detach().cpu().numpy())
            label_list.extend(y.cpu().numpy())

    #plot_roc( num_class, label_list, score_list, L=length)

    # Local name `f1` avoids shadowing sklearn's `f1_score` imported at notebook level.
    f1, precision, recall = compute_F1_score(total_true, total_pred)

    return total_test_acc, f1, precision, recall, inference_time, test_loss

In [8]:
def train(optimizer):
    """Train the module-level ``net`` for ``n_epochs`` epochs, testing after each one.

    Reads the module-level names ``net``, ``train_loader``, ``test_loader``,
    ``loss_func``, ``device``, ``n_epochs``, ``LEARNING_RATE``, ``global_epoch``,
    ``M_name``, ``Dataset_name``, ``args``, ``length`` and ``file``.

    Args:
        optimizer: Optimizer for ``net``; it is passed through
            ``exponential_decay`` before every batch.

    Side effects:
        * prints one training line and one test line per epoch;
        * saves ``net`` with ``torch.save`` under ``DANet/saved_model/<M_name>/``
          whenever the number of correct test predictions improves;
        * after the last epoch, hands the final metrics to ``save_result``.
    """
    train_time = 0
    max_accuracy = 0
    # Per-epoch curves (kept for optional plotting).
    plot_train_loss = []
    plot_test_loss = []
    plot_train_acc = []
    plot_test_acc = []

    # Create output folders once, up front. exist_ok=True avoids the
    # check-then-create race of `os.path.exists(...) == False`.
    model_dir = f'DANet/saved_model/{M_name}'
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs('./DANet/DA_Net_results', exist_ok=True)

    n_train = len(train_loader.dataset)
    n_test = len(test_loader.dataset)

    for epoch in range(n_epochs):
        ls = []
        s_time = time.time()
        total_train_acc = 0

        # test() leaves the model in eval mode, so re-enable training mode every epoch.
        net.train()
        # for batch_id,(x,y) in tqdm(enumerate(train_loader), total=len(train_loader)):
        for batch_id, (x, y) in enumerate(train_loader):
            # NOTE(review): `global_epoch` is read from module scope and is never
            # updated inside this function, so the same value is passed for every
            # batch — confirm whether `epoch` was intended here.
            optimizer = exponential_decay(optimizer, LEARNING_RATE, global_epoch, 1, 0.90)

            x = x.float().to(device)
            y = y.to(device)
            # forward pass; only the class scores `pred_y` are used below
            embedding, encoder, output, pred_y = net(x)
            loss = loss_func(pred_y, y.to(torch.long))

            _, y_pred = torch.max(pred_y, -1)
            total_train_acc += (y_pred.cpu() == y.cpu()).sum().item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Store a detached copy so the list does not keep autograd graphs alive.
            ls.append(loss.detach())

        # `loss` here is the loss of the last batch of the epoch.
        print('Epoch: {:04d}'.format(epoch + 1),
              'loss_train: {:.8f}'.format(loss.item()),
              'acc_train: {:.4f}'.format(total_train_acc / n_train),
              'time: {:.4f}s'.format(time.time() - s_time))
        plot_train_loss.append(loss.item())
        plot_train_acc.append(total_train_acc / n_train)
        train_time += time.time() - s_time

        # print("Total time elapsed: {:.4f}s".format(train_time))
        total_test_acc, f1_score, precision, recall, inference_time, test_loss = test(epoch)
        plot_test_loss.append(test_loss.cpu().detach())
        plot_test_acc.append(total_test_acc / n_test)

        # Keep the checkpoint with the highest number of correct test predictions.
        if total_test_acc > max_accuracy:
            print('save best model')
            max_accuracy = total_test_acc
            torch.save(net,
                       f'{model_dir}/{Dataset_name} batch={args.batch_size} length={length} window={args.window}.pkl')

        print('Epoch: {:04d}'.format(epoch + 1),
              'loss_test: {:.8f}'.format(test_loss.item()),
              'acc_test: {:.4f}'.format(total_test_acc / n_test),
              'time: {:.4f}s'.format(time.time() - s_time))
    #plt.plot()

    # Metrics written here are those of the LAST epoch (not of the best checkpoint).
    save_result(file, ls[-1], total_test_acc / n_test, f1_score, precision, recall, train_time,
                inference_time, args.window, length)


In [9]:
# Every dataset name known to this notebook (kept for reference / easy selection).
ALL_DATASETS = [
    "BEARING_Univar",
    "PHM2022_Multivar",
    "PHM2022_Univar_PIN",
    "PHM2022_Univar_PO",
    "PHM2022_Univar_PDIN",
    "ETCHING_Multivar",
    "MFPT_48KHZ_Univar",
    "MFPT_96KHZ_Univar",
    "PADERBORN_6KHZ_Univar",
    "PADERBORN_4KHZ_Univar",
    "PADERBORN_64KHZ_Multivar",
    "PADERBORN_4KHZ_Multivar",
    "Hydraulic_systems_10HZ_Multivar",
    "Hydraulic_systems_100HZ_Multivar",
    "Gas_sensors_home_activity",
    "Control_charts",
    "CWRU_12k_DE_univar",
    "CWRU_12k_DE_multivar",
    "CWRU_12k_FE_univar",
    "CWRU_12k_FE_multivar",
    "CWRU_48k_DE_univar",
    "CWRU_48k_DE_multivar",
]

# Datasets actually processed by this run. Previously the full list was assigned
# to `datasets` and then immediately overwritten by this subset.
datasets = ["PHM2022_Multivar", "PHM2022_Univar_PDIN"]
datasets_path = "../datasets"

# Sanity check: load each selected dataset and its labels, and print the data shape.
for dataset in datasets:
    Dataset_name = dataset + "_Dataset"
    Dataset = np.load(os.path.join(datasets_path, Dataset_name + ".npy"))
    print(Dataset.shape)

    Labels_name = dataset + "_Labels"
    Labels = np.load(os.path.join(datasets_path, Labels_name + ".npy"))

(53785, 749, 3)
(53785, 749, 1)


In [10]:
# change this directory for your machine
root_dir = './'

# define the number of folds
n_folds = 5

# Fixed seed for the fold assignment so that the splits are reproducible across runs.
cv_seed = 1

# Run K-fold cross-validation of DA-Net on every selected dataset.
# GetDataAndNet(), train() and test() read their inputs from notebook-level names
# (X_train, X_test, y_train, y_test, net, train_loader, test_loader, loss_func,
# LEARNING_RATE, global_epoch, n_epochs, file, Dataset_name), so those names must be
# (re)assigned here for every fold.
for dataset in datasets:
    Dataset_name = dataset + "_Dataset"
    Dataset = np.load(datasets_path + "/" + Dataset_name + ".npy")

    Labels_name = dataset + "_Labels"
    Labels = np.load(datasets_path + "/"  + Labels_name + ".npy")

    # Create a folder for results. exist_ok=True already tolerates another process
    # creating the directory concurrently, without hiding real errors
    # (e.g. permission problems) the way a bare `except: pass` does.
    results_path = root_dir + "Results/" + Dataset_name
    os.makedirs(results_path, exist_ok=True)

    t_total = time.time() ##Start timing

    print(f"\n The dataset shape is:{Dataset.shape}")
    print(f"\n The number of data samples (N) is:{Dataset.shape[0]}")
    print(f"\n The number of TS length (T) is:{Dataset.shape[1]}")
    print(f"\n The number of TS dimention (M) is:{Dataset.shape[2]}")

    kf = KFold(n_splits=n_folds, shuffle=True, random_state=cv_seed)
    accuracy_scores = []
    f1_scores = []
    confusion_matrices = []
    report_list = []
    for fold, (train_idx, test_idx) in enumerate(kf.split(Dataset)):
        # split the data into training and testing sets
        X_train, X_test = Dataset[train_idx], Dataset[test_idx]
        y_train, y_test = Labels[train_idx], Labels[test_idx]

        wa=1
        prob=1
        file = r'./DANet/DA_Net_results/result_{0}_fold_{1}.csv'.format(Dataset_name, fold+1)

        train_loader, test_loader, net, num_class = GetDataAndNet(0, Dataset_name, wa, prob)

        LEARNING_RATE = 0.001
        # Start the optimizer at the configured learning rate (it was previously
        # created with lr=10, inconsistent with LEARNING_RATE).
        optimizer = torch.optim.Adam(
            net.parameters(),
            lr=LEARNING_RATE,
            betas=(0.9, 0.999),
            eps=1e-08)
        global_epoch = 0
        global_step = 0
        best_tst_accuracy = 0.0
        COMPUTE_TRN_METRICS = True
        n_epochs = args.n_epochs

        loss_func = torch.nn.CrossEntropyLoss()

        ## Create Classification module
        train(optimizer)

        # calculate the evaluation metrics
#         accuracy = total_test_acc
#         print(accuracy)

#         f1 = f1_score
#         print(f1)

#         confusion = confusion_matrix(y_test, y_pred)
#         print(confusion)

#         accuracy_scores.append(accuracy)
#         f1_scores.append(f1)
#         confusion_matrices.append(confusion)

#         report = classification_report(y_test, y_pred, zero_division=1)
#         report_list.append(report)
#         print(report)

#         print(f" fold {fold+1} is Finished!")

#         # save the output to a text file
#         with open(f'{results_path}/dataset_{dataset_name}_DANet_fold_{fold+1}.txt', 'w') as f:
#             f.write(f'Accuracy: {accuracy}\n')
#             f.write(f'F1 Score: {f1}\n')
#             f.write(f'Confusion Matrix:\n{confusion}\n\n')
#             f.write(f'Classification report:\n{report}\n\n')

#     with open(f'{results_path}/dataset_{dataset_name}_DANet.txt', 'w') as f:
#         f.write("Mean accuracy: {:.3f} (std={:.3f})\n".format(np.mean(accuracy_scores), np.std(accuracy_scores)))
#         f.write("Mean F1 score: {:.3f} (std={:.3f})\n".format(np.mean(f1_scores), np.std(f1_scores)))
#         f.write("Mean confusion matrix:\n{}\n".format(np.array2string(np.mean(confusion_matrices, axis=0))))
#         f.write("Total time elapsed: {:.4f}s".format(time.time() - t_total))

    print(" Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

    



 The dataset shape is:(53785, 749, 3)

 The number of data samples (N) is:53785

 The number of TS length (T) is:749

 The number of TS dimention (M) is:3
Let's use 2 GPUs!
Epoch: 0001 loss_train: 0.50085771 acc_train: 0.8358 time: 274.3857s
save best model
Epoch: 0001 loss_test: 0.27797702 acc_test: 0.8950 time: 300.2248s
Epoch: 0002 loss_train: 1.59540093 acc_train: 0.9488 time: 270.9376s
save best model
Epoch: 0002 loss_test: 0.09731893 acc_test: 0.9555 time: 296.9882s
accuracy: 0.9554708561866692
parameters have been saved to ./DA_Net_results/result_PHM2022_Multivar_Dataset_fold_1.csv
Let's use 2 GPUs!
Epoch: 0001 loss_train: 0.31282175 acc_train: 0.8307 time: 267.6500s
save best model
Epoch: 0001 loss_test: 0.02904009 acc_test: 0.9381 time: 293.6216s
Epoch: 0002 loss_train: 0.00928279 acc_train: 0.9490 time: 269.5931s
save best model
Epoch: 0002 loss_test: 0.02023693 acc_test: 0.9654 time: 295.8775s
accuracy: 0.9654178674351584
parameters have been saved to ./DA_Net_results/resul

KeyboardInterrupt: 

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
import pandas as pd

def calculate_metrics(y_true, y_pred, duration, y_true_val=None, y_pred_val=None):
    """Summarise classification quality in a one-row DataFrame.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        duration: Value stored as-is in the ``duration`` column.
        y_true_val: Optional ground-truth labels of a validation set.
        y_pred_val: Predicted labels for the validation set; used only when
            ``y_true_val`` is given.

    Returns:
        ``pandas.DataFrame`` with a single row (index 0) and the columns
        ``precision``, ``accuracy``, ``recall`` and ``duration``; an extra
        ``accuracy_val`` column is added when ``y_true_val`` is not ``None``.
        Precision and recall are macro-averaged.
    """
    # The builtin `float` replaces the `np.float` alias, which newer NumPy releases no longer provide.
    res = pd.DataFrame(data=np.zeros((1, 4), dtype=float), index=[0],
                       columns=['precision', 'accuracy', 'recall', 'duration'])
    res['precision'] = precision_score(y_true, y_pred, average='macro')
    res['accuracy'] = accuracy_score(y_true, y_pred)

    if y_true_val is not None:
        # this is useful when transfer learning is used with cross validation
        res['accuracy_val'] = accuracy_score(y_true_val, y_pred_val)

    res['recall'] = recall_score(y_true, y_pred, average='macro')
    res['duration'] = duration
    return res

In [None]:
# Build the metrics table, passing 0.0 as the duration.
# NOTE(review): `y_true` and `y_pred` are not assigned at notebook scope in any
# visible cell — they must be defined before this cell can run on a fresh kernel.
df_metrics = calculate_metrics(y_true, y_pred, 0.0)

In [None]:
# NOTE(review): `classifier` and `y_true` are not defined in any visible cell, so this
# cell fails on a fresh kernel — define them first, or remove the cell if it is a leftover.
classifier.predict(X_test, y_true,X_train,y_train,y_test,return_df_metrics = True)

In [None]:
import h5py
# Open the HDF5 file read-only. The handle `f` is left open on purpose so that the
# next cell can inspect it; call `f.close()` when done.
# NOTE(review): the training code visible in this notebook saves models with
# torch.save to DANet/saved_model/*.pkl, so this .hdf5 file presumably comes from
# another pipeline — confirm it exists before running.
f = h5py.File('./Results/PHM2022_Multivar_Datasetbest_model.hdf5', 'r')

In [None]:
# Show the names of the top-level entries of the HDF5 file opened in the previous cell.
list(f.keys())