In [1]:
import pandas as pd

# Load the training split of the .ts file as a tab-separated table.
# The first line of the file is consumed as the column header, so the remaining
# "@..." metadata lines show up as ordinary rows (see the head() output below).
df = pd.read_csv('../datasets/classification/FaultDetectionA/FaultDetectionA_TRAIN.ts', sep='\t')

# If your file doesn't have headers or is space-separated, adjust accordingly:
# df = pd.read_csv('your_file.ts', sep=' ', header=None, index_col=0, parse_dates=True)

# Display the first few rows to inspect the metadata lines
print(df.head())

  @problemName FaultDetectionA_COMBO.ts
0                     @timestamps false
1                      @univariate true
2                     @equalLength true
3                    @seriesLength 5120
4                @classLabel true 0 1 2


In [2]:
# Sanity check on one data row (position 7): the text after ':' in the last
# comma-separated field is parsed as an integer (used as the class label below).
int(list(df.iloc[7])[0].split(',')[-1].split(':')[1])

1

In [3]:
# Split the same row on ':' and then each part on ',':
# x[0] holds the series values (as strings), x[1] holds the label part.
x = [i.split(",") for i in list(df.iloc[7])[0].split(':')]

In [4]:
# First three values of the series; they are still strings at this point.
x[0][0:3]

['0.015258789052416', '-0.030517578104832', '-0.0701904296411136']

In [5]:
# Label of this row converted to an int.
int(x[1][0])

1

In [6]:
from torch.utils.data import Dataset
import torch
import numpy as np
from torch.utils.data import DataLoader

class FaultDataset(Dataset):
    """Map-style dataset pairing each FaultDetectionA series with its class label.

    The dataset is available at:
    https://www.timeseriesclassification.com/description.php?Dataset=FaultDetectionA

    Args:
        sequences: tensor (or array-like convertible with ``torch.as_tensor``)
            whose first dimension indexes the individual series.
        labels: tensor (or array-like) with one label per series, in the same order.

    Attributes:
        sequences: the series, as a tensor.
        labels: the labels, as a tensor.
        num_classes: number of distinct values found in ``labels``.

    Raises:
        ValueError: if ``sequences`` and ``labels`` do not contain the same
            number of entries.
    """
    def __init__(self, sequences, labels):
        # as_tensor leaves existing tensors untouched and converts lists/arrays,
        # so both tensor and array-like inputs are accepted.
        self.labels = torch.as_tensor(labels)
        self.sequences = torch.as_tensor(sequences)

        # Fail early instead of hitting an index error (or silently ignoring
        # extra entries) later during iteration.
        if self.sequences.shape[0] != self.labels.shape[0]:
            raise ValueError(
                f"sequences and labels must have the same length, got "
                f"{self.sequences.shape[0]} and {self.labels.shape[0]}"
            )

        self.num_classes = len(torch.unique(self.labels))  # count the number of unique labels

    def __len__(self):
        """Return the number of series in the dataset."""
        return self.sequences.shape[0]

    def __getitem__(self, idx):
        """Return ``(sequence, label)`` for the series at position ``idx``.

        The sequence is reshaped to ``(seq_len, 1)`` (one feature per time
        step) and the label to a 1-D tensor of shape ``(1,)``.
        """
        sequence = torch.reshape(self.sequences[idx], (-1, 1))  # shape: (seq_len, num_features=1)
        label = torch.reshape(self.labels[idx], (-1,))  # shape: (1,)

        return sequence, label

In [7]:
sequences, labels = [], []

# The first rows of `df` are "@..." metadata lines (see the head() output of the
# load cell); series data is read from index label 6 onward. Each data row has
# the form "v1,v2,...,vN:label".
for line in df.loc[6:].iloc[:, 0]:
    values_part, label_part = (part.split(",") for part in line.split(':'))
    sequences.append([float(value) for value in values_part])
    labels.append(float(label_part[0]))

# Convert the accumulated Python lists into float32 tensors.
labels = torch.tensor(labels, dtype=torch.float32)
sequences = torch.tensor(sequences, dtype=torch.float32)

In [8]:
# Number of parsed training series.
len(sequences)

10912

In [9]:
# Number of parsed training labels; should match the number of series above.
len(labels)

10912

In [10]:
# Wrap the parsed training tensors in the dataset class defined above.
fault_ds = FaultDataset(sequences, labels)

In [11]:
# Training loader: shuffled mini-batches of 8 series.
fault_dl = DataLoader(fault_ds, batch_size=8, shuffle=True)

In [12]:
# (number of series, series length) of the training tensor.
fault_ds.sequences.shape

torch.Size([10912, 5120])

In [13]:
from time import time


def train_one_epoch(model, epoch_index, train_data, val_data=None):
    """Train the model for one epoch.

    Args:
        model: dict with the keys ``"model"`` (the network, called on each input
            batch), ``"loss_fn"`` (called as ``loss_fn(outputs, targets)``) and
            ``"opt"`` (the optimizer).
        epoch_index: index of the current epoch; accepted for interface
            compatibility and not used by the computation.
        train_data: iterable of ``(inputs, labels)`` batches. Labels are cast
            to ``torch.long`` and flattened before being passed to the loss.
        val_data: accepted for interface compatibility; validation is not run
            and the two validation slots of the result are always ``None``.

    Returns:
        Tuple ``(train_loss, train_acc, None, None, time_taken)`` where
        ``train_loss`` is the mean of the per-batch losses, ``train_acc`` is the
        percentage of correctly classified samples over the whole epoch (a
        Python float), and ``time_taken`` is the wall-clock duration in seconds.
        For an empty ``train_data`` both loss and accuracy are ``nan``.
    """
    net = model["model"]
    optimizer = model["opt"]
    loss_fn = model["loss_fn"]

    # Move batches to wherever the model lives, so a CPU model keeps working on
    # a machine that also has a GPU. If the network exposes no parameters, fall
    # back to "CUDA when available".
    parameters = getattr(net, "parameters", None)
    first_param = next(iter(parameters()), None) if callable(parameters) else None
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    running_loss = 0.0
    correct_samples = 0
    seen_samples = 0
    num_batches = 0

    time_start = time()
    for inputs, labels in train_data:
        inputs = inputs.to(device)
        # Class indices as a flat long tensor: used both as loss target and
        # for the accuracy comparison.
        targets = labels.to(device).to(torch.long).reshape(-1)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        predictions = torch.argmax(outputs, dim=1)
        # Count per sample (not per batch) so a smaller final batch does not
        # get the same weight as a full one.
        correct_samples += (predictions == targets).sum().item()
        seen_samples += targets.numel()
        num_batches += 1

    train_last_loss = running_loss / num_batches if num_batches else float("nan")
    train_acc = 100.0 * correct_samples / seen_samples if seen_samples else float("nan")

    time_taken = time() - time_start

    return train_last_loss, train_acc, None, None, time_taken

In [14]:
from uuid import uuid4
from src.timeseries_transformer.constants import EPOCHS
from src.timeseries_transformer.timeseries_model import EncoderClassifier
from src.timeseries_transformer.constants import EMBED_SIZE, NUM_ATTN_HEADS, ENCODER_FF_DIM, DROPOUT, \
    NUM_ENCODER_BLOCKS, LEARNING_RATE
from torch.nn import CrossEntropyLoss
from torch.optim import Adam


# Build the classifier. The input shape is derived from the training loader and
# dataset (batch size, series length, one feature per step) so it cannot drift
# from the data; the dataset header lists three class labels (0, 1, 2).
model = EncoderClassifier(
    input_shape=(fault_dl.batch_size, fault_ds.sequences.shape[1], 1),
    embed_size=EMBED_SIZE,
    num_heads=NUM_ATTN_HEADS,
    ff_dim=ENCODER_FF_DIM,
    dropout=DROPOUT,
    num_blocks=NUM_ENCODER_BLOCKS,
    num_classes=3,
)
# Only move to the GPU when one exists; train_one_epoch applies the same guard
# to its batches, so the notebook also runs on CPU-only machines.
if torch.cuda.is_available():
    model.cuda()
criterion = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
# Total number of parameters in the model.
print(sum(p.numel() for p in model.parameters()))

# Bundle expected by train_one_epoch; "id" is a short random tag for this run.
model_dict = {"id": str(uuid4())[0:8], "model": model, "loss_fn": criterion, "opt": optimizer}

for epoch in range(10):
    print('EPOCH {}:'.format(epoch + 1))

    model.train()
    train_loss, train_acc, _, _, epoch_time_taken = train_one_epoch(model_dict, epoch, fault_dl)

    print(f"Training loss: {train_loss}")
    print(f"Training accuracy: {train_acc}")
    print(f"Time Taken: {epoch_time_taken}")

2240787
EPOCH 1:


  return self._call_impl(*args, **kwargs)


Training loss: 0.953915307467634
Training accuracy: 58.788490295410156
Time Taken: 98.70146059989929
EPOCH 2:
Training loss: 0.8566833143272707
Training accuracy: 68.93328094482422
Time Taken: 101.7760717868805
EPOCH 3:
Training loss: 0.820406312248574
Training accuracy: 72.6447982788086
Time Taken: 106.5754783153534
EPOCH 4:
Training loss: 0.8048240804602323
Training accuracy: 74.57844543457031
Time Taken: 109.0032434463501
EPOCH 5:
Training loss: 0.8022285750097543
Training accuracy: 74.51429748535156
Time Taken: 108.63611555099487
EPOCH 6:
Training loss: 0.7877069681533271
Training accuracy: 76.00806427001953
Time Taken: 108.69220018386841
EPOCH 7:
Training loss: 0.775310450503903
Training accuracy: 77.39186096191406
Time Taken: 109.0446195602417
EPOCH 8:
Training loss: 0.7660651583650595
Training accuracy: 78.34494018554688
Time Taken: 110.40758275985718
EPOCH 9:
Training loss: 0.7617961938255343
Training accuracy: 78.76649475097656
Time Taken: 112.87911534309387
EPOCH 10:
Training

In [None]:
# Parameter count per model variant and the batch size each one allows:
#   no encoder:            666,899 parameters -> batch size 16
#   with linear encoder: 2,240,787 parameters -> batch size 8
#   with regular encoder: (not recorded yet)

In [26]:
# Continue training the same model/optimizer bundle for up to 50 more epochs.
# The epoch counter printed in the log restarts at 1.
for epoch in range(50):
    print('EPOCH {}:'.format(epoch + 1))

    # Make sure the network is in training mode before each pass.
    model.train()
    train_loss, train_acc, _, _, epoch_time_taken = train_one_epoch(model_dict, epoch, fault_dl)

    print(f"Training loss: {train_loss}")
    print(f"Training accuracy: {train_acc}")
    print(f"Time Taken: {epoch_time_taken}")

EPOCH 1:


  return self._call_impl(*args, **kwargs)


Training loss: 0.7036373778259999
Training accuracy: 84.68658447265625
Time Taken: 94.60729813575745
EPOCH 2:
Training loss: 0.6914240010847444
Training accuracy: 85.92375183105469
Time Taken: 85.26079201698303
EPOCH 3:
Training loss: 0.6896021556644496
Training accuracy: 86.07954406738281
Time Taken: 90.32964563369751
EPOCH 4:


KeyboardInterrupt: 

In [16]:
import pandas as pd

# Load the test split of the .ts file the same way as the training split:
# tab-separated, with the first line of the file consumed as the column header
# and the remaining "@..." metadata lines appearing as ordinary rows.
df2 = pd.read_csv('../datasets/classification/FaultDetectionA/FaultDetectionA_TEST.ts', sep='\t')

# If your file doesn't have headers or is space-separated, adjust accordingly:
# df = pd.read_csv('your_file.ts', sep=' ', header=None, index_col=0, parse_dates=True)

# Display the first few rows to inspect the metadata lines
print(df2.head())

     @problemName test.ts
0       @timestamps false
1        @univariate true
2       @equalLength true
3      @seriesLength 5120
4  @classLabel true 0 1 2


In [20]:
sequences_test, labels_test = [], []

# Same row format as the training split: "v1,v2,...,vN:label", with the data
# rows read from index label 6 onward (earlier rows are "@..." metadata lines).
for line in df2.loc[6:].iloc[:, 0]:
    values_part, label_part = (part.split(",") for part in line.split(':'))
    sequences_test.append([float(value) for value in values_part])
    labels_test.append(float(label_part[0]))

# Convert the accumulated Python lists into float32 tensors.
labels_test = torch.tensor(labels_test, dtype=torch.float32)
sequences_test = torch.tensor(sequences_test, dtype=torch.float32)

In [21]:
# (number of series, series length) of the test tensor.
sequences_test.shape

torch.Size([2728, 5120])

In [22]:
# One label per test series.
labels_test.shape

torch.Size([2728])

In [23]:
# Wrap the parsed test tensors in the same dataset class used for training.
fault_ds_test = FaultDataset(sequences_test, labels_test)

In [24]:
# Test loader: unshuffled batches of 8. drop_last=True would discard a final
# incomplete batch; with 2728 test series (a multiple of 8) none is dropped.
fault_dl_test = DataLoader(fault_ds_test, batch_size=8, drop_last=True)

In [27]:
from src.timeseries_transformer.utils import evaluate_model

# Evaluate the trained model bundle on the test loader using the project helper;
# its printed report (accuracy, per-class metrics, confusion matrix) is shown below.
evaluate_model(model_dict, fault_dl_test)

  return self._call_impl(*args, **kwargs)


Time taken for inference: 0.0024671232945059052
Evaluation accuracy for model 9a4c3109: 66.67888641357422
              precision    recall  f1-score   support

           0       0.85      0.71      0.77       248
           1       0.68      0.56      0.61      1240
           2       0.63      0.77      0.69      1240

    accuracy                           0.67      2728
   macro avg       0.72      0.68      0.69      2728
weighted avg       0.67      0.67      0.66      2728

[[176  41  31]
 [ 30 689 521]
 [  1 285 954]]
