In [None]:
# Unpack the uploaded archives: one "good" set and four "bad" sets
# (named after the zip files; contents are not visible from this notebook).
!unzip /content/good_data_for_conv.zip
!unzip /content/bad_data_for_conv_110_2.3_2_7.zip
!unzip /content/bad_data_for_conv_150_1.9_8_10.zip
!unzip /content/bad_data_for_conv_30_2_10_8.zip
!unzip /content/bad_data_for_conv_50_2.2_5_7.zip

In [None]:
# Unpack only the two archives used for the binary (good vs. one bad set) run.
# NOTE(review): these are the same two archives as the first two lines of the
# previous cell -- presumably only one of the two cells is meant to be run; confirm.
!unzip /content/good_data_for_conv.zip
!unzip /content/bad_data_for_conv_110_2.3_2_7.zip

In [None]:
# Unpack the archives with synthetic anomalies, real anomalies and normal series.
# NOTE(review): "Syntatic" is the archive's actual file name; do not "fix" the spelling
# here without renaming the uploaded file as well.
!unzip /content/Syntatic.zip
!unzip /content/FinalAnomalous.zip
!unzip /content/FinalNormal.zip

# Model Creation

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset


class ResidualBlock(nn.Module):
    """Two-convolution 1-D residual block with an optional shortcut projection.

    Main path: Conv1d(k=3) -> BatchNorm1d -> ReLU -> Conv1d(k=3) -> BatchNorm1d.
    The shortcut (the input itself, or ``downsample(input)`` when a module is
    supplied) is added to the main path before the final ReLU.

    Args:
        in_channels: Channel count of the incoming tensor.
        out_channels: Channel count produced by both convolutions.
        stride: Stride of the first convolution; the second one uses the
            Conv1d default.
        downsample: Optional module applied to the input to build the
            shortcut. It has to be supplied whenever the main path changes the
            tensor shape, otherwise the addition in ``forward`` cannot succeed.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Compare against None explicitly: container modules such as
        # nn.Sequential define __len__, so a plain truthiness test would
        # silently skip a shortcut module whose length is zero.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return F.relu(out)


class ResNet(nn.Module):
    """1-D ResNet classifier built from four stages of residual blocks.

    Layout: stem Conv1d(k=7, stride=2) -> BatchNorm1d -> ReLU, then four stages
    with 64, 128, 256 and 512 channels (stages 2-4 are built with stride 2),
    adaptive average pooling to length 1 and a linear classification head.

    Args:
        block: Block class; it is called as
            ``block(in_channels, out_channels, stride, downsample)`` for the
            first block of a stage and ``block(out_channels, out_channels)``
            for the remaining ones.
        layers: Sequence whose first four entries give the number of blocks in
            each stage.
        num_classes: Size of the output logits vector.
        input_channels: Channel count of the input tensor. Defaults to 1,
            which reproduces the original single-channel network.
    """

    def __init__(self, block, layers, num_classes=2, input_channels=1):
        super().__init__()
        # Running channel count; make_layer reads and updates it per stage.
        self.in_channels = 64
        self.conv = nn.Conv1d(input_channels, 64, kernel_size=7, stride=2, padding=3)
        self.bn = nn.BatchNorm1d(64)
        self.layer1 = self.make_layer(block, 64, layers[0])
        self.layer2 = self.make_layer(block, 128, layers[1], 2)
        self.layer3 = self.make_layer(block, 256, layers[2], 2)
        self.layer4 = self.make_layer(block, 512, layers[3], 2)
        self.avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(512, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` blocks and update ``self.in_channels``.

        Only the first block receives ``stride``; it also receives a 1x1
        convolution + batch-norm shortcut whenever the stride is not 1 or the
        channel count changes.
        """
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                nn.Conv1d(self.in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm1d(out_channels))

        stage = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels
        stage.extend(block(out_channels, out_channels) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Map an input batch ``x`` to a ``(batch, num_classes)`` logits tensor."""
        out = F.relu(self.bn(self.conv(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avg_pool(out)
        # Flatten everything after the batch dimension for the linear head.
        out = out.view(out.size(0), -1)
        return self.fc(out)


# Data Loader For Multiclass and Binary, non GNN Peaks

In [None]:
import numpy as np
import torch
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
import glob
import os

def load_data(folders):
    """Load every ``*.csv`` file from ``folders`` into one labelled tensor pair.

    The position of a folder in ``folders`` is used as the integer class label
    of all files found directly inside it. Each file is parsed with
    ``pd.read_csv`` using its default options.

    Args:
        folders: Iterable of directory paths, one per class.

    Returns:
        ``(data, labels)`` where ``data`` is a float32 tensor stacking the
        values of every file and ``labels`` is an int64 tensor with one entry
        per file. Stacking assumes all files have the same shape -- TODO
        confirm for new datasets.
    """
    data = []
    labels = []
    for label, folder in enumerate(folders):
        # glob returns files in filesystem-dependent order; sorting keeps the
        # sample order (and therefore any seeded split) reproducible.
        for csv_file in sorted(glob.glob(os.path.join(folder, '*.csv'))):
            df = pd.read_csv(csv_file)
            data.append(df.values)
            labels.append(label)
    data = np.array(data)
    return torch.tensor(data, dtype=torch.float32), torch.tensor(labels, dtype=torch.long)


def create_datasets_and_loaders(folders, batch_size, train_fraction=0.8, seed=None):
    """Build shuffled-train / ordered-test DataLoaders from per-class folders.

    Args:
        folders: Directory paths passed to ``load_data``; the list position
            becomes the class label.
        batch_size: Batch size used for both loaders.
        train_fraction: Fraction of the samples assigned to the training
            split (rounded down); the remainder becomes the test split.
        seed: Optional integer. When given, the random split and the training
            shuffle draw from a dedicated ``torch.Generator`` seeded with it,
            so repeated calls produce the same split. When ``None`` the global
            torch RNG is used, as before.

    Returns:
        ``(train_loader, test_loader)``.
    """
    data, labels = load_data(folders)
    # Move the second axis of the loaded tensor to the sequence position and
    # insert a single channel axis: (N, L, ...) -> (N, 1, L).
    # NOTE(review): this only works when each CSV has exactly one column -- confirm.
    data = data.reshape(data.shape[0], 1, data.shape[1])

    dataset = TensorDataset(data, labels)

    train_size = int(train_fraction * len(dataset))
    test_size = len(dataset) - train_size

    if seed is None:
        generator = None
        train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
    else:
        generator = torch.Generator().manual_seed(seed)
        train_dataset, test_dataset = torch.utils.data.random_split(
            dataset, [train_size, test_size], generator=generator)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, generator=generator)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader



# One folder per class. The list position is the integer label that load_data
# assigns to the files of that folder (index 0 first, index 1 second).
class_folders = [
    '/content/good_data_for_conv' ,
    '/content/bad_data_for_conv_110_2.3_2_7'
]


# Rebinds the notebook-level train_loader / test_loader used by the training cells.
train_loader, test_loader = create_datasets_and_loaders(class_folders, batch_size=16)

In [None]:
# Pull a single batch from the training loader to sanity-check tensor shapes.
data, labels = next(iter(train_loader))


print(f'Data dimensions: {data.shape}')
print(f'Labels dimensions: {labels.shape}')

Data dimensions: torch.Size([16, 1, 250])
Labels dimensions: torch.Size([16])


# Dataloader For Binary

In [None]:
import glob
import pandas as pd
import numpy as np

def load_data_from_folder(folder, label):
    """Read every ``*.csv`` file directly inside ``folder``.

    Args:
        folder: Directory to scan (not recursive).
        label: Label attached to every file found.

    Returns:
        ``(data, labels)``: a list with the ``.values`` array of each file and
        a list of the same length filled with ``label``.
    """
    data, labels = [], []
    # Sort the matches: glob order depends on the filesystem, and a stable
    # order is needed for a seeded shuffle/split to be reproducible.
    for file in sorted(glob.glob(f"{folder}/*.csv")):
        df = pd.read_csv(file)
        data.append(df.values)
        labels.append(label)
    return data, labels

def split_data(data, labels, split_ratio=0.8):
    """Randomly partition parallel ``data`` / ``labels`` sequences in two.

    The index order is shuffled with the global NumPy RNG
    (``np.random.shuffle``); the first ``floor(split_ratio * len(data))``
    shuffled positions form the first partition, the rest the second.

    Returns:
        ``(train_data, train_labels, test_data, test_labels)`` as lists.
    """
    n_items = len(data)
    order = list(range(n_items))
    cut = int(np.floor(split_ratio * n_items))
    np.random.shuffle(order)

    def pick(source, positions):
        # Gather the elements of ``source`` at the given shuffled positions.
        return [source[pos] for pos in positions]

    head, tail = order[:cut], order[cut:]
    return pick(data, head), pick(labels, head), pick(data, tail), pick(labels, tail)

def create_data_loaders(normal_folder, real_anomalies_folder, syntactic_anomalies_folder, batch_size):
    """Build binary-classification DataLoaders from three CSV folders.

    Files from ``normal_folder`` get label 0; files from both anomaly folders
    get label 1. Each group is split independently with ``split_data`` (default
    ratio). The training set combines the training parts of all three groups.
    The test set contains the held-out normal and real-anomaly samples only;
    the held-out part of the syntactic anomalies is discarded.

    The number of files found in each folder is printed as a quick sanity check.

    Args:
        normal_folder: Directory with normal series.
        real_anomalies_folder: Directory with real anomalous series.
        syntactic_anomalies_folder: Directory with synthetic anomalous series.
        batch_size: Batch size used for both loaders.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """

    def to_dataset(samples, sample_labels):
        # Flatten every sample to one channel: (N, 1, values_per_sample).
        features = np.array(samples).reshape(len(samples), 1, -1)
        targets = np.array(sample_labels)
        return TensorDataset(torch.tensor(features, dtype=torch.float32),
                             torch.tensor(targets, dtype=torch.long))

    # Load and split data
    normal_data, normal_labels = load_data_from_folder(normal_folder, 0)
    print(len(normal_data))
    real_anomalies_data, real_anomalies_labels = load_data_from_folder(real_anomalies_folder, 1)
    print(len(real_anomalies_data))
    syntactic_anomalies_data, syntactic_anomalies_labels = load_data_from_folder(syntactic_anomalies_folder, 1)
    print(len(syntactic_anomalies_data))
    normal_train_data, normal_train_labels, normal_test_data, normal_test_labels = split_data(normal_data, normal_labels)
    syntactic_train_data, syntactic_train_labels, _, _ = split_data(syntactic_anomalies_data, syntactic_anomalies_labels)
    real_train_data, real_train_labels, real_test_data, real_test_labels = split_data(real_anomalies_data, real_anomalies_labels)

    # Combine train data
    train_data = normal_train_data + syntactic_train_data + real_train_data
    train_labels = normal_train_labels + syntactic_train_labels + real_train_labels

    # Combine test data. Each held-out sample appears exactly once: the
    # previous version appended the real-anomaly test split twice, which
    # double-counted those samples in every test metric.
    test_data = normal_test_data + real_test_data
    test_labels = normal_test_labels + real_test_labels

    # Create TensorDatasets
    train_dataset = to_dataset(train_data, train_labels)
    test_dataset = to_dataset(test_data, test_labels)

    # Create DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

def count_labels(loader):
    """Count how many samples of each label a loader yields.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches where ``labels`` is a
            1-D integer tensor.

    Returns:
        Dict mapping label -> count. Labels 0 and 1 are always present (with
        count 0 if unseen); any other label value is added when first
        encountered, so loaders with more than two classes are supported.
    """
    label_counts = {0: 0, 1: 0}
    for _, labels in loader:
        # tolist() yields plain Python ints regardless of the tensor's device.
        for label in labels.tolist():
            label_counts[label] = label_counts.get(label, 0) + 1
    return label_counts


In [None]:
# Define folder paths
# NOTE(review): the archives unpacked earlier are named FinalNormal.zip and
# FinalAnomalous.zip, while these directory names contain a space -- presumably
# that is how the archives expand; confirm when changing the data source.
folders = {
    'normal': '/content/Final Normal',
    'real_anomalies': '/content/Final Anomalous',
    'syntactic_anomalies': '/content/Syntatic'
}

# Create DataLoaders
# This rebinds the notebook-level train_loader / test_loader, replacing any
# loaders created by an earlier cell.
train_loader, test_loader = create_data_loaders(folders['normal'], folders['real_anomalies'], folders['syntactic_anomalies'], batch_size=16)


# Pull a single batch to sanity-check tensor shapes.
data, labels = next(iter(train_loader))

print(f'Data dimensions: {data.shape}')
print(f'Labels dimensions: {labels.shape}')


2501
1501
1500
Data dimensions: torch.Size([16, 1, 500])
Labels dimensions: torch.Size([16])


In [None]:
def count_labels(loader):
    """Count how many samples of each label a loader yields.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches where ``labels`` is a
            1-D integer tensor.

    Returns:
        Dict mapping label -> count. Labels 0 and 1 are always present (with
        count 0 if unseen); any other label value is added when first
        encountered, so loaders with more than two classes are supported.
    """
    label_counts = {0: 0, 1: 0}
    for _, labels in loader:
        # tolist() yields plain Python ints regardless of the tensor's device.
        for label in labels.tolist():
            label_counts[label] = label_counts.get(label, 0) + 1
    return label_counts

# Count and print label distributions
# Each call iterates its loader once from start to end.
train_label_counts = count_labels(train_loader)
test_label_counts = count_labels(test_loader)
print(f'Train loader label counts: {train_label_counts}')
print(f'Test loader label counts: {test_label_counts}')


Train loader label counts: {0: 2000, 1: 2400}
Test loader label counts: {0: 501, 1: 602}


# Model Training

In [None]:
from sklearn.metrics import precision_recall_fscore_support
def train_model(model, train_loader, test_loader, device, num_epochs=10):
    """Train ``model`` in place with Adam + cross-entropy, then evaluate it.

    After every epoch the sample-weighted mean training loss is printed. After
    the last epoch the model is switched to eval mode and the test accuracy and
    the weighted F1 score are printed.

    Args:
        model: Module mapping an input batch to ``(batch, num_classes)`` logits.
            It is moved to ``device`` and its weights are updated in place.
        train_loader: DataLoader yielding ``(inputs, labels)``; its
            ``.dataset`` length is used to average the loss.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        device: ``torch.device`` on which training and evaluation run.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. Results are reported through ``print`` only.
    """
    # Make sure parameters live on the same device as the batches before the
    # optimizer captures them.
    model.to(device)

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # loss is a batch mean; weight by batch size so the epoch figure
            # is a true per-sample average even with a smaller last batch.
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

    model.eval()
    correct = 0
    total = 0
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            # argmax over the class dimension; replaces torch.max(outputs.data, 1).
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            all_labels.extend(labels.cpu().tolist())
            all_predictions.extend(predicted.cpu().tolist())

    if total == 0:
        # Nothing to score: avoid a ZeroDivisionError and a meaningless F1.
        print('Test loader yielded no samples; skipping accuracy and F1 computation.')
        return

    accuracy = 100 * correct / total
    print(f'Accuracy of the model on the test set: {accuracy}%')

    _, _, f1_score, _ = precision_recall_fscore_support(all_labels, all_predictions, average='weighted')
    print(f'F1 Score: {f1_score:.4f}')

# Four stages with two residual blocks each and a two-class output head.
model = ResNet(ResidualBlock, [2, 2, 2, 2], num_classes=2)

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

### Binary Classification, Synt Peaks

In [None]:
# Trains the notebook-level `model` in place, using whichever train_loader /
# test_loader were bound most recently by a data-loading cell.
train_model(model, train_loader, test_loader, device, num_epochs=5)

Epoch [1/5], Loss: 0.0096
Epoch [2/5], Loss: 0.0039
Epoch [3/5], Loss: 0.0092
Epoch [4/5], Loss: 0.0058
Epoch [5/5], Loss: 0.0018
Accuracy of the model on the test set: 100.0%
F1 Score: 1.0000


### Five classes classification, Synt Peaks

In [None]:
# NOTE(review): the model cell builds ResNet with num_classes=2; a five-class run
# presumably needs a model rebuilt with num_classes=5 and loaders created from
# five class folders before this cell is executed -- confirm.
train_model(model, train_loader, test_loader, device, num_epochs=3)

Epoch [1/3], Loss: 0.0213
Epoch [2/3], Loss: 0.0097
Epoch [3/3], Loss: 0.0116
Accuracy of the model on the test set: 99.94500962331593%
F1 Score: 0.9995


### Binary Classification on Real Data

In [None]:
# NOTE(review): `model` is created once in the model cell, so this call continues
# from whatever weights earlier training cells left behind unless that cell is
# re-run first -- confirm that this is intended.
train_model(model, train_loader, test_loader, device, num_epochs=3)

Epoch [1/3], Loss: 0.0202
Epoch [2/3], Loss: 0.0008
Epoch [3/3], Loss: 0.0022
Accuracy of the model on the test set: 100.0%
F1 Score: 1.0000


# Testing on Separate Files

In [None]:
import torch
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset



def preprocess_data(file_path):
    """Read one CSV file and shape it as a single-sample model input.

    Args:
        file_path: Path of the CSV file (parsed with ``pd.read_csv`` defaults).

    Returns:
        float32 tensor of shape ``(1, 1, n)`` where ``n`` is the total number
        of values in the file: 1 sample, 1 channel, sequence length ``n``.
    """
    values = pd.read_csv(file_path).values
    # Flatten all values of the file into the sequence axis.
    return torch.tensor(values.reshape(1, 1, -1), dtype=torch.float32)

def classify_time_series(model, file_path, device):
    """Classify the series stored in a single CSV file.

    The file is loaded with ``preprocess_data``, moved to ``device`` and passed
    through ``model`` in eval mode with gradients disabled.

    Args:
        model: Module returning per-class scores for a ``(1, 1, n)`` input.
        file_path: Path of the CSV file to classify.
        device: Device the input tensor is moved to; the model itself is not
            moved by this function.

    Returns:
        The index of the highest-scoring class as a Python int.
    """
    sample = preprocess_data(file_path).to(device)

    model.eval()
    with torch.no_grad():
        scores = model(sample)
        # torch.max over dim 1 returns (values, indices); keep the indices.
        best_class = torch.max(scores, 1)[1]

    return best_class.item()



# Classify one file from the real-anomaly folder with the current notebook-level
# `model` (whatever state the training cells left it in).
file_path = '/content/Final Anomalous/Anomalous_1003.csv'
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
prediction = classify_time_series(model, file_path, device)
print(f'The time series is classified as: {prediction}')


The time series is classified as: 1
