In [13]:
# split_and_save_graphs.py

import torch
from sklearn.model_selection import train_test_split
from torch_geometric.data import Data
import random

def load_event_graphs(events_file, labels_file):
    """Build one chain graph per "Sample" line of ``events_file``.

    The two files are consumed in lockstep: line *i* of ``events_file`` is
    paired with line *i* of ``labels_file``. An event line that does not start
    with ``"Sample"`` is skipped together with the label line it was paired
    with, and pairing stops at the end of the shorter file.

    Args:
        events_file: Path to a text file whose sample lines look like
            ``Sample ...: v0 v1 v2 ...`` (whitespace-separated floats after
            the first colon).
        labels_file: Path to a comma-separated text file; the last field of
            each line is parsed as the integer label.

    Returns:
        list[Data]: one graph per accepted sample, where ``x`` has shape
        ``(num_nodes, 1)``, ``edge_index`` is a bidirectional chain of shape
        ``(2, 2 * (num_nodes - 1))`` and ``y`` has shape ``(1,)``.

    Pairs whose label field is not an integer are reported with ``print`` and
    skipped.
    """
    graphs = []
    with open(events_file, 'r') as ef, open(labels_file, 'r') as lf:
        # NOTE(review): pairing is purely positional, so both files are assumed
        # to be line-aligned (including any header lines) - confirm upstream.
        for event_line, label_line in zip(ef, lf):
            if not event_line.startswith("Sample"):
                continue

            parts = event_line.strip().split(':')
            values = list(map(float, parts[1].strip().split()))
            label_values = label_line.strip().split(',')

            try:
                label = int(label_values[-1])
            except ValueError:
                print(f"Error converting label: {label_values[-1]}")
                continue

            num_nodes = len(values)

            # Forward edges i -> i+1 followed by the reverse edges i+1 -> i.
            # Building them with arange/stack keeps the shape (2, E) even when
            # there are no edges (0 or 1 nodes); tensor([]).t() would instead
            # give a 1-D tensor of shape (0,).
            src = torch.arange(max(num_nodes - 1, 0), dtype=torch.long)
            dst = src + 1
            edge_index = torch.stack(
                [torch.cat([src, dst]), torch.cat([dst, src])], dim=0
            )

            # One scalar feature per node -> shape (num_nodes, 1).
            x = torch.tensor(values, dtype=torch.float).unsqueeze(1)
            y = torch.tensor([label], dtype=torch.long)

            graphs.append(Data(x=x, edge_index=edge_index, y=y))

    return graphs

def split_and_save(graphs, train_file='train_graphs.pt', test_file='test_graphs.pt',
                   test_size=0.2, random_state=42):
    """Split ``graphs`` into stratified train/test subsets and save both.

    Args:
        graphs: Sequence of graph objects; each must expose a one-element
            tensor ``y`` whose value is used as the stratification label.
        train_file: Destination path for the training subset.
        test_file: Destination path for the test subset.
        test_size: Forwarded to ``train_test_split`` (default ``0.2``).
        random_state: Seed forwarded to ``train_test_split`` so the split is
            repeatable (default ``42``).

    Returns:
        None. Both subsets are written with ``torch.save`` and a one-line
        summary is printed for each.
    """
    # Stratify on the class label so both subsets keep the class proportions.
    labels = [g.y.item() for g in graphs]
    train_graphs, test_graphs = train_test_split(
        graphs,
        test_size=test_size,
        random_state=random_state,
        stratify=labels,
    )

    torch.save(train_graphs, train_file)
    torch.save(test_graphs, test_file)

    print(f"Saved {len(train_graphs)} training graphs to {train_file}")
    print(f"Saved {len(test_graphs)} testing graphs to {test_file}")

if __name__ == "__main__":
    # Input locations: the event samples and the label file read alongside them.
    events_file, labels_file = 'events_output1.txt', 'heuristic_matched_events_labels.csv'

    # Parse both files into graphs, report how many were built, then write
    # the train/test split to the default output files.
    graphs = load_event_graphs(events_file=events_file, labels_file=labels_file)
    print(f"Total graphs loaded: {len(graphs)}")

    split_and_save(graphs=graphs)


Total graphs loaded: 2602
Saved 2081 training graphs to train_graphs.pt
Saved 521 testing graphs to test_graphs.pt


In [17]:
import torch
from torch_geometric.data import Data

def load_and_inspect_graphs(file_path):
    """Load a saved collection of graphs and print a preview of the first five.

    Args:
        file_path: Path to a file previously written with ``torch.save``.

    Returns:
        None. The total count is printed, followed by the node features,
        edge index and label of at most five graphs.
    """
    # weights_only=False lets torch.load unpickle arbitrary Python objects,
    # so this should only be pointed at trusted files.
    graphs = torch.load(file_path, weights_only=False)

    print(f"Total graphs loaded from {file_path}: {len(graphs)}\n")

    for position, graph in enumerate(graphs, start=1):
        print(f"--- Graph {position} ---")
        print(f"Node Features (x):\n{graph.x}")
        print(f"Edge Index:\n{graph.edge_index}")
        print(f"Label (y): {graph.y.item()}\n")

        # Keep the preview short: stop once the fifth graph has been shown.
        if position == 5:
            break

if __name__ == "__main__":
    train_file, test_file = 'train_graphs.pt', 'test_graphs.pt'

    # Same report for each split: a heading line, then the graph preview.
    for _heading, _path in (
        ("\nTraining Graphs Info:", train_file),
        ("\nTesting Graphs Info:", test_file),
    ):
        print(_heading)
        load_and_inspect_graphs(_path)



Training Graphs Info:
Total graphs loaded from train_graphs.pt: 2081

--- Graph 1 ---
Node Features (x):
tensor([[250.2563],
        [250.1122],
        [250.1122],
        [250.0160],
        [250.0160],
        [250.0160],
        [249.9359],
        [249.8718],
        [249.8238],
        [249.8238]])
Edge Index:
tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8]])
Label (y): 0

--- Graph 2 ---
Node Features (x):
tensor([[188.9419],
        [189.1534],
        [189.1118],
        [189.0605],
        [189.0220],
        [189.2624],
        [188.9980],
        [189.0172],
        [189.2223],
        [189.1374]])
Edge Index:
tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8]])
Label (y): 0

--- Graph 3 ---
Node Features (x):
tensor([[965.3375],
        [965.3775],
        [965.1772],
        [965.1772],
        [964.9369],
        [965

In [21]:
# train_model.py

import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv, global_mean_pool
import random

class GATClassifier(torch.nn.Module):
    """Graph classifier: two GAT layers, mean pooling, then a linear head.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Per-head width of both attention layers.
        out_channels: Number of output logits (one per class).
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        first_heads, second_heads = 4, 2
        self.conv1 = GATConv(in_channels, hidden_channels, heads=first_heads, dropout=0.2)
        # Each following layer takes hidden_channels * <heads of the previous
        # layer> inputs (presumably because GATConv concatenates its heads).
        self.conv2 = GATConv(hidden_channels * first_heads, hidden_channels, heads=second_heads, dropout=0.2)
        self.classifier = torch.nn.Linear(hidden_channels * second_heads, out_channels)

    def forward(self, data):
        """Return class logits for the graph(s) held in ``data``.

        ``data`` must provide ``x``, ``edge_index`` and ``batch`` attributes.
        """
        node_feats = F.elu(self.conv1(data.x, data.edge_index))
        node_feats = F.elu(self.conv2(node_feats, data.edge_index))
        # Collapse node embeddings into one vector per graph before classifying.
        pooled = global_mean_pool(node_feats, data.batch)
        return self.classifier(pooled)

def train_model(train_graphs, epochs=20, lr=0.005, save_path='trained_gat_model.pth'):
    """Train a ``GATClassifier`` one graph at a time and save its weights.

    Args:
        train_graphs: Iterable of graph objects accepted by ``GATClassifier``;
            each must also carry its label in ``y``. The caller's container is
            not modified.
        epochs: Number of passes over the training graphs.
        lr: Learning rate for the Adam optimizer.
        save_path: Where to write the trained ``state_dict``. Pass ``None`` to
            skip saving.

    Returns:
        The trained model (left on the device it was trained on).

    Raises:
        ValueError: If ``train_graphs`` is empty.
    """
    # Shuffle a private copy each epoch so the caller's list keeps its order.
    graphs = list(train_graphs)
    if not graphs:
        # Without this guard the accuracy computation below divides by zero.
        raise ValueError("train_graphs is empty; nothing to train on")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GATClassifier(in_channels=1, hidden_channels=8, out_channels=2).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        correct = 0
        total = 0
        random.shuffle(graphs)

        # One optimizer step per graph (effective batch size of 1).
        for data in graphs:
            data = data.to(device)
            optimizer.zero_grad()
            out = model(data)
            loss = F.cross_entropy(out, data.y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

            pred = out.argmax(dim=1)
            correct += (pred == data.y).sum().item()
            total += data.y.size(0)

        train_accuracy = correct / total
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(graphs):.4f}, Train Accuracy: {train_accuracy:.4f}")

    if save_path is not None:
        torch.save(model.state_dict(), save_path)
        print(f"Model saved as '{save_path}'")
    return model

if __name__ == "__main__":
    # weights_only=False is passed explicitly so that torch.load unpickles
    # full Python objects rather than restricting itself to tensors.
    train_graphs = torch.load('train_graphs.pt', weights_only=False)

    # Train with the default hyper-parameters; the model is kept for later cells.
    model = train_model(train_graphs=train_graphs)


Epoch 1/20, Loss: 0.0897, Train Accuracy: 0.9741
Epoch 2/20, Loss: 0.0389, Train Accuracy: 0.9827
Epoch 3/20, Loss: 0.0352, Train Accuracy: 0.9846
Epoch 4/20, Loss: 0.0372, Train Accuracy: 0.9856
Epoch 5/20, Loss: 0.0327, Train Accuracy: 0.9846
Epoch 6/20, Loss: 0.0346, Train Accuracy: 0.9846
Epoch 7/20, Loss: 0.0314, Train Accuracy: 0.9856
Epoch 8/20, Loss: 0.0320, Train Accuracy: 0.9870
Epoch 9/20, Loss: 0.0311, Train Accuracy: 0.9865
Epoch 10/20, Loss: 0.0350, Train Accuracy: 0.9846
Epoch 11/20, Loss: 0.0306, Train Accuracy: 0.9880
Epoch 12/20, Loss: 0.0253, Train Accuracy: 0.9875
Epoch 13/20, Loss: 0.0338, Train Accuracy: 0.9880
Epoch 14/20, Loss: 0.0320, Train Accuracy: 0.9846
Epoch 15/20, Loss: 0.0259, Train Accuracy: 0.9885
Epoch 16/20, Loss: 0.0342, Train Accuracy: 0.9827
Epoch 17/20, Loss: 0.0278, Train Accuracy: 0.9875
Epoch 18/20, Loss: 0.0294, Train Accuracy: 0.9880
Epoch 19/20, Loss: 0.0297, Train Accuracy: 0.9875
Epoch 20/20, Loss: 0.0319, Train Accuracy: 0.9870
Model sav

In [25]:
# test_model.py

import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv, global_mean_pool
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

class GATClassifier(torch.nn.Module):
    """Graph classifier: two GAT layers, mean pooling, then a linear head.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Per-head width of both attention layers.
        out_channels: Number of output logits (one per class).
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        first_heads, second_heads = 4, 2
        self.conv1 = GATConv(in_channels, hidden_channels, heads=first_heads, dropout=0.2)
        # Each following layer takes hidden_channels * <heads of the previous
        # layer> inputs (presumably because GATConv concatenates its heads).
        self.conv2 = GATConv(hidden_channels * first_heads, hidden_channels, heads=second_heads, dropout=0.2)
        self.classifier = torch.nn.Linear(hidden_channels * second_heads, out_channels)

    def forward(self, data):
        """Return class logits for the graph(s) held in ``data``.

        ``data`` must provide ``x``, ``edge_index`` and ``batch`` attributes.
        """
        node_feats = F.elu(self.conv1(data.x, data.edge_index))
        node_feats = F.elu(self.conv2(node_feats, data.edge_index))
        # Collapse node embeddings into one vector per graph before classifying.
        pooled = global_mean_pool(node_feats, data.batch)
        return self.classifier(pooled)

def evaluate_model(model, graph_set, verbose=True):
    """Run ``model`` over ``graph_set`` and report binary classification metrics.

    Label ``0`` is reported as "Normal" and any other label as "Abnormal".

    Args:
        model: A module that returns per-graph logits; it is switched to
            eval mode and its first parameter's device is used for inference.
        graph_set: Iterable of graphs, each holding exactly one label in ``y``.
        verbose: When true (default), print one prediction line per sample.

    Returns:
        dict with keys ``accuracy``, ``precision``, ``recall``, ``f1`` and
        ``confusion_matrix``. The same values are printed as a summary.

    Raises:
        ValueError: If ``graph_set`` is empty.
    """
    model.eval()
    preds = []
    labels = []
    device = next(model.parameters()).device

    with torch.no_grad():
        for i, data in enumerate(graph_set):
            data = data.to(device)
            out = model(data)
            pred = out.argmax(dim=1).item()
            true = data.y.item()
            preds.append(pred)
            labels.append(true)

            if verbose:
                true_label = 'Normal' if true == 0 else 'Abnormal'
                pred_label = 'Normal' if pred == 0 else 'Abnormal'
                print(f"Sample {i+1}: Predicted: {pred_label}, True: {true_label}")

    if not labels:
        raise ValueError("graph_set is empty; nothing to evaluate")

    accuracy = accuracy_score(labels, preds)
    # zero_division=0 yields the same 0.0 score as the default but without a
    # warning when the positive class is never predicted / never present.
    precision = precision_score(labels, preds, average='binary', zero_division=0)
    recall = recall_score(labels, preds, average='binary', zero_division=0)
    f1 = f1_score(labels, preds, average='binary', zero_division=0)
    # Pin the label order so the matrix is always 2x2, even if one class is
    # missing from both the labels and the predictions.
    cm = confusion_matrix(labels, preds, labels=[0, 1])

    print("\n--- Test Set Evaluation ---")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1-Score:  {f1:.4f}")
    print(f"Confusion Matrix:\n{cm}")

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': cm,
    }

if __name__ == "__main__":
    # The test split is a pickled list of graph objects, so full unpickling
    # (weights_only=False) is required; only load files you trust.
    test_graphs = torch.load('test_graphs.pt', weights_only=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GATClassifier(in_channels=1, hidden_channels=8, out_channels=2).to(device)

    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint written on a CUDA machine still loads on a CPU-only one.
    # A state_dict contains only tensors, so the restricted loader suffices.
    state_dict = torch.load('trained_gat_model.pth', map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    print("Model loaded successfully.")

    evaluate_model(model, test_graphs)


Model loaded successfully.
Sample 1: Predicted: Normal, True: Normal
Sample 2: Predicted: Normal, True: Normal
Sample 3: Predicted: Normal, True: Normal
Sample 4: Predicted: Normal, True: Normal
Sample 5: Predicted: Normal, True: Normal
Sample 6: Predicted: Normal, True: Normal
Sample 7: Predicted: Normal, True: Normal
Sample 8: Predicted: Normal, True: Normal
Sample 9: Predicted: Normal, True: Normal
Sample 10: Predicted: Normal, True: Normal
Sample 11: Predicted: Normal, True: Normal
Sample 12: Predicted: Normal, True: Normal
Sample 13: Predicted: Normal, True: Normal
Sample 14: Predicted: Normal, True: Normal
Sample 15: Predicted: Normal, True: Normal
Sample 16: Predicted: Normal, True: Normal
Sample 17: Predicted: Normal, True: Normal
Sample 18: Predicted: Abnormal, True: Abnormal
Sample 19: Predicted: Normal, True: Normal
Sample 20: Predicted: Normal, True: Normal
Sample 21: Predicted: Normal, True: Normal
Sample 22: Predicted: Normal, True: Normal
Sample 23: Predicted: Normal, Tr