In [1]:
from cuda_test import test_cuda_availability, matrix_multiplication_test
test_cuda_availability()
matrix_multiplication_test(size=1000, runs=5)

=== CUDA 可用性測試 ===
PyTorch版本: 2.4.1+cu124
CUDA是否可用: True
CUDA版本: 12.4
當前CUDA設備: 0
設備名稱: NVIDIA GeForce RTX 2060
設備數量: 1
設備屬性: _CudaDeviceProperties(name='NVIDIA GeForce RTX 2060', major=7, minor=5, total_memory=6143MB, multi_processor_count=30)

=== 矩陣乘法性能測試 (大小: 1000x1000) ===
CPU平均時間: 0.0054 秒
GPU平均時間: 0.0032 秒
GPU加速比: 1.69x


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from torch_geometric.data import Data
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
class SWaTGraphSAGE(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=2):
        super(SWaTGraphSAGE, self).__init__()
        self.num_layers = num_layers
        
        self.convs = nn.ModuleList()
        self.convs.append(SAGEConv(in_channels, hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(SAGEConv(hidden_channels, hidden_channels))
        self.convs.append(SAGEConv(hidden_channels, out_channels))
        
        self.dropout = nn.Dropout(0.2)
        
    def forward(self, x, edge_index):
        for i in range(self.num_layers - 1):
            x = self.convs[i](x, edge_index)
            x = F.relu(x)
            x = self.dropout(x)
            x = self.convs[-1](x, edge_index)
        return torch.sigmoid(x)

In [4]:
def create_swat_graph(normal_df, attack_df=None, save_path='graph_data.pt'):
    """創建 SWaT 系統的圖結構"""
    # 清理列名中的空格
    normal_df.columns = normal_df.columns.str.strip()
    if attack_df is not None:
        attack_df.columns = attack_df.columns.str.strip()
    
    # 獲取特徵列
    feature_cols = [col for col in normal_df.columns 
                   if col not in ['Timestamp', 'Normal/Attack']]
    
    print("\n清理後的正常數據列名:")
    print(normal_df.columns.tolist())
    print("\n清理後的攻擊數據列名:")
    print(attack_df.columns.tolist() if attack_df is not None else "None")
    
    # 準備節點特徵
    node_features = normal_df[feature_cols].values
    if attack_df is not None:
        attack_features = attack_df[feature_cols].values
        node_features = np.vstack([node_features, attack_features])
    
    # 創建邊的連接關係
    edges = []
    feature_to_idx = {name: idx for idx, name in enumerate(feature_cols)}
    
    # 定義要連接的特徵對
    connections = [
    # P1 connections
    ('FIT101', 'LIT101'),
    ('MV101', 'FIT101'),
    ('P101', 'LIT101'),
    ('P102', 'FIT101'),

    # P2 connections
    ('AIT201', 'AIT202'),
    ('AIT202', 'AIT203'),
    ('FIT201', 'AIT201'),
    ('MV201', 'FIT201'),
    ('P201', 'FIT201'),
    ('P202', 'AIT202'),
    ('P203', 'AIT203'),
    ('P204', 'FIT201'),
    ('P205', 'AIT202'), 
    ('P206', 'AIT203'),  

    # P3 connections
    ('DPIT301', 'FIT301'),
    ('FIT301', 'LIT301'),
    ('MV301', 'FIT301'),
    ('MV302', 'LIT301'),
    ('MV303', 'FIT301'),
    ('MV304', 'LIT301'),
    ('P301', 'FIT301'),
    ('P302', 'LIT301'),

    # P4 connections
    ('AIT401', 'AIT402'),
    ('FIT401', 'LIT401'),
    ('P401', 'FIT401'),
    ('P402', 'LIT401'),
    ('P403', 'FIT401'),
    ('P404', 'LIT401'),
    ('UV401', 'FIT401'),

    # P5 connections
    ('AIT501', 'AIT502'),
    ('AIT502', 'AIT503'),
    ('AIT503', 'AIT504'),
    ('FIT501', 'AIT501'),
    ('FIT502', 'AIT502'),
    ('FIT503', 'AIT503'),
    ('FIT504', 'AIT504'),
    ('P501', 'FIT501'),
    ('P502', 'FIT502'),
    ('PIT501', 'FIT503'),
    ('PIT502', 'FIT504'),
    ('PIT503', 'FIT503'),

    # P6 connections
    ('FIT601', 'P601'),
    ('P601', 'P602'),
    ('P602', 'P603'),

    # Cross-process connections
    ('LIT101', 'AIT201'),  
    ('AIT203', 'DPIT301'),
    ('LIT301', 'AIT401'), 
    ('FIT401', 'AIT501'), 
    ('AIT503', 'FIT601'),  
    ('LIT301', 'FIT201'),  
    ('AIT401', 'DPIT301'), 
    ('FIT503', 'AIT401'), 
    ('P205', 'LIT301'),    
    ('P206', 'FIT503')     
    ]
    
    print("\n創建的連接:")
    for src, dst in connections:
        if src in feature_to_idx and dst in feature_to_idx:
            i, j = feature_to_idx[src], feature_to_idx[dst]
            edges.extend([[i, j], [j, i]])  # 添加雙向邊
            print(f"{src} <-> {dst}")
    
    # 轉換為PyTorch張量
    edge_index = torch.tensor(edges, dtype=torch.long).t()
    x = torch.tensor(node_features, dtype=torch.float)
    
    # 創建標籤
    y = torch.zeros(len(node_features))
    if attack_df is not None:
        y[len(normal_df):] = 1
    
    # 儲存 x 和 edge_index
    torch.save({'x': x, 'edge_index': edge_index}, save_path)
    print(f"x 和 edge_index 已儲存至 {save_path}")
    
    print(f"\n最終圖結構:")
    print(f"節點數量: {x.size(0)}")
    print(f"節點特徵維度: {x.size(1)}")
    print(f"邊的數量: {edge_index.size(1)}")
    
    return Data(x=x, edge_index=edge_index, y=y)


In [5]:
    # connections = [
    #     ('FIT101', 'LIT101'),  # P1 connections
    #     ('MV101', 'FIT101'),
    #     ('P101', 'LIT101'),
    #     ('P102', 'FIT101'),
        
    #     ('AIT201', 'AIT202'),  # P2 connections
    #     ('AIT202', 'AIT203'),
    #     ('FIT201', 'AIT201'),
        
    #     ('DPIT301', 'FIT301'),  # P3 connections
    #     ('FIT301', 'LIT301'),
    #     ('MV301', 'FIT301'),
    #     ('MV302', 'LIT301'),
        
    #     ('AIT401', 'AIT402'),  # P4 connections
    #     ('FIT401', 'LIT401'),
    #     ('P401', 'FIT401'),
        
    #     ('AIT501', 'AIT502'),  # P5 connections
    #     ('AIT502', 'AIT503'),
    #     ('FIT501', 'AIT501'),
    #     ('FIT502', 'AIT502'),
    #     ('FIT503', 'AIT503'),
        
    #     # Cross-process connections
    #     ('LIT101', 'AIT201'),
    #     ('AIT203', 'DPIT301'),
    #     ('LIT301', 'AIT401'),
    #     ('FIT401', 'AIT501')
    # ]

In [6]:
def train_graphsage(model, data, epochs=100, lr=0.01):
    """訓練 GraphSAGE 模型"""
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.BCELoss()
    
    # 分割訓練集和測試集
    num_nodes = data.x.size(0)
    train_mask = torch.zeros(num_nodes, dtype=torch.bool)
    test_mask = torch.zeros(num_nodes, dtype=torch.bool)
    
    train_indices = np.random.choice(num_nodes, int(0.8 * num_nodes), replace=False)
    train_mask[train_indices] = True
    test_mask[~train_mask] = True
    
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        
        out = model(data.x, data.edge_index)
        loss = criterion(out[train_mask].squeeze(), data.y[train_mask])
        
        loss.backward()
        optimizer.step()
        
        if (epoch + 1) % 10 == 0:
            model.eval()
            with torch.no_grad():
                pred = (out[test_mask] > 0.5).float()
                acc = (pred.squeeze() == data.y[test_mask]).float().mean()
                print(f'Epoch {epoch+1:03d}, Loss: {loss:.4f}, Test Acc: {acc:.4f}')
            model.train()
    
    return model

In [7]:
def evaluate_model(model, data):
    """評估模型性能"""
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        pred = (out > 0.5).float()
        acc = (pred.squeeze() == data.y).float().mean()
        
        tp = ((pred.squeeze() == 1) & (data.y == 1)).sum()
        fp = ((pred.squeeze() == 1) & (data.y == 0)).sum()
        tn = ((pred.squeeze() == 0) & (data.y == 0)).sum()
        fn = ((pred.squeeze() == 0) & (data.y == 1)).sum()
        
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        f1 = 2 * precision * recall / (precision + recall)
        
    return {
        'accuracy': acc.item(),
        'precision': precision.item(),
        'recall': recall.item(),
        'f1': f1.item()
    }

In [8]:
if __name__ == "__main__":
    # 設定隨機種子確保結果可重現
    torch.manual_seed(42)
    np.random.seed(42)
    
    print("開始載入數據...")
    # 讀取預處理後的數據
    normal_df = pd.read_csv('processed_data/SWaT_normal.csv')
    attack_df = pd.read_csv('processed_data/SWaT_attack.csv')
    print(f"載入完成! 正常數據形狀: {normal_df.shape}, 攻擊數據形狀: {attack_df.shape}")
    
    print("\n創建圖結構...")
    # 創建圖
    data = create_swat_graph(normal_df, attack_df)
    print(f"圖創建完成! 節點數: {data.x.size(0)}, 特徵維度: {data.x.size(1)}")
    print(f"邊的數量: {data.edge_index.size(1)}")
    
    # 設置模型參數
    in_channels = data.x.size(1)  # 特徵維度
    hidden_channels = 64
    out_channels = 1
    print(f"\n模型配置:")
    print(f"輸入特徵維度: {in_channels}")
    print(f"隱藏層維度: {hidden_channels}")
    print(f"輸出維度: {out_channels}")
    
    print("\n初始化模型...")
    # 創建模型
    model = SWaTGraphSAGE(in_channels, hidden_channels, out_channels)
    print("模型架構:")
    print(model)
    
    print("\n開始訓練...")
    # 訓練模型
    model = train_graphsage(model, data, epochs=100)
    print("訓練完成!")
    
    print("\n進行模型評估...")
    # 評估模型
    metrics = evaluate_model(model, data)
    print("\nModel Performance:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.4f}")
    
    # 保存模型
    print("\n保存模型...")
    torch.save({'model_state_dict': model.state_dict(),'x': data.x,'edge_index':  data.edge_index}, 'swat_graphsage_model.pt')      # 保存為新的文件
    #torch.save(model.state_dict(), 'swat_graphsage_model.pt')
    print("模型已保存至 swat_graphsage_model.pt")

開始載入數據...
載入完成! 正常數據形狀: (495000, 53), 攻擊數據形狀: (449919, 53)

創建圖結構...

清理後的正常數據列名:
['Timestamp', 'FIT101', 'LIT101', 'MV101', 'P101', 'P102', 'AIT201', 'AIT202', 'AIT203', 'FIT201', 'MV201', 'P201', 'P202', 'P203', 'P204', 'P205', 'P206', 'DPIT301', 'FIT301', 'LIT301', 'MV301', 'MV302', 'MV303', 'MV304', 'P301', 'P302', 'AIT401', 'AIT402', 'FIT401', 'LIT401', 'P401', 'P402', 'P403', 'P404', 'UV401', 'AIT501', 'AIT502', 'AIT503', 'AIT504', 'FIT501', 'FIT502', 'FIT503', 'FIT504', 'P501', 'P502', 'PIT501', 'PIT502', 'PIT503', 'FIT601', 'P601', 'P602', 'P603', 'Normal/Attack']

清理後的攻擊數據列名:
['Timestamp', 'FIT101', 'LIT101', 'MV101', 'P101', 'P102', 'AIT201', 'AIT202', 'AIT203', 'FIT201', 'MV201', 'P201', 'P202', 'P203', 'P204', 'P205', 'P206', 'DPIT301', 'FIT301', 'LIT301', 'MV301', 'MV302', 'MV303', 'MV304', 'P301', 'P302', 'AIT401', 'AIT402', 'FIT401', 'LIT401', 'P401', 'P402', 'P403', 'P404', 'UV401', 'AIT501', 'AIT502', 'AIT503', 'AIT504', 'FIT501', 'FIT502', 'FIT503', 'FIT504', 'P501', 

In [9]:
# 詳細的準確率分析
print("\n=== 詳細準確率分析 ===")
with torch.no_grad():
    # 獲取預測結果
    predictions = model(data.x, data.edge_index)
    pred_labels = (predictions > 0.5).float().numpy().flatten()
    true_labels = data.y.numpy()
    
    # 計算整體準確率
    accuracy = (pred_labels == true_labels).mean()
    print(f"\n整體準確率: {accuracy:.4f}")
    
    # 計算每類的準確率
    normal_mask = (true_labels == 0)
    attack_mask = (true_labels == 1)
    
    normal_accuracy = (pred_labels[normal_mask] == true_labels[normal_mask]).mean()
    attack_accuracy = (pred_labels[attack_mask] == true_labels[attack_mask]).mean()
    
    print(f"正常數據準確率: {normal_accuracy:.4f}")
    print(f"攻擊數據準確率: {attack_accuracy:.4f}")
    
    # 計算混淆矩陣指標
    tp = np.sum((pred_labels == 1) & (true_labels == 1))
    tn = np.sum((pred_labels == 0) & (true_labels == 0))
    fp = np.sum((pred_labels == 1) & (true_labels == 0))
    fn = np.sum((pred_labels == 0) & (true_labels == 1))
    
    print("\n混淆矩陣指標:")
    print(f"真陽性 (True Positives): {tp}")
    print(f"真陰性 (True Negatives): {tn}")
    print(f"假陽性 (False Positives): {fp}")
    print(f"假陰性 (False Negatives): {fn}")
    
    # 計算精確率、召回率和F1分數
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    
    print("\n其他性能指標:")
    print(f"精確率 (Precision): {precision:.4f}")
    print(f"召回率 (Recall): {recall:.4f}")
    print(f"F1分數: {f1:.4f}")
    
    # 異常檢測結果分析
    print("\n異常檢測結果分析:")
    anomaly_scores = predictions.numpy().flatten()
    
    print(f"檢測到的異常比例: {(anomaly_scores > 0.5).mean():.4f}")
    print(f"最高異常分數: {anomaly_scores.max():.4f}")
    print(f"最低異常分數: {anomaly_scores.min():.4f}")
    print(f"平均異常分數: {anomaly_scores.mean():.4f}")
    
    # 計算不同閾值下的準確率
    print("\n不同閾值下的準確率:")
    thresholds = [0.3, 0.4, 0.5, 0.6, 0.7]
    for threshold in thresholds:
        pred_at_threshold = (anomaly_scores > threshold).astype(float)
        acc_at_threshold = (pred_at_threshold == true_labels).mean()
        print(f"閾值 {threshold}: {acc_at_threshold:.4f}")


=== 詳細準確率分析 ===

整體準確率: 0.9996
正常數據準確率: 0.9999
攻擊數據準確率: 0.9993

混淆矩陣指標:
真陽性 (True Positives): 449582
真陰性 (True Negatives): 494929
假陽性 (False Positives): 71
假陰性 (False Negatives): 337

其他性能指標:
精確率 (Precision): 0.9998
召回率 (Recall): 0.9993
F1分數: 0.9995

異常檢測結果分析:
檢測到的異常比例: 0.4759
最高異常分數: 1.0000
最低異常分數: 0.0000
平均異常分數: 0.4759

不同閾值下的準確率:
閾值 0.3: 0.9991
閾值 0.4: 0.9995
閾值 0.5: 0.9996
閾值 0.6: 0.9995
閾值 0.7: 0.9994


In [10]:
def analyze_feature_importance(model, data, feature_names):
    """分析特徵重要性和節點關係"""
    plt.figure(figsize=(20, 10))
    
    # 1. 計算特徵重要性
    with torch.no_grad():
        feature_importance = []
        base_output = model(data.x, data.edge_index)
        base_pred = (base_output > 0.5).float()
        
        for i in range(data.x.size(1)):
            perturbed_x = data.x.clone()
            perturbed_x[:, i] = torch.zeros_like(perturbed_x[:, i])
            
            new_output = model(perturbed_x, data.edge_index)
            new_pred = (new_output > 0.5).float()
            
            importance = (base_pred != new_pred).float().mean().item()
            feature_importance.append(importance)
        
        # 正規化特徵重要性
        feature_importance = np.array(feature_importance)
        feature_importance = (feature_importance - feature_importance.min()) / (feature_importance.max() - feature_importance.min())
        
        # 繪製熱力圖
        plt.subplot(121)
        importance_df = pd.DataFrame({
            'Feature': feature_names,
            'Importance': feature_importance
        }).sort_values('Importance', ascending=True)
        
        sns.barplot(x='Importance', y='Feature', data=importance_df, 
                   palette='YlOrRd')
        plt.title('Feature Importance')
        plt.xlabel('Normalized Importance')
        
        # 繪製節點關係圖
        plt.subplot(122)
        G = nx.Graph()
        
        # 添加節點
        for i, name in enumerate(feature_names):
            G.add_node(i, name=name, importance=feature_importance[i])
        
        # 添加邊
        edge_index = data.edge_index.numpy()
        edges = list(zip(edge_index[0], edge_index[1]))
        G.add_edges_from(edges)
        
        # 設置節點位置
        pos = nx.spring_layout(G, k=1, iterations=50)
        
        # 繪製節點
        node_sizes = [3000 * G.nodes[node]['importance'] for node in G.nodes()]
        node_colors = [G.nodes[node]['importance'] for node in G.nodes()]
        
        nx.draw_networkx_nodes(G, pos, 
                             node_size=node_sizes,
                             node_color=node_colors,
                             cmap=plt.cm.YlOrRd)
        nx.draw_networkx_edges(G, pos, alpha=0.2, edge_color='gray')
        
        # 添加標籤
        labels = {i: f"{name}\n{feature_importance[i]:.2f}" 
                 for i, name in enumerate(feature_names)}
        nx.draw_networkx_labels(G, pos, labels, font_size=8)
        
        plt.title('Node Relationship Graph\n(Node size and color indicate importance)')
    
    plt.tight_layout()
    plt.savefig('feature_importance.png', dpi=300, bbox_inches='tight')
    plt.close()
    
    # 打印重要性排名
    importance_ranking = [(name, feature_importance[i]) 
                         for i, name in enumerate(feature_names)]
    importance_ranking.sort(key=lambda x: x[1], reverse=True)
    
    print("\n特徵重要性排名 (前10名):")
    for name, importance in importance_ranking[:10]:
        print(f"{name}: {importance:.4f}")
    
    # 進程級別分析
    process_importance = {}
    for name, importance in importance_ranking:
        process_num = name[-3:] if name[-3:].isdigit() else name[-2:] if name[-2:].isdigit() else name[-1]
        process = f"P{process_num}"
        if process not in process_importance:
            process_importance[process] = []
        process_importance[process].append(importance)
    
    print("\n進程重要性排名:")
    process_avg_importance = {
        process: np.mean(importances) 
        for process, importances in process_importance.items()
    }
    sorted_processes = sorted(
        process_avg_importance.items(), 
        key=lambda x: x[1], 
        reverse=True
    )
    
    for process, avg_importance in sorted_processes:
        print(f"{process}: {avg_importance:.4f}")
    
    return importance_ranking

In [11]:
def get_feature_names(normal_df):
    """獲取特徵名稱"""
    return [col for col in normal_df.columns 
            if col not in ['Timestamp', 'Normal/Attack']]

In [14]:
normal_df = pd.read_csv('processed_data/normal.csv')
attack_df = pd.read_csv('processed_data/attack.csv')

# 獲取特徵名稱
feature_names = get_feature_names(normal_df)

# 創建圖數據
data = create_swat_graph(normal_df, attack_df)

# 載入模型
model = SWaTGraphSAGE(
    in_channels=data.x.size(1),
    hidden_channels=64,
    out_channels=1
)
checkpoint = torch.load('swat_graphsage_model.pt')

# 恢復模型權重
model.load_state_dict(checkpoint['model_state_dict'])
# model.load_state_dict(torch.load('swat_graphsage_model.pt'))
model.eval()

print("分析特徵重要性...")
importance_ranking = analyze_feature_importance(model, data, feature_names)

print("\n分析完成! 結果已保存至 feature_importance.png")


清理後的正常數據列名:
['Timestamp', 'FIT101', 'LIT101', 'MV101', 'P101', 'P102', 'AIT201', 'AIT202', 'AIT203', 'FIT201', 'MV201', 'P201', 'P202', 'P203', 'P204', 'P205', 'P206', 'DPIT301', 'FIT301', 'LIT301', 'MV301', 'MV302', 'MV303', 'MV304', 'P301', 'P302', 'AIT401', 'AIT402', 'FIT401', 'LIT401', 'P401', 'P402', 'P403', 'P404', 'UV401', 'AIT501', 'AIT502', 'AIT503', 'AIT504', 'FIT501', 'FIT502', 'FIT503', 'FIT504', 'P501', 'P502', 'PIT501', 'PIT502', 'PIT503', 'FIT601', 'P601', 'P602', 'P603', 'Normal/Attack']

清理後的攻擊數據列名:
['Timestamp', 'FIT101', 'LIT101', 'MV101', 'P101', 'P102', 'AIT201', 'AIT202', 'AIT203', 'FIT201', 'MV201', 'P201', 'P202', 'P203', 'P204', 'P205', 'P206', 'DPIT301', 'FIT301', 'LIT301', 'MV301', 'MV302', 'MV303', 'MV304', 'P301', 'P302', 'AIT401', 'AIT402', 'FIT401', 'LIT401', 'P401', 'P402', 'P403', 'P404', 'UV401', 'AIT501', 'AIT502', 'AIT503', 'AIT504', 'FIT501', 'FIT502', 'FIT503', 'FIT504', 'P501', 'P502', 'PIT501', 'PIT502', 'PIT503', 'FIT601', 'P601', 'P602', 'P603

  checkpoint = torch.load('swat_graphsage_model.pt')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='Importance', y='Feature', data=importance_df,



特徵重要性排名 (前10名):
AIT202: 1.0000
AIT501: 0.3716
DPIT301: 0.1933
LIT301: 0.0970
FIT201: 0.0899
LIT101: 0.0460
AIT502: 0.0459
PIT502: 0.0390
AIT402: 0.0360
P501: 0.0359

進程重要性排名:
P202: 0.5000
P501: 0.1030
P301: 0.0600
P201: 0.0248
P304: 0.0225
P502: 0.0217
P402: 0.0200
P101: 0.0160
P504: 0.0155
P205: 0.0146
P302: 0.0139
P503: 0.0114
P401: 0.0105
P303: 0.0068
P203: 0.0064
P403: 0.0006
P601: 0.0000
P102: 0.0000
P204: 0.0000
P206: 0.0000
P404: 0.0000
P602: 0.0000
P603: 0.0000

分析完成! 結果已保存至 feature_importance.png
