In [None]:
from collections import deque
from time import perf_counter
import numpy as np
from scipy.stats import entropy
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler

# Benchmark parameters (placeholder values).
W = 256       # sliding-window length, in flows
B = 1024      # number of entropy values collected before each clustering run
bins = 50     # number of histogram bins spanning [0, 1]

# Sliding-window state.
window = deque(maxlen=W)
hist = np.zeros(bins)
edges = np.linspace(0, 1, bins + 1)

# One entropy value per processed flow, flushed after every clustering run.
# (Previously this list was never created, so the loop failed with NameError.)
entropies = []

# Accumulated timings, in milliseconds.
feat_times = []
cluster_times = []

# Consume the flow stream. `stream_of_flows` is not defined in this cell and
# must be provided beforehand; it is assumed to yield scalar values, nominally
# in [0, 1] -- TODO confirm against the real generator.
for idx, flow in enumerate(stream_of_flows):
    # 1) Incremental histogram update.
    if len(window) == W:
        # The window is full, so append() below evicts window[0]; remove that
        # value's contribution from the histogram first.
        old = window[0]
        # Clamp so that 1.0 (or any out-of-range value) lands in a valid bin
        # instead of raising IndexError or wrapping around via index -1.
        b = int(np.clip(np.searchsorted(edges, old, side='right') - 1, 0, bins - 1))
        hist[b] -= 1
    window.append(flow)
    # Add the new value, using the same clamped bin rule as the removal above.
    b_new = int(np.clip(np.searchsorted(edges, flow, side='right') - 1, 0, bins - 1))
    hist[b_new] += 1

    # 2) Entropy of the current window (Shannon, as a demonstration).
    # NOTE(review): only the entropy call is timed; the histogram update in
    # step 1 is not included in feat_times.
    feat_start = perf_counter()
    h = entropy((hist / hist.sum()) + 1e-10)
    feat_end = perf_counter()
    feat_times.append((feat_end - feat_start) * 1000)  # ms

    # Store this window's entropy.
    entropies.append(h)

    # 3) Run clustering once a full batch of entropies has accumulated.
    if len(entropies) == B:
        # Single-feature matrix; could be extended to several features.
        X = np.array(entropies).reshape(-1, 1)
        Xs = StandardScaler().fit_transform(X)

        tc0 = perf_counter()
        # Fixed seed so repeated runs fit the same model.
        gmm = GaussianMixture(n_components=2, random_state=0)
        gmm.fit(Xs)
        gmm.predict(Xs)
        tc1 = perf_counter()
        cluster_times.append((tc1 - tc0) * 1000)  # ms

        entropies.clear()

# Averages. If the stream ended before any sample/batch was recorded, report
# NaN explicitly instead of triggering numpy's empty-mean RuntimeWarning.
t_feat = np.mean(feat_times) if feat_times else float("nan")            # ms/flow
T_cluster = np.mean(cluster_times) if cluster_times else float("nan")   # ms/batch
# Amortised per-flow cost: feature time plus the clustering cost spread over B flows.
t1 = t_feat + T_cluster / B

print(f"t_feat = {t_feat:.4f} ms/flow")
print(f"T_cluster = {T_cluster:.2f} ms/batch")
print(f"t1 = {t1:.4f} ms/flow")


DataFrame successfully exported as Parquet to combined_data.parquet
Label
DDoS      333540
Benign     97831
Name: count, dtype: int64
處理後資料大小: (431371, 78)
內存使用減少: 16.62%
   Protocol  Flow Duration  Total Fwd Packets  Total Backward Packets  \
0        17             49                  2                       0   
1        17              1                  2                       0   
2        17              1                  2                       0   
3        17              1                  2                       0   
4        17              1                  2                       0   

   Fwd Packets Length Total  Bwd Packets Length Total  Fwd Packet Length Max  \
0                     458.0                       0.0                  229.0   
1                    2944.0                       0.0                 1472.0   
2                     458.0                       0.0                  229.0   
3                    2944.0                       0.0                 

In [None]:
from collections import deque
from time import perf_counter
import numpy as np
from scipy.stats import entropy
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler

# Benchmark parameters (placeholder values).
W = 256       # sliding-window length, in flows
B = 1024      # number of entropy values collected before each clustering run
bins = 50     # number of histogram bins spanning [0, 1]

# Sliding-window state.
window = deque(maxlen=W)
hist = np.zeros(bins)
edges = np.linspace(0, 1, bins + 1)

# One entropy value per processed flow, flushed after every clustering run.
# (Previously this list was never created, so the loop failed with NameError.)
entropies = []

# Accumulated timings, in milliseconds.
feat_times = []
cluster_times = []

# Consume the flow stream. `stream_of_flows` is not defined in this cell and
# must be provided beforehand; it is assumed to yield scalar values, nominally
# in [0, 1] -- TODO confirm against the real generator.
for idx, flow in enumerate(stream_of_flows):
    # 1) Incremental histogram update.
    if len(window) == W:
        # The window is full, so append() below evicts window[0]; remove that
        # value's contribution from the histogram first.
        old = window[0]
        # Clamp so that 1.0 (or any out-of-range value) lands in a valid bin
        # instead of raising IndexError or wrapping around via index -1.
        b = int(np.clip(np.searchsorted(edges, old, side='right') - 1, 0, bins - 1))
        hist[b] -= 1
    window.append(flow)
    # Add the new value, using the same clamped bin rule as the removal above.
    b_new = int(np.clip(np.searchsorted(edges, flow, side='right') - 1, 0, bins - 1))
    hist[b_new] += 1

    # 2) Entropy of the current window (Shannon, as a demonstration).
    # NOTE(review): only the entropy call is timed; the histogram update in
    # step 1 is not included in feat_times.
    feat_start = perf_counter()
    h = entropy((hist / hist.sum()) + 1e-10)
    feat_end = perf_counter()
    feat_times.append((feat_end - feat_start) * 1000)  # ms

    # Store this window's entropy.
    entropies.append(h)

    # 3) Run clustering once a full batch of entropies has accumulated.
    if len(entropies) == B:
        # Single-feature matrix; could be extended to several features.
        X = np.array(entropies).reshape(-1, 1)
        Xs = StandardScaler().fit_transform(X)

        tc0 = perf_counter()
        # Fixed seed so repeated runs fit the same model.
        gmm = GaussianMixture(n_components=2, random_state=0)
        gmm.fit(Xs)
        gmm.predict(Xs)
        tc1 = perf_counter()
        cluster_times.append((tc1 - tc0) * 1000)  # ms

        entropies.clear()

# Averages. If the stream ended before any sample/batch was recorded, report
# NaN explicitly instead of triggering numpy's empty-mean RuntimeWarning.
t_feat = np.mean(feat_times) if feat_times else float("nan")            # ms/flow
T_cluster = np.mean(cluster_times) if cluster_times else float("nan")   # ms/batch
# Amortised per-flow cost: feature time plus the clustering cost spread over B flows.
t1 = t_feat + T_cluster / B

print(f"t_feat = {t_feat:.4f} ms/flow")
print(f"T_cluster = {T_cluster:.2f} ms/batch")
print(f"t1 = {t1:.4f} ms/flow")


In [16]:
import time
import tracemalloc
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.mixture import GaussianMixture

# Reference comparison of the two models. These figures are literal strings,
# not values computed in this cell.
for row in (
    "模型／機制\t\tF1 Score\tRecall\tThroughput (samp/ms)\tMemory (MB)",
    "Transformer\t\t0.997\t\t0.997\t\t79.90\t\t\t1498",
    "Mamba\t\t\t0.983\t\t0.984\t\t383.52\t\t499\n",
):
    print(row)

# 模擬推論時間函式
def transformer_inference(batch_size):
    """Simulate Transformer inference latency by sleeping.

    The reference latency is 12.81 ms for a batch of 1024 samples. The sleep
    is scaled linearly with ``batch_size`` so the argument is honoured (it was
    previously ignored); a batch of 1024 still sleeps exactly 12.81 ms.

    Args:
        batch_size: number of samples in the simulated batch.
    """
    # Multiplying and dividing by 1024 (a power of two) is exact in floating
    # point, so batch_size == 1024 reproduces the original 12.81e-3 s sleep.
    time.sleep(12.81e-3 * batch_size / 1024)

def mamba_inference(batch_size):
    """Simulate Mamba inference latency by sleeping.

    The latency is ``batch_size`` divided by a throughput of 383.52 samples
    per millisecond, converted to seconds for ``time.sleep``.

    Args:
        batch_size: number of samples in the simulated batch.
    """
    samples_per_ms = 383.52
    latency_ms = batch_size / samples_per_ms
    time.sleep(latency_ms / 1000)

# Benchmark parameters.
W = 100        # length of the sliding window used for the entropy features
B = 1024       # flows per batch
alpha = 0.5    # order of the Renyi entropy
bins = 50      # number of histogram bins

# Synthetic flow values: W + B uniform samples, seeded for reproducibility.
np.random.seed(42)
total_flows = W + B
flows = np.random.rand(total_flows).astype(np.float32)

# Baseline: time spent buffering B flows one by one into a Python list.
append_start = time.perf_counter()
buffer = []
for flow in flows[:B]:
    buffer.append(flow)
append_end = time.perf_counter()
t_append_total = (append_end - append_start) * 1000  # ms for the whole batch

# NOTE(review): the per-flow baseline is a fixed 0.001 ms; the measured
# t_append_total above is not used to derive it -- confirm this is intended.
t_append_per_flow = 0.001
baseline_flow = t_append_per_flow

# Vectorised entropy computation (Shannon, Renyi, min-entropy) over every
# sliding window, with memory usage tracked by tracemalloc.
tracemalloc.start()
tr0_snap = tracemalloc.take_snapshot()
edges = np.linspace(0, 1, bins + 1)
# Clamp the bin index: a flow equal to 1.0 (possible after float32 rounding)
# or outside [0, 1] would otherwise index past the one-hot table or wrap
# around through index -1.
bin_idxs = np.clip(np.searchsorted(edges, flows, side='right') - 1, 0, bins - 1)
one_hot = np.eye(bins, dtype=np.int32)[bin_idxs]
# Sliding-window counts per bin: convolving each one-hot column with a length-W
# box kernel gives the histogram of every window. The kernel is built once
# rather than once per bin.
window_kernel = np.ones(W, dtype=int)
hist_matrix = np.array([
    np.convolve(one_hot[:, i], window_kernel, mode='valid')
    for i in range(bins)
])[:, :B]

# NOTE(review): the timer below covers only the entropy formulas; building
# hist_matrix above is not included, and tracemalloc is active while timing.
et0 = time.perf_counter()
probs = hist_matrix / W + 1e-10  # epsilon keeps log() finite for empty bins
# Shannon entropy (bits)
shannon = -(probs * np.log2(probs)).sum(axis=0)
# Renyi entropy of order alpha, in bits. This used the natural log before,
# mixing nats with the log2-based Shannon and min-entropy features.
renyi = (1.0 / (1.0 - alpha)) * np.log2((probs ** alpha).sum(axis=0))
# Min-entropy (bits)
min_ent = -np.log2(probs.max(axis=0))
et1 = time.perf_counter()
# Entropy computation time per flow (ms)
entropy_time = (et1 - et0) * 1000 / B

# Memory statistics. Read the peak before taking the closing snapshot so that
# allocations made by the snapshot/comparison bookkeeping cannot inflate it.
peak_mem = tracemalloc.get_traced_memory()[1] / (1024 * 1024)  # MB
tr1_snap = tracemalloc.take_snapshot()
stats = tr1_snap.compare_to(tr0_snap, 'lineno')
tracemalloc.stop()

print(f"Entropy stage 記憶體峰值: {peak_mem:.3f} MB")

# Per-flow cost of GMM.predict on the three standardised entropy features.
# Scaling and fitting happen outside the timed region; only predict() is timed.
feat_vec = np.vstack([shannon, renyi, min_ent]).T
scaler = StandardScaler()
X_scaled = scaler.fit_transform(feat_vec)
gmm = GaussianMixture(n_components=2, random_state=0).fit(X_scaled)

predict_start = time.perf_counter()
gmm.predict(X_scaled)
predict_end = time.perf_counter()
predict_ms = (predict_end - predict_start) * 1000
gmm_time = predict_ms / B  # ms per flow

def _ms_per_flow(infer, n_flows):
    """Call ``infer(n_flows)`` once and return its wall time in ms per flow."""
    start = time.perf_counter()
    infer(n_flows)
    end = time.perf_counter()
    return (end - start) * 1000 / n_flows


# Transformer inference per flow (Transformer-only pipeline).
transform_time = _ms_per_flow(transformer_inference, B)

# Mamba inference per flow (entropy + Mamba pipeline).
mamba_time = _ms_per_flow(mamba_inference, B)
# NOTE(review): 0.28 scales the Mamba cost for stage 2; presumably the share of
# flows forwarded to Mamba after the entropy stage -- confirm its origin.
stage2_em = 0.28 * mamba_time

# Stage 1 of the entropy + Mamba pipeline: entropy computation plus GMM.predict.
stage1_em = entropy_time + gmm_time

# End-to-end per-flow totals for both pipelines.
total_transform = baseline_flow + transform_time
total_em = baseline_flow + stage1_em + stage2_em

# Results table: baseline, entropy stage, inference stage and total per pipeline.
table_lines = [
    "| item             | baseline (ms/flow) | entropy stage (ms/flow) | inference stage (ms/flow) | total (ms/flow) |",
    "|------------------|--------------------|-------------------------|---------------------------|-----------------|",
    f"| Transform        | {baseline_flow:.6f}           | {0.000000:.6f}              | {transform_time:.6f}                 | {total_transform:.6f}      |",
    f"| Entropy + Mamba  | {baseline_flow:.6f}           | {stage1_em:.6f}              | {stage2_em:.6f}                 | {total_em:.6f}      |",
]
print("\n".join(table_lines))

# Overall speed-up of entropy + Mamba relative to the Transformer pipeline.
speedup = total_transform / total_em
print(f"\n整體速度提升：{speedup:.2f}x")


模型／機制		F1 Score	Recall	Throughput (samp/ms)	Memory (MB)
Transformer		0.997		0.997		79.90			1498
Mamba			0.983		0.984		383.52		499

Entropy stage 記憶體峰值: 1.486 MB
| item             | baseline (ms/flow) | entropy stage (ms/flow) | inference stage (ms/flow) | total (ms/flow) |
|------------------|--------------------|-------------------------|---------------------------|-----------------|
| Transform        | 0.001000           | 0.000000              | 0.015719                 | 0.016719      |
| Entropy + Mamba  | 0.001000           | 0.000799              | 0.000927                 | 0.002726      |

整體速度提升：6.13x
