In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so the
# notebook can read and write files stored there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Google Drive locations of the raw CSV captures, one file per traffic scenario.
# All files live in the same folder, so the folder is spelled out only once.
DATA_DIR = "/content/drive/My Drive/EL_SEMESTER_8/VAE/data/"

benign = DATA_DIR + "Monday-WorkingHours.pcap_ISCX.csv"
dos = DATA_DIR + "Wednesday-workingHours.pcap_ISCX.csv"
portscan = DATA_DIR + "Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv"
ddos = DATA_DIR + "Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv"
patator = DATA_DIR + "Tuesday-WorkingHours.pcap_ISCX.csv"

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from scipy.stats import f_oneway
import seaborn as sns
import matplotlib.pyplot as plt
import os

In [None]:
# ====== LOAD DATA ====== #

# Read each raw capture into its own DataFrame; the targets on the left are
# listed in the same order as the path variables on the right.
df_benign, df_dos, df_portscan, df_ddos, df_patator = [
    pd.read_csv(csv_path)
    for csv_path in (benign, dos, portscan, ddos, patator)
]

In [None]:
# ====== PREPROCESSING ======

def _clean_flows(raw_df):
    """Return a cleaned copy of one raw capture DataFrame.

    Steps, applied in order:
      1. strip leading/trailing whitespace from every column name;
      2. drop every row that holds +inf or -inf in any column;
      3. drop every row that still holds a NaN.

    The result is a new DataFrame. Nothing is dropped in place on a filtered
    slice, which is the pattern that triggers pandas' SettingWithCopyWarning,
    and re-running the cell on already-cleaned frames is harmless.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Frame as loaded from CSV (or an already-cleaned frame).

    Returns
    -------
    pandas.DataFrame
        Cleaned frame; surviving rows keep their original index labels.
    """
    stripped = raw_df.rename(columns=str.strip)
    has_inf = stripped.isin([np.inf, -np.inf]).any(axis=1)
    return stripped.loc[~has_inf].dropna()


# Apply the same cleaning to every capture
df_benign = _clean_flows(df_benign)
df_dos = _clean_flows(df_dos)
df_portscan = _clean_flows(df_portscan)
df_ddos = _clean_flows(df_ddos)
df_patator = _clean_flows(df_patator)

# Remove rows labelled "Heartbleed" (only the DoS capture is filtered here)
df_dos = df_dos[df_dos['Label'] != 'Heartbleed']

In [None]:
# ====== Feature extraction for DoS ====== #

# ====== Keep only rows labelled BENIGN or DoS* ======
dos_labels = df_dos["Label"]
filtered_data_dos = df_dos[dos_labels.str.startswith("DoS") | (dos_labels == "BENIGN")]

# Split into the feature matrix (X) and the binary target (y):
# - BENIGN         -> 0
# - every "DoS..." -> 1
X_dos = filtered_data_dos.drop(columns="Label")
y_dos = filtered_data_dos["Label"].str.startswith("DoS").astype(int)

print("Total data awal:", X_dos.shape)
print("Jumlah NaN dalam X_dos:", X_dos.isna().sum().sum())  # total NaN count

# ====== Feature selection with SelectKBest ======
# NOTE(review): the scores are fitted on every filtered row, i.e. before the
# train/validation split performed later in the notebook - confirm intended.
k_dos = 9  # how many top-scoring features to keep
selector_dos = SelectKBest(score_func=f_classif, k=k_dos)
X_new_dos = selector_dos.fit_transform(X_dos, y_dos)

# Per-feature scores, ranked from highest to lowest
scores_dos = selector_dos.scores_
feature_scores_dos = (
    pd.DataFrame({"Feature": X_dos.columns, "Score": scores_dos})
    .sort_values(by="Score", ascending=False)
)

# The k best-ranked rows and the corresponding column names
selected_features_dos = feature_scores_dos.iloc[:k_dos]
top_features_dos = selected_features_dos["Feature"].tolist()

# Keep only the selected columns (in ranking order)
X_selected_dos = X_dos[top_features_dos].copy()

print(top_features_dos)

print("Total data sebelum split:", X_selected_dos.shape)
print("Jumlah NaN dalam X_selected_dos:", X_selected_dos.isna().sum().sum())

Total data awal: (691395, 78)
Jumlah NaN dalam X_dos: 0


  f = msb / msw


['Bwd Packet Length Mean', 'Avg Bwd Segment Size', 'Bwd Packet Length Std', 'Bwd Packet Length Max', 'Packet Length Std', 'Idle Max', 'Fwd IAT Std', 'Fwd IAT Max', 'Flow IAT Max']
Total data sebelum split: (691395, 9)
Jumlah NaN dalam X_selected_dos: 0


In [None]:
# ====== Feature extraction for PortScan ====== #

# ====== Keep only rows labelled BENIGN or PortScan ======
portscan_keep = df_portscan["Label"].isin(["BENIGN", "PortScan"])
filtered_data_portscan = df_portscan[portscan_keep]

# Split into the feature matrix (X) and the binary target (y):
# - BENIGN   -> 0
# - PortScan -> 1
X_portscan = filtered_data_portscan.drop(columns="Label")
y_portscan = (filtered_data_portscan["Label"] == "PortScan").astype(int)

print("Total data awal:", X_portscan.shape)
print("Jumlah NaN dalam X_portscan:", X_portscan.isna().sum().sum())  # total NaN count

# ====== Feature selection with SelectKBest ======
# NOTE(review): the scores are fitted on every filtered row, i.e. before the
# train/validation split performed later in the notebook - confirm intended.
k_portscan = 9  # how many top-scoring features to keep
selector_portscan = SelectKBest(score_func=f_classif, k=k_portscan)
X_new_portscan = selector_portscan.fit_transform(X_portscan, y_portscan)

# Per-feature scores, ranked from highest to lowest
scores_portscan = selector_portscan.scores_
feature_scores_portscan = (
    pd.DataFrame({"Feature": X_portscan.columns, "Score": scores_portscan})
    .sort_values(by="Score", ascending=False)
)

# The k best-ranked rows and the corresponding column names
selected_features_portscan = feature_scores_portscan.iloc[:k_portscan]
top_features_portscan = selected_features_portscan["Feature"].tolist()

# Keep only the selected columns (in ranking order)
X_selected_portscan = X_portscan[top_features_portscan].copy()

print(top_features_portscan)

print("Total data sebelum split:", X_selected_portscan.shape)
print("Jumlah NaN dalam X_selected_portscan:", X_selected_portscan.isna().sum().sum())

Total data awal: (286096, 78)
Jumlah NaN dalam X_portscan: 0
['PSH Flag Count', 'Min Packet Length', 'Bwd Packet Length Min', 'Average Packet Size', 'Fwd Packet Length Min', 'ACK Flag Count', 'min_seg_size_forward', 'Packet Length Mean', 'Bwd Packet Length Mean']
Total data sebelum split: (286096, 9)
Jumlah NaN dalam X_selected_portscan: 0


  f = msb / msw


In [None]:
# ====== Feature extraction for DDoS ====== #

# ====== Keep only rows labelled BENIGN or DDoS ======
ddos_keep = df_ddos["Label"].isin(["BENIGN", "DDoS"])
filtered_data_ddos = df_ddos[ddos_keep]

# Split into the feature matrix (X) and the binary target (y):
# - BENIGN -> 0
# - DDoS   -> 1
X_ddos = filtered_data_ddos.drop(columns="Label")
y_ddos = (filtered_data_ddos["Label"] == "DDoS").astype(int)

print("Total data awal:", X_ddos.shape)
print("Jumlah NaN dalam X_ddos:", X_ddos.isna().sum().sum())  # total NaN count

# ====== Feature selection with SelectKBest ======
# NOTE(review): the scores are fitted on every filtered row, i.e. before the
# train/validation split performed later in the notebook - confirm intended.
k_ddos = 9  # how many top-scoring features to keep
selector_ddos = SelectKBest(score_func=f_classif, k=k_ddos)
X_new_ddos = selector_ddos.fit_transform(X_ddos, y_ddos)

# Per-feature scores, ranked from highest to lowest
scores_ddos = selector_ddos.scores_
feature_scores_ddos = (
    pd.DataFrame({"Feature": X_ddos.columns, "Score": scores_ddos})
    .sort_values(by="Score", ascending=False)
)

# The k best-ranked rows and the corresponding column names
selected_features_ddos = feature_scores_ddos.iloc[:k_ddos]
top_features_ddos = selected_features_ddos["Feature"].tolist()

# Keep only the selected columns (in ranking order)
X_selected_ddos = X_ddos[top_features_ddos].copy()

print(top_features_ddos)

print("Total data sebelum split:", X_selected_ddos.shape)
print("Jumlah NaN dalam X_selected_ddos:", X_selected_ddos.isna().sum().sum())

Total data awal: (225711, 78)
Jumlah NaN dalam X_ddos: 0
['Bwd Packet Length Mean', 'Avg Bwd Segment Size', 'Bwd Packet Length Max', 'Bwd Packet Length Std', 'Destination Port', 'URG Flag Count', 'Packet Length Mean', 'Average Packet Size', 'Packet Length Std']
Total data sebelum split: (225711, 9)
Jumlah NaN dalam X_selected_ddos: 0


  f = msb / msw


In [None]:
# ====== Feature extraction for Patator ====== #

# ====== Keep only rows labelled BENIGN or containing "Patator" ======
patator_labels = df_patator["Label"]
filtered_data_patator = df_patator[
    patator_labels.str.contains("Patator") | (patator_labels == "BENIGN")
]

# Split into the feature matrix (X) and the binary target (y):
# - BENIGN                         -> 0
# - every label with "Patator" in it -> 1
X_patator = filtered_data_patator.drop(columns="Label")
y_patator = filtered_data_patator["Label"].str.contains("Patator").astype(int)

print("Total data awal:", X_patator.shape)
print("Jumlah NaN dalam X_patator:", X_patator.isna().sum().sum())  # total NaN count

# ====== Feature selection with SelectKBest ======
# NOTE(review): the scores are fitted on every filtered row, i.e. before the
# train/validation split performed later in the notebook - confirm intended.
k_patator = 9  # how many top-scoring features to keep
selector_patator = SelectKBest(score_func=f_classif, k=k_patator)
X_new_patator = selector_patator.fit_transform(X_patator, y_patator)

# Per-feature scores, ranked from highest to lowest
scores_patator = selector_patator.scores_
feature_scores_patator = (
    pd.DataFrame({"Feature": X_patator.columns, "Score": scores_patator})
    .sort_values(by="Score", ascending=False)
)

# The k best-ranked rows and the corresponding column names
selected_features_patator = feature_scores_patator.iloc[:k_patator]
top_features_patator = selected_features_patator["Feature"].tolist()

# Keep only the selected columns (in ranking order)
X_selected_patator = X_patator[top_features_patator].copy()

print(top_features_patator)
print("Total data sebelum split:", X_selected_patator.shape)
print("Jumlah NaN dalam X_selected_patator:", X_selected_patator.isna().sum().sum())

Total data awal: (445645, 78)
Jumlah NaN dalam X_patator: 0
['Fwd PSH Flags', 'SYN Flag Count', 'Min Packet Length', 'Bwd Packet Length Min', 'PSH Flag Count', 'Fwd Packet Length Min', 'Average Packet Size', 'act_data_pkt_fwd', 'Bwd Packet Length Mean']
Total data sebelum split: (445645, 9)
Jumlah NaN dalam X_selected_patator: 0


  f = msb / msw


In [None]:
# Output directory prefix on Google Drive; it ends with "/" because the CSV
# file names are appended to it with plain string concatenation.
save_path = "/content/drive/My Drive/EL_SEMESTER_8/VAE/4type/csv_dec/"

In [None]:
# ====== Build the training/validation CSV files for DoS detection ======

# ====== Stratified train/validation split (20% held out) ======
X_train_dos, X_val_dos, y_train_dos, y_val_dos = train_test_split(
    X_selected_dos,
    y_dos,
    test_size=0.2,
    stratify=y_dos,
    random_state=42,
)

# Discard the shuffled original row labels so every part is indexed 0..n-1
X_train_dos, y_train_dos, X_val_dos, y_val_dos = (
    part.reset_index(drop=True)
    for part in (X_train_dos, y_train_dos, X_val_dos, y_val_dos)
)

print("Unique labels in y_train_dos:", y_train_dos.unique())  # expected: only 0 and 1

# ====== Min-max scaling: fitted on the training rows only, reused for validation ======
scaler = MinMaxScaler()
scaler.fit(X_train_dos)
X_train_dos = pd.DataFrame(scaler.transform(X_train_dos), columns=X_train_dos.columns)
X_val_dos = pd.DataFrame(scaler.transform(X_val_dos), columns=X_val_dos.columns)

# ====== Re-attach the labels so rows can be filtered by class ======
train_data_dos = X_train_dos.assign(Label=y_train_dos.values)
val_data_dos = X_val_dos.assign(Label=y_val_dos.values)

print("Total data in train_data_dos:", train_data_dos.shape)
print(train_data_dos.head())  # sanity check: the frame should not be empty

# ====== Separate benign and DoS rows in the training set ======
train_benign_for_dos = train_data_dos.loc[train_data_dos["Label"] == 0].drop(columns="Label")
train_dos = train_data_dos.loc[train_data_dos["Label"] == 1].drop(columns="Label")

# ====== Separate benign and DoS rows in the validation set ======
val_benign_for_dos = val_data_dos.loc[val_data_dos["Label"] == 0].drop(columns="Label")
val_dos = val_data_dos.loc[val_data_dos["Label"] == 1].drop(columns="Label")

# ====== Write the four subsets to CSV (features only, no index column) ======
for file_name, subset in (
    ("train_benign_for_dos.csv", train_benign_for_dos),
    ("train_dos.csv", train_dos),
    ("val_benign_for_dos.csv", val_benign_for_dos),
    ("val_dos.csv", val_dos),
):
    subset.to_csv(save_path + file_name, index=False)

print("Data telah dipisah menjadi training & validation dan disimpan ke CSV.")

Unique labels in y_train_dos: [1 0]
Total data in train_data_dos: (553116, 10)
   Bwd Packet Length Mean  Avg Bwd Segment Size  Bwd Packet Length Std  \
0                0.000000              0.000000               0.000000   
1                0.427208              0.427208               0.261710   
2                0.130786              0.130786               0.094202   
3                0.000000              0.000000               0.000000   
4                0.032754              0.032754               0.000000   

   Bwd Packet Length Max  Packet Length Std  Idle Max  Fwd IAT Std  \
0               0.000000           0.001005  0.833333     0.000000   
1               0.222427           0.361730  0.000000     0.000101   
2               0.074142           0.112202  0.083333     0.039548   
3               0.000000           0.000000  0.000000     0.000000   
4               0.006503           0.011678  0.000000     0.000000   

    Fwd IAT Max  Flow IAT Max  Label  
0  8.333333e-01 

In [None]:
# ====== Build the training/validation CSV files for PortScan detection ======

# ====== Stratified train/validation split (20% held out) ======
X_train_portscan, X_val_portscan, y_train_portscan, y_val_portscan = train_test_split(
    X_selected_portscan,
    y_portscan,
    test_size=0.2,
    stratify=y_portscan,
    random_state=42,
)

# Discard the shuffled original row labels so every part is indexed 0..n-1
X_train_portscan, y_train_portscan, X_val_portscan, y_val_portscan = (
    part.reset_index(drop=True)
    for part in (X_train_portscan, y_train_portscan, X_val_portscan, y_val_portscan)
)

print("Unique labels in y_train_portscan:", y_train_portscan.unique())  # expected: only 0 and 1

# ====== Min-max scaling: fitted on the training rows only, reused for validation ======
scaler = MinMaxScaler()
scaler.fit(X_train_portscan)
X_train_portscan = pd.DataFrame(scaler.transform(X_train_portscan), columns=X_train_portscan.columns)
X_val_portscan = pd.DataFrame(scaler.transform(X_val_portscan), columns=X_val_portscan.columns)

# ====== Re-attach the labels so rows can be filtered by class ======
train_data_portscan = X_train_portscan.assign(Label=y_train_portscan.values)
val_data_portscan = X_val_portscan.assign(Label=y_val_portscan.values)

print("Total data in train_data_portscan:", train_data_portscan.shape)
print(train_data_portscan.head())  # sanity check: the frame should not be empty

# ====== Separate benign and PortScan rows in the training set ======
train_benign_for_port = train_data_portscan.loc[train_data_portscan["Label"] == 0].drop(columns="Label")
train_portscan = train_data_portscan.loc[train_data_portscan["Label"] == 1].drop(columns="Label")

# ====== Separate benign and PortScan rows in the validation set ======
val_benign_for_port = val_data_portscan.loc[val_data_portscan["Label"] == 0].drop(columns="Label")
val_portscan = val_data_portscan.loc[val_data_portscan["Label"] == 1].drop(columns="Label")

# ====== Write the four subsets to CSV (features only, no index column) ======
for file_name, subset in (
    ("train_benign_for_port.csv", train_benign_for_port),
    ("train_portscan.csv", train_portscan),
    ("val_benign_for_port.csv", val_benign_for_port),
    ("val_portscan.csv", val_portscan),
):
    subset.to_csv(save_path + file_name, index=False)

print("Data telah dipisah menjadi training & validation dan disimpan ke CSV.")

Unique labels in y_train_portscan: [1 0]
Total data in train_data_portscan: (228876, 10)
   PSH Flag Count  Min Packet Length  Bwd Packet Length Min  \
0             1.0           0.006920               0.004110   
1             0.0           0.020761               0.004110   
2             0.0           0.131488               0.113699   
3             1.0           0.006920               0.004110   
4             0.0           0.107266               0.040411   

   Average Packet Size  Fwd Packet Length Min  ACK Flag Count  \
0             0.002418               0.001359             0.0   
1             0.003868               0.004076             1.0   
2             0.053917               0.025815             0.0   
3             0.002418               0.001359             0.0   
4             0.046119               0.021060             0.0   

   min_seg_size_forward  Packet Length Mean  Bwd Packet Length Mean  Label  
0              0.400000            0.002099                0.002

In [None]:
# ====== Build the training/validation CSV files for DDoS detection ======

# ====== Stratified train/validation split (20% held out) ======
X_train_ddos, X_val_ddos, y_train_ddos, y_val_ddos = train_test_split(
    X_selected_ddos,
    y_ddos,
    test_size=0.2,
    stratify=y_ddos,
    random_state=42,
)

# Discard the shuffled original row labels so every part is indexed 0..n-1
X_train_ddos, y_train_ddos, X_val_ddos, y_val_ddos = (
    part.reset_index(drop=True)
    for part in (X_train_ddos, y_train_ddos, X_val_ddos, y_val_ddos)
)

print("Unique labels in y_train_ddos:", y_train_ddos.unique())  # expected: only 0 and 1

# ====== Min-max scaling: fitted on the training rows only, reused for validation ======
scaler = MinMaxScaler()
scaler.fit(X_train_ddos)
X_train_ddos = pd.DataFrame(scaler.transform(X_train_ddos), columns=X_train_ddos.columns)
X_val_ddos = pd.DataFrame(scaler.transform(X_val_ddos), columns=X_val_ddos.columns)

# ====== Re-attach the labels so rows can be filtered by class ======
train_data_ddos = X_train_ddos.assign(Label=y_train_ddos.values)
val_data_ddos = X_val_ddos.assign(Label=y_val_ddos.values)

print("Total data in train_data_ddos:", train_data_ddos.shape)
print(train_data_ddos.head())  # sanity check: the frame should not be empty

# ====== Separate benign and DDoS rows in the training set ======
train_benign_for_ddos = train_data_ddos.loc[train_data_ddos["Label"] == 0].drop(columns="Label")
train_ddos = train_data_ddos.loc[train_data_ddos["Label"] == 1].drop(columns="Label")

# ====== Separate benign and DDoS rows in the validation set ======
val_benign_for_ddos = val_data_ddos.loc[val_data_ddos["Label"] == 0].drop(columns="Label")
val_ddos = val_data_ddos.loc[val_data_ddos["Label"] == 1].drop(columns="Label")

# ====== Write the four subsets to CSV (features only, no index column) ======
for file_name, subset in (
    ("train_benign_for_ddos.csv", train_benign_for_ddos),
    ("train_ddos.csv", train_ddos),
    ("val_benign_for_ddos.csv", val_benign_for_ddos),
    ("val_ddos.csv", val_ddos),
):
    subset.to_csv(save_path + file_name, index=False)

print("Data telah dipisah menjadi training & validation dan disimpan ke CSV.")

Unique labels in y_train_ddos: [1 0]
Total data in train_data_ddos: (180568, 10)
   Bwd Packet Length Mean  Avg Bwd Segment Size  Bwd Packet Length Max  \
0                0.500259              0.500259               0.992723   
1                0.000000              0.000000               0.000000   
2                0.008448              0.008448               0.004195   
3                0.000000              0.000000               0.000000   
4                0.000000              0.000000               0.000000   

   Bwd Packet Length Std  Destination Port  URG Flag Count  \
0               0.707229          0.001221             0.0   
1               0.000000          0.001221             0.0   
2               0.000000          0.000809             0.0   
3               0.000000          0.001221             0.0   
4               0.000000          0.002091             0.0   

   Packet Length Mean  Average Packet Size  Packet Length Std  Label  
0            0.750774         

In [None]:
# ====== Build the training/validation CSV files for Patator detection ======

# ====== Stratified train/validation split (20% held out) ======
X_train_patator, X_val_patator, y_train_patator, y_val_patator = train_test_split(
    X_selected_patator,
    y_patator,
    test_size=0.2,
    stratify=y_patator,
    random_state=42,
)

# Discard the shuffled original row labels so every part is indexed 0..n-1
X_train_patator, y_train_patator, X_val_patator, y_val_patator = (
    part.reset_index(drop=True)
    for part in (X_train_patator, y_train_patator, X_val_patator, y_val_patator)
)

print("Unique labels in y_train_patator:", y_train_patator.unique())  # expected: only 0 and 1

# ====== Min-max scaling: fitted on the training rows only, reused for validation ======
scaler = MinMaxScaler()
scaler.fit(X_train_patator)
X_train_patator = pd.DataFrame(scaler.transform(X_train_patator), columns=X_train_patator.columns)
X_val_patator = pd.DataFrame(scaler.transform(X_val_patator), columns=X_val_patator.columns)

# ====== Re-attach the labels so rows can be filtered by class ======
train_data_patator = X_train_patator.assign(Label=y_train_patator.values)
val_data_patator = X_val_patator.assign(Label=y_val_patator.values)

print("Total data in train_data_patator:", train_data_patator.shape)
print(train_data_patator.head())  # sanity check: the frame should not be empty

# ====== Separate benign and Patator rows in the training set ======
train_benign_for_patator = train_data_patator.loc[train_data_patator["Label"] == 0].drop(columns="Label")
train_patator = train_data_patator.loc[train_data_patator["Label"] == 1].drop(columns="Label")

# ====== Separate benign and Patator rows in the validation set ======
val_benign_for_patator = val_data_patator.loc[val_data_patator["Label"] == 0].drop(columns="Label")
val_patator = val_data_patator.loc[val_data_patator["Label"] == 1].drop(columns="Label")

# ====== Write the four subsets to CSV (features only, no index column) ======
for file_name, subset in (
    ("train_benign_for_patator.csv", train_benign_for_patator),
    ("train_patator.csv", train_patator),
    ("val_benign_for_patator.csv", val_benign_for_patator),
    ("val_patator.csv", val_patator),
):
    subset.to_csv(save_path + file_name, index=False)

print("Data telah dipisah menjadi training & validation dan disimpan ke CSV.")

Unique labels in y_train_patator: [0 1]
Total data in train_data_patator: (356516, 10)
   Fwd PSH Flags  SYN Flag Count  Min Packet Length  Bwd Packet Length Min  \
0            0.0             0.0           0.016335               0.007942   
1            0.0             0.0           0.024148               0.042472   
2            0.0             0.0           0.033381               0.067680   
3            0.0             0.0           0.000000               0.000000   
4            0.0             0.0           0.037642               0.027970   

   PSH Flag Count  Fwd Packet Length Min  Average Packet Size  \
0             0.0               0.011138             0.008615   
1             0.0               0.016465             0.018129   
2             0.0               0.022760             0.043450   
3             0.0               0.000000             0.000000   
4             0.0               0.025666             0.028018   

   act_data_pkt_fwd  Bwd Packet Length Mean  Label  
