In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.feature_selection import RFE

# Tải dữ liệu Iris
iris = load_iris()
X = iris.data
y = iris.target

# Chia dữ liệu thành tập huấn luyện và kiểm tra
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Khởi tạo mô hình SVC với kernel linear
model = SVC(kernel="linear")

# Khởi tạo RFE và chọn 2 đặc trưng quan trọng nhất
rfe = RFE(estimator=model, n_features_to_select=2)
rfe = rfe.fit(X_train, y_train)

# Các đặc trưng được chọn
selected_features = rfe.support_
print("Selected features: ", selected_features)

# Xếp hạng các đặc trưng
print("Features ranking: ", rfe.ranking_)

# Huấn luyện lại mô hình với các đặc trưng đã chọn
X_train_selected = X_train[:, selected_features]
model.fit(X_train_selected, y_train)

# Kiểm tra mô hình trên tập kiểm tra
X_test_selected = X_test[:, selected_features]
score = model.score(X_test_selected, y_test)
print("Test accuracy: ", score)


Selected features:  [False False  True  True]
Features ranking:  [3 2 1 1]
Test accuracy:  1.0


In [8]:
time_features = [
    'flow_duration',
    'fwd_iat.min', 'fwd_iat.max', 'fwd_iat.tot', 'fwd_iat.avg', 'fwd_iat.std',
    'bwd_iat.min', 'bwd_iat.max', 'bwd_iat.tot', 'bwd_iat.avg', 'bwd_iat.std',
    'flow_iat.min', 'flow_iat.max', 'flow_iat.tot', 'flow_iat.avg', 'flow_iat.std',
    'active.min', 'active.max', 'active.tot', 'active.avg', 'active.std',
    'idle.min', 'idle.max', 'idle.tot', 'idle.avg', 'idle.std',
    'fwd_pkts_per_sec', 'bwd_pkts_per_sec', 'flow_pkts_per_sec',
    'payload_bytes_per_second'
]


In [10]:
import pandas as pd
data = pd.read_csv('data/RT_IOT2022')
print(data.columns)
       # Danh sách đặc trưng
features = ['fwd_pkts_tot']

Index(['Unnamed: 0', 'id.orig_p', 'id.resp_p', 'proto', 'service',
       'flow_duration', 'fwd_pkts_tot', 'bwd_pkts_tot', 'fwd_data_pkts_tot',
       'bwd_data_pkts_tot', 'fwd_pkts_per_sec', 'bwd_pkts_per_sec',
       'flow_pkts_per_sec', 'down_up_ratio', 'fwd_header_size_tot',
       'fwd_header_size_min', 'fwd_header_size_max', 'bwd_header_size_tot',
       'bwd_header_size_min', 'bwd_header_size_max', 'flow_FIN_flag_count',
       'flow_SYN_flag_count', 'flow_RST_flag_count', 'fwd_PSH_flag_count',
       'bwd_PSH_flag_count', 'flow_ACK_flag_count', 'fwd_URG_flag_count',
       'bwd_URG_flag_count', 'flow_CWR_flag_count', 'flow_ECE_flag_count',
       'fwd_pkts_payload.min', 'fwd_pkts_payload.max', 'fwd_pkts_payload.tot',
       'fwd_pkts_payload.avg', 'fwd_pkts_payload.std', 'bwd_pkts_payload.min',
       'bwd_pkts_payload.max', 'bwd_pkts_payload.tot', 'bwd_pkts_payload.avg',
       'bwd_pkts_payload.std', 'flow_pkts_payload.min',
       'flow_pkts_payload.max', 'flow_pkts_payload.

In [15]:
# Danh sách đầy đủ tất cả các thuộc tính
all_features = [
    'Unnamed: 0', 'id.orig_p', 'id.resp_p', 'proto', 'service',
    'flow_duration', 'fwd_pkts_tot', 'bwd_pkts_tot', 'fwd_data_pkts_tot',
    'bwd_data_pkts_tot', 'fwd_pkts_per_sec', 'bwd_pkts_per_sec',
    'flow_pkts_per_sec', 'down_up_ratio', 'fwd_header_size_tot',
    'fwd_header_size_min', 'fwd_header_size_max', 'bwd_header_size_tot',
    'bwd_header_size_min', 'bwd_header_size_max', 'flow_FIN_flag_count',
    'flow_SYN_flag_count', 'flow_RST_flag_count', 'fwd_PSH_flag_count',
    'bwd_PSH_flag_count', 'flow_ACK_flag_count', 'fwd_URG_flag_count',
    'bwd_URG_flag_count', 'flow_CWR_flag_count', 'flow_ECE_flag_count',
    'fwd_pkts_payload.min', 'fwd_pkts_payload.max', 'fwd_pkts_payload.tot',
    'fwd_pkts_payload.avg', 'fwd_pkts_payload.std', 'bwd_pkts_payload.min',
    'bwd_pkts_payload.max', 'bwd_pkts_payload.tot', 'bwd_pkts_payload.avg',
    'bwd_pkts_payload.std', 'flow_pkts_payload.min',
    'flow_pkts_payload.max', 'flow_pkts_payload.tot',
    'flow_pkts_payload.avg', 'flow_pkts_payload.std', 'fwd_iat.min',
    'fwd_iat.max', 'fwd_iat.tot', 'fwd_iat.avg', 'fwd_iat.std',
    'bwd_iat.min', 'bwd_iat.max', 'bwd_iat.tot', 'bwd_iat.avg',
    'bwd_iat.std', 'flow_iat.min', 'flow_iat.max', 'flow_iat.tot',
    'flow_iat.avg', 'flow_iat.std', 'payload_bytes_per_second',
    'fwd_subflow_pkts', 'bwd_subflow_pkts', 'fwd_subflow_bytes',
    'bwd_subflow_bytes', 'fwd_bulk_bytes', 'bwd_bulk_bytes',
    'fwd_bulk_packets', 'bwd_bulk_packets', 'fwd_bulk_rate',
    'bwd_bulk_rate', 'active.min', 'active.max', 'active.tot', 'active.avg',
    'active.std', 'idle.min', 'idle.max', 'idle.tot', 'idle.avg',
    'idle.std', 'fwd_init_window_size', 'bwd_init_window_size',
    'fwd_last_window_size', 'Attack_type'
]

# 1. Time-based features
time_keywords = ['iat', 'duration', 'active', 'idle', 'per_sec', 'payload_bytes_per_second']
time_features = [f for f in all_features if any(k in f for k in time_keywords)]

# 2. Payload features
payload_keywords = ['payload']
payload_features = [f for f in all_features if any(k in f for k in payload_keywords)]

# 3. TCP Flags
flag_keywords = ['flag_count']
flag_features = [f for f in all_features if any(k in f for k in flag_keywords)]

# 4. Flow size & statistics
size_keywords = ['header_size', 'bulk', 'subflow', 'tot', 'min', 'max', 'avg', 'std', 'rate']
flow_size_features = [f for f in all_features if any(k in f for k in size_keywords)
                      and f not in time_features and f not in payload_features and f not in flag_features]

# 5. Protocol & IP Info
proto_info_features = ['id.orig_p', 'id.resp_p', 'proto', 'service', 'down_up_ratio']

# 6. TCP Window size
window_features = ['fwd_init_window_size', 'bwd_init_window_size', 'fwd_last_window_size']

# 7. Metadata / Index
meta_features = ['Unnamed: 0']

# 8. Label
label_feature = ['Attack_type']

# In ra kết quả
print("🔹 Time Features:", len(time_features))
print("🔹 Payload Features:", len(payload_features))
print("🔹 TCP Flag Features:", len(flag_features))
print("🔹 Flow Size/Stat Features:", len(flow_size_features))
print("🔹 Protocol/IP Features:", len(proto_info_features))
print("🔹 TCP Window Features:", len(window_features))
print("🔹 Metadata Features:", len(meta_features))
print("🔹 Label Feature:", len(label_feature))


🔹 Time Features: 30
🔹 Payload Features: 16
🔹 TCP Flag Features: 10
🔹 Flow Size/Stat Features: 20
🔹 Protocol/IP Features: 5
🔹 TCP Window Features: 3
🔹 Metadata Features: 1
🔹 Label Feature: 1
