In [None]:
import numpy as np
import pandas as pd

# Load the network-flow dataset from CSV.
file_path = "D:/code/gr1/data/dataset/test_network.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

# Print the columns present in the file as a quick sanity check.
print("Các cột trong dữ liệu:")
print(data.columns)

# Columns to keep, in the same order as the feature set used for training,
# followed by the 'Stage' column. Grouped below by name suffix only.
list_features = [
    # *_duration_ms
    'bidirectional_duration_ms', 'src2dst_duration_ms', 'dst2src_duration_ms',
    # *_packets / *_bytes per direction
    'src2dst_packets', 'dst2src_packets',
    'src2dst_bytes', 'dst2src_bytes',
    # *_ps statistics per direction
    'src2dst_max_ps', 'src2dst_min_ps', 'src2dst_mean_ps', 'src2dst_stddev_ps',
    'dst2src_max_ps', 'dst2src_min_ps', 'dst2src_mean_ps', 'dst2src_stddev_ps',
    # *_piat_ms statistics
    'bidirectional_mean_piat_ms', 'bidirectional_stddev_piat_ms',
    'bidirectional_max_piat_ms', 'bidirectional_min_piat_ms',
    'src2dst_mean_piat_ms', 'src2dst_stddev_piat_ms',
    'src2dst_max_piat_ms', 'src2dst_min_piat_ms',
    'dst2src_mean_piat_ms', 'dst2src_stddev_piat_ms',
    'dst2src_max_piat_ms', 'dst2src_min_piat_ms',
    # per-flag packet counts (fin / syn / rst / psh / ack / urg / cwr / ece)
    'bidirectional_fin_packets', 'src2dst_fin_packets', 'dst2src_fin_packets',
    'bidirectional_syn_packets', 'src2dst_syn_packets', 'dst2src_syn_packets',
    'bidirectional_rst_packets', 'src2dst_rst_packets', 'dst2src_rst_packets',
    'bidirectional_psh_packets', 'src2dst_psh_packets', 'dst2src_psh_packets',
    'bidirectional_ack_packets', 'src2dst_ack_packets', 'dst2src_ack_packets',
    'bidirectional_urg_packets', 'src2dst_urg_packets',
    'bidirectional_cwr_packets', 'src2dst_cwr_packets',
    'bidirectional_ece_packets', 'src2dst_ece_packets',
    # stage name of each row
    'Stage',
]

# Work on an independent copy restricted to the selected columns, so later
# column assignments do not touch the raw frame.
df = data.loc[:, list_features].copy()


In [None]:
# Numeric label assigned to each stage name found in the 'Stage' column.
stage_mapping = {
    'Benign': 0,
    'Reconnaissance': 1,
    'Establish Foothold': 2,
    'Lateral Movement': 3,
    'Data Exfiltration': 4,
    'Cover up': 5
}

# Series.map yields NaN for every Stage value that is not a key of
# stage_mapping (for example a spelling or whitespace variant, or a missing value).
df['Label'] = df['Stage'].map(stage_mapping)

# Show the distinct stage names present in the data.
unique_stages = df['Stage'].unique()
print(f"Các giá trị duy nhất trong cột 'Stage': {unique_stages}")

# Report unmapped stages right here instead of letting NaN labels propagate
# silently into later cells, where they are much harder to trace back.
unmapped_mask = df['Label'].isna()
if unmapped_mask.any():
    unmapped_stages = df.loc[unmapped_mask, 'Stage'].unique()
    print(
        f"WARNING: {int(unmapped_mask.sum())} row(s) have a Stage with no entry "
        f"in stage_mapping (Label is NaN): {unmapped_stages}"
    )

In [None]:
# Sampling ranges for the 'Attack' and 'Defender' roles.
# Structure: matrix[role][label] -> list of five (low, high) tuples. Each tuple
# bounds one value drawn with np.random.uniform by generate_random_points; the
# five values of a point are later stored as <role>_Feature_1 .. <role>_Feature_5.
# NOTE(review): only labels 0-4 are defined here, while stage_mapping also maps
# 'Cover up' to 5. A row with that stage would make generate_random_points raise
# ValueError - confirm whether an entry for label 5 is missing.
# NOTE(review): the meaning/units of the individual ranges are not visible in
# this notebook - TODO document their source.
matrix = {
    'Attack': {
        0: [(2.0, 2.5), (2.5, 3.0), (2.5, 3.0), (2.5, 3.0), (2.5, 3.0)],
        1: [(1.0, 1.5), (1.0, 1.5), (2.0, 2.5), (2.0, 2.5), (2.5, 3.5)],
        2: [(1.5, 2.0), (2.0, 2.5), (2.5, 3.5), (3.5, 5.0), (3.0, 5.0)],
        3: [(1.5, 2.5), (2.0, 3.0), (2.5, 3.5), (3.5, 4.5), (3.5, 4.5)],
        4: [(1.5, 2.5), (1.5, 2.5), (3.5, 4.5), (3.5, 4.5), (3.5, 4.5)]
    },
    'Defender': {
        0: [(8.5, 9.5), (8.5, 9.5), (7.5, 8.5), (7.5, 8.5), (7.0, 8.0)],
        1: [(7.5, 8.5), (7.5, 8.5), (7.5, 8.5), (7.5, 8.5), (7.5, 8.5)],
        2: [(7.5, 8.5), (7.5, 8.5), (7.5, 8.5), (7.5, 8.5), (7.5, 8.5)],
        3: [(6.5, 7.5), (6.5, 7.5), (6.5, 7.5), (6.5, 7.5), (6.5, 7.5)],
        4: [(7.5, 8.5), (7.5, 8.5), (7.5, 8.5), (7.5, 8.5), (7.5, 8.5)]
    }
}

In [None]:
# Function to generate random points based on the matrix
def generate_random_points(label, role, num_points=1, rng=None, ranges_by_role=None):
    """Draw random points for a label/role from a table of (low, high) ranges.

    Args:
        label: Key selecting the list of ranges inside the role's table.
        role: Key selecting the role's table (e.g. 'Attack' or 'Defender').
        num_points: Number of points to generate; 0 yields an empty list.
        rng: Optional object exposing ``uniform(low, high)``, such as
            ``np.random.default_rng(seed)``, for reproducible draws. Defaults
            to the global ``np.random`` state, as before.
        ranges_by_role: Optional ``{role: {label: [(low, high), ...]}}`` table.
            Defaults to the module-level ``matrix``.

    Returns:
        A list of ``num_points`` points; each point is a list holding one
        value per (low, high) range, rounded to 2 decimals.

    Raises:
        ValueError: If ``label`` has no entry for ``role``.
        KeyError: If ``role`` is not present in the table.
    """
    # The global table is only looked up when the caller supplies none.
    table = matrix if ranges_by_role is None else ranges_by_role
    if label not in table[role]:
        raise ValueError(f"Label '{label}' does not exist in the matrix for role '{role}'.")

    sampler = np.random if rng is None else rng
    ranges = table[role][label]
    return [
        [round(sampler.uniform(low, high), 2) for low, high in ranges]
        for _ in range(num_points)
    ]

In [None]:
# Flag the rows whose Label differs from the previous row's Label.
current_labels = df['Label']
previous_labels = current_labels.shift()
# NaN never compares equal to NaN, so a plain .ne() would flag every row in a
# run of unmapped (NaN) labels as a change; treat consecutive NaNs as unchanged.
both_missing = current_labels.isna() & previous_labels.isna()
df['Label_Changed'] = (current_labels.ne(previous_labels) & ~both_missing).astype(int)

# The first row always starts a new segment. Assign it by position: a
# label-based df.loc[0, ...] would append a bogus row whenever the index has no
# label 0 (for example an empty or re-indexed frame).
if len(df) > 0:
    df.iloc[0, df.columns.get_loc('Label_Changed')] = 1

# Keep only the rows where a new label segment begins.
changed_rows = df[df['Label_Changed'] == 1]
print(changed_rows)

# Generate one Attack point and one Defender point per label change.
# NOTE(review): the points come from NumPy's global RNG and no seed is set in
# the visible cells, so the generated values differ between runs - confirm
# whether that is intended.
results = []
for label in changed_rows['Label']:
    attack_points = generate_random_points(label, 'Attack')
    defender_points = generate_random_points(label, 'Defender')
    # Pair the i-th attack point with the i-th defender point in one output row.
    for attack, defender in zip(attack_points, defender_points):
        results.append({
            "Label": label,
            **{f"Attack_Feature_{i}": value for i, value in enumerate(attack, start=1)},
            **{f"Defender_Feature_{i}": value for i, value in enumerate(defender, start=1)},
        })

# Build the frame once from the collected records.
result_df = pd.DataFrame(results)
print(result_df)


In [None]:

# Write the generated points to a new CSV file, without the index column.
output_file_path = 'D:/code/gr1/data/dataset/Book1.csv'
result_df.to_csv(path_or_buf=output_file_path, index=False)

# Lift the column display limit, then preview the first 20 rows.
pd.set_option('display.max_columns', None)
preview = result_df.head(20)
print(preview)