In [1]:
import pandas as pd
import os
from datetime import datetime

In [2]:
# Source directory (read for per-UE CSVs) and destination directory (per-attack CSVs)
input_folder = '../Datasets/Per_UE_Datasets_final'
output_folder = '../Datasets/Per_Attack_Datasets'

# Ensure the destination exists; exist_ok makes re-running this cell harmless
os.makedirs(output_folder, exist_ok=True)

In [3]:
# Mapping of attack name -> (window start, window end), as "YYYY-MM-DD HH:MM:SS" strings.
# The key doubles as the stem of the CSV file written for that window.
# NOTE(review): each window starts one second after the previous one ends; if '_time'
# carries sub-second precision, rows inside those one-second gaps match no window — confirm.
time_ranges = {
    "SYN": (
        "2024-08-17 12:00:00",
        "2024-08-18 09:00:00",
    ),
    "ICMP": (
        "2024-08-18 09:00:01",
        "2024-08-19 12:00:00",
    ),
    "UDP": (
        "2024-08-19 12:00:01",
        "2024-08-20 12:00:00",
    ),
    "DNS": (
        "2024-08-20 12:00:01",
        "2024-08-21 14:00:00",
    ),
    "GTPU": (
        "2024-08-21 14:00:01",
        "2024-08-22 06:59:58",
    ),
}

In [4]:
# Split every per-user CSV in `input_folder` by the windows in `time_ranges`, then write
# one combined CSV per window (named after the window key) into `output_folder`.

# Parse the window bounds once up front; they do not change from file to file
time_format = "%Y-%m-%d %H:%M:%S"
parsed_ranges = {
    name: (datetime.strptime(start, time_format), datetime.strptime(end, time_format))
    for name, (start, end) in time_ranges.items()
}

# os.listdir() returns entries in arbitrary order; sort them so the row order of the
# combined CSVs is identical on every run and platform
csv_filenames = sorted(
    filename for filename in os.listdir(input_folder) if filename.endswith('.csv')
)
if not csv_filenames:
    # Fail early with a clear message instead of letting pd.concat() raise
    # "No objects to concatenate" further down
    raise ValueError(f"No CSV files found in {input_folder!r}")

# Create output data structures: window name -> list of filtered frames (one per input file)
output_data = {key: [] for key in time_ranges.keys()}

# Process each user CSV file
for filename in csv_filenames:
    # Load the CSV file
    filepath = os.path.join(input_folder, filename)
    print(filepath)
    df = pd.read_csv(filepath, parse_dates=['_time'])  # Ensure the '_time' column is parsed as datetime

    # Keep the rows whose '_time' falls inside each window (both bounds inclusive)
    for name, (start_time, end_time) in parsed_ranges.items():
        filtered_data = df[df['_time'].between(start_time, end_time)]
        output_data[name].append(filtered_data)

# Save each output data structure into a single CSV file per time range
for name, data_frames in output_data.items():
    # Concatenate data from all users for this specific time range
    combined_df = pd.concat(data_frames)

    # Save to a new CSV file; the index is dropped, so concatenated index values do not matter
    output_file = os.path.join(output_folder, f"{name}.csv")
    combined_df.to_csv(output_file, index=False)
    print(f"Saved {output_file}")
