In [6]:
import csv
import os
from datetime import datetime, timedelta

In [7]:
# Input: absolute path of the CSV file to be processed (adjust per environment)
filename = "/content/preprocessedd FlowStatsfile.csv"
# Output: directory that receives the generated per-window CSV files
output_folder = "processed"

In [8]:
# Make sure the output directory is present before any file is written;
# exist_ok=True keeps this cell safe to re-run when the folder already exists.
os.makedirs(name=output_folder, exist_ok=True)


In [9]:
# Function to read CSV file and split its rows into fixed-length time windows
def process_csv_with_time_windows(filename, window_seconds=2):
    """Split a timestamped CSV file into one output CSV per time window.

    Windows are half-open intervals ``[start, start + window_seconds)`` laid
    out on a grid anchored at the timestamp of the first row in the file.
    Every row is assigned to the window that actually contains its
    timestamp, so gaps in the data (or rows that are out of order) no longer
    cause rows to be written under the wrong window start. Windows that
    contain no rows produce no file.

    All rows are held in memory until the whole file has been read; this is
    what allows rows of the same window to be collected even when they are
    not adjacent in the input.

    Args:
        filename: Path of the input CSV. It must have a header row with a
            'timestamp' column formatted as '%Y-%m-%d %H:%M:%S'.
        window_seconds: Length of each window in seconds (default 2).

    Raises:
        ValueError: If ``window_seconds`` is not positive, or a timestamp
            does not match the expected format.
        KeyError: If a row has no 'timestamp' column.
    """
    window = timedelta(seconds=window_seconds)
    if window <= timedelta(0):
        raise ValueError("window_seconds must be positive")

    origin = None          # timestamp of the first row; anchors the window grid
    rows_by_window = {}    # window index (int, may be negative) -> list of rows

    # newline='' is what the csv module expects so that line breaks inside
    # quoted fields are parsed correctly.
    with open(filename, 'r', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            timestamp = datetime.strptime(row['timestamp'], '%Y-%m-%d %H:%M:%S')

            if origin is None:
                origin = timestamp

            # timedelta // timedelta floors to an int, so rows earlier than
            # the first row get a negative index instead of being mislabeled.
            window_index = (timestamp - origin) // window
            rows_by_window.setdefault(window_index, []).append(row)

    # Write the windows in chronological order, one file per non-empty window.
    for window_index in sorted(rows_by_window):
        time_window_start = origin + window_index * window
        save_to_csv(rows_by_window[window_index], time_window_start)

# Function to write the rows of one time window to their own CSV file
def save_to_csv(data, time_window_start):
    """Write one time window's rows to a CSV file inside ``output_folder``.

    The file is named ``<YYYY-MM-DD_HH-MM-SS>_processed.csv`` after the
    window start and is overwritten if it already exists.

    Args:
        data: Non-empty list of row dicts; the keys of the first row become
            the header columns.
        time_window_start: datetime marking the start of the window, used to
            build the file name.
    """
    stamp = time_window_start.strftime('%Y-%m-%d_%H-%M-%S')
    target_path = os.path.join(output_folder, f"{stamp}_processed.csv")

    # newline='' leaves line-ending handling to the csv writer.
    with open(target_path, 'w', newline='') as out_file:
        writer = csv.DictWriter(out_file, fieldnames=data[0].keys())
        writer.writeheader()
        writer.writerows(data)

# Run the time-window split on the configured input file; the per-window
# CSV files are produced as a side effect.
process_csv_with_time_windows(filename=filename)

In [3]:
# Function to read CSV file and return its rows sorted by timestamp
def read_and_sort_csv(filename):
    """Load every row of a CSV file and return them ordered by timestamp.

    Args:
        filename: Path of a CSV file whose header row includes a
            'timestamp' column.

    Returns:
        A list with one dict per data row (as produced by csv.DictReader),
        sorted in ascending order of the 'timestamp' string. The comparison
        is lexicographic, which coincides with chronological order for
        zero-padded 'YYYY-MM-DD HH:MM:SS' values.

    Raises:
        KeyError: If the header has no 'timestamp' column.
    """
    # newline='' is what the csv module expects; without it, line endings
    # inside quoted fields are rewritten by universal-newline translation.
    with open(filename, 'r', newline='') as csvfile:
        data = list(csv.DictReader(csvfile))

    # sorted() is stable, so rows sharing a timestamp keep their file order.
    return sorted(data, key=lambda row: row['timestamp'])

In [4]:
# Example usage: load the input CSV into memory as a timestamp-ordered list of row dicts
sorted_data = read_and_sort_csv(filename=filename)



In [5]:
# Print sorted data: one row dict per line, in ascending timestamp order.
# NOTE(review): this prints every row of the file; consider previewing a
# slice (e.g. sorted_data[:5]) to keep the notebook output small.
for row in sorted_data:
    print(row)

{'timestamp': '2007-08-04 21:04:48', 'datapath_id': '1', 'flow_id': '192.95.27.1905001171.126.222.64354516', 'ip_src': '40.75.89.172', 'tp_src': '0', 'ip_dst': '71.126.222.64', 'tp_dst': '0', 'ip_proto': '1', 'icmp_code': '0', 'icmp_type': '8', 'flow_duration_sec': '0', 'flow_duration_nsec': '0', 'idle_timeout': '0', 'hard_timeout': '0', 'flags': '0', 'packet_count': '1', 'byte_count': '48', 'packet_count_per_second': '0', 'packet_count_per_nsecond': '0', 'byte_count_per_second': '0', 'byte_count_per_nsecond': '0'}
{'timestamp': '2007-08-04 21:04:48', 'datapath_id': '1', 'flow_id': '192.95.27.1905001371.126.222.64354516', 'ip_src': '40.75.89.172', 'tp_src': '0', 'ip_dst': '71.126.222.64', 'tp_dst': '0', 'ip_proto': '1', 'icmp_code': '0', 'icmp_type': '8', 'flow_duration_sec': '0', 'flow_duration_nsec': '0', 'idle_timeout': '0', 'hard_timeout': '0', 'flags': '0', 'packet_count': '1', 'byte_count': '48', 'packet_count_per_second': '0', 'packet_count_per_nsecond': '0', 'byte_count_per_sec