# Poisoning Attacks

## Dataset Poisoning: Sequential Execution (6m 24s)

In [27]:
import pandas as pd
import numpy as np
import os
import shutil

def poison_dataset(file_path, output_folder, label):
    """Write a poisoned copy of one CSV file into ``output_folder``.

    The whole ``label`` column is overwritten with the value that occurs
    least often in that column, so every row of the written file carries the
    same (rarest) label. The copy keeps the base name of ``file_path`` and is
    written without the DataFrame index.

    Args:
        file_path: Path of the CSV file to read.
        output_folder: Directory that receives the poisoned copy; it is
            created if it does not exist yet.
        label: Name of the column holding the class labels.
    """
    frame = pd.read_csv(file_path)

    # Flip 100% of the rows of this file to the rarest label.
    label_counts = frame[label].value_counts()
    rarest_label = label_counts.idxmin()
    frame[label] = rarest_label

    os.makedirs(output_folder, exist_ok=True)

    source_name = os.path.basename(file_path)
    destination = os.path.join(output_folder, source_name)
    frame.to_csv(destination, index=False)
    print(f"Poisoned dataset saved as {destination}")

def replicate_and_poison(input_folder, output_folder, poison_percentage, label):
    """Mirror the CSV files of ``input_folder`` into ``output_folder``, poisoning some.

    The input tree is walked with ``os.walk``. Every file whose name ends in
    ``.csv`` is either handed to ``poison_dataset`` or copied unchanged into
    the same relative location under ``output_folder``. Files with other
    extensions are skipped.

    Args:
        input_folder: Root directory of the original dataset.
        output_folder: Root directory that receives the mirrored tree.
        poison_percentage: Chance, in percent, that any single CSV file is
            poisoned. Each file is drawn independently.
        label: Name of the label column, forwarded to ``poison_dataset``.
    """
    for current_dir, _subdirs, filenames in os.walk(input_folder):
        # Same position relative to the root, but under the output tree.
        mirrored_dir = os.path.join(
            output_folder, os.path.relpath(current_dir, input_folder)
        )

        for filename in filenames:
            if not filename.endswith('.csv'):
                continue

            source = os.path.join(current_dir, filename)

            # One independent random draw per CSV file.
            if np.random.rand() < poison_percentage / 100:
                poison_dataset(source, mirrored_dir, label)
                continue

            # Not selected for poisoning: plain copy.
            os.makedirs(mirrored_dir, exist_ok=True)
            shutil.copy(source, os.path.join(mirrored_dir, filename))

def poison_all_files(input_folder, poison_percentage):
    """Create a sequentially poisoned copy of a dataset folder.

    The copy is written next to the input as ``<input_folder>_POISONED``.
    Any existing folder of that name is deleted first so each run starts
    from a clean tree. The per-file work is delegated to
    ``replicate_and_poison``, which processes the files one after another.

    Args:
        input_folder: Root directory of the original dataset. When the path
            contains ``'CICIOT2023'`` the label column is ``'label'``,
            otherwise ``'Label'``.
        poison_percentage: Chance, in percent, that any single CSV file is
            poisoned.
    """
    label = 'label' if 'CICIOT2023' in input_folder else 'Label'
    output_folder = input_folder + '_POISONED'

    # Ensure the output directory does not exist to start fresh
    if os.path.exists(output_folder):
        shutil.rmtree(output_folder)

    # This is the sequential variant: it must not depend on the thread pool
    # helpers (ThreadPoolExecutor, multiprocessing, handle_file), which are
    # not imported or defined alongside it.
    replicate_and_poison(input_folder, output_folder, poison_percentage, label)


# Example usage: runs as soon as this cell/module is executed and writes the
# result to '<input_folder>_POISONED'.
input_folder = 'IOTBOTNET2020'  # or 'CICIOT2023'
# Per-file chance (in percent) of being poisoned; each CSV is drawn
# independently, so the actual share of poisoned files varies between runs.
poison_percentage = 25  # Percentage of files to poison
poison_all_files(input_folder, poison_percentage)


Poisoned dataset saved as IOTBOTNET2020_POISONED\ddos\DDOS HTTP\DDoS_HTTP[1].csv
Poisoned dataset saved as IOTBOTNET2020_POISONED\dos\dos tcp\DoS_TCP[3].csv
Poisoned dataset saved as IOTBOTNET2020_POISONED\scan\service\Service.csv


# Dataset Poisoning Multithreaded Execution

In [1]:
import pandas as pd
import numpy as np
import os
import shutil
from concurrent.futures import ThreadPoolExecutor
import multiprocessing

def poison_dataset(file_path, output_folder, label):
    """Write a poisoned copy of one CSV file into ``output_folder``.

    The whole ``label`` column is overwritten with the value that occurs
    least often in that column, so every row of the written file carries the
    same (rarest) label. The copy keeps the base name of ``file_path`` and is
    written without the DataFrame index.

    Args:
        file_path: Path of the CSV file to read.
        output_folder: Directory that receives the poisoned copy; it is
            created if it does not exist yet.
        label: Name of the column holding the class labels.
    """
    frame = pd.read_csv(file_path)

    # Flip 100% of the rows of this file to the rarest label.
    label_counts = frame[label].value_counts()
    rarest_label = label_counts.idxmin()
    frame[label] = rarest_label

    os.makedirs(output_folder, exist_ok=True)

    source_name = os.path.basename(file_path)
    destination = os.path.join(output_folder, source_name)
    frame.to_csv(destination, index=False)
    print(f"Poisoned dataset saved as {destination}")

def handle_file(file_path, output_folder, label, poison_probability, input_root=None):
    """Poison or copy a single CSV file into the mirrored output tree.

    Args:
        file_path: Path of the CSV file to process.
        output_folder: Root directory of the output tree.
        label: Name of the label column, forwarded to ``poison_dataset``.
        poison_probability: Chance in the range 0..1 that this file is
            poisoned instead of copied.
        input_root: Root directory that ``file_path`` lives under; the file
            keeps the same position relative to ``output_folder``. When
            omitted, the module-level ``input_folder`` is used, which matches
            the behaviour of the original four-argument call.
    """
    if input_root is None:
        # Backward-compatible fallback for callers that do not pass the root.
        input_root = input_folder

    relative_path = os.path.relpath(file_path, input_root)
    new_output_folder = os.path.join(output_folder, os.path.dirname(relative_path))

    # Randomly decide whether to poison the file
    if np.random.rand() < poison_probability:
        poison_dataset(file_path, new_output_folder, label)
    else:
        # Copy the file if not poisoning
        os.makedirs(new_output_folder, exist_ok=True)
        shutil.copy(file_path, os.path.join(new_output_folder, os.path.basename(file_path)))


def poison_all_files(input_folder, poison_percentage):
    """Create a poisoned copy of a dataset folder using a thread pool.

    The copy is written next to the input as ``<input_folder>_POISONED``.
    Any existing folder of that name is deleted first so each run starts
    from a clean tree. Every ``.csv`` file found under ``input_folder`` is
    submitted to ``handle_file`` on a pool with one thread per CPU.

    Args:
        input_folder: Root directory of the original dataset. When the path
            contains ``'CICIOT2023'`` the label column is ``'label'``,
            otherwise ``'Label'``.
        poison_percentage: Chance, in percent, that any single CSV file is
            poisoned.

    Raises:
        Exception: Whatever a worker raised while processing a file is
            re-raised here instead of being silently discarded.
    """
    label = 'label' if 'CICIOT2023' in input_folder else 'Label'
    output_folder = input_folder + '_POISONED'

    # Ensure the output directory does not exist to start fresh
    if os.path.exists(output_folder):
        shutil.rmtree(output_folder)

    csv_files = []
    for dirpath, _dirnames, filenames in os.walk(input_folder):
        for filename in filenames:
            if filename.endswith('.csv'):
                csv_files.append(os.path.join(dirpath, filename))

    poison_probability = poison_percentage / 100

    # Use a number of threads based on the number of CPUs
    num_threads = multiprocessing.cpu_count()

    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        # Pass this call's own input_folder so relative paths are computed
        # against the tree being processed, not against a module-level global.
        futures = [
            executor.submit(
                handle_file, path, output_folder, label, poison_probability, input_folder
            )
            for path in csv_files
        ]
        # Collect every result so a failure in any worker surfaces here.
        for future in futures:
            future.result()

# Example usage: runs as soon as this cell/module is executed and writes the
# result to '<input_folder>_POISONED'.
# NOTE: handle_file reads this module-level `input_folder` to compute relative
# paths, so the name must stay defined at module scope.
input_folder = 'IOTBOTNET2020'  # or 'CICIOT2023'
# Per-file chance (in percent) of being poisoned; each CSV is drawn
# independently, so the actual share of poisoned files varies between runs.
poison_percentage = 25  # Percentage of files to poison
poison_all_files(input_folder, poison_percentage)


Poisoned dataset saved as IOTBOTNET2020_POISONED\theft\data_exfiltration\Data_Exfiltration.csv
Poisoned dataset saved as IOTBOTNET2020_POISONED\theft\keylogging\Keylogging.csv
Poisoned dataset saved as IOTBOTNET2020_POISONED\scan\service\Service.csv
Poisoned dataset saved as IOTBOTNET2020_POISONED\ddos\DDOS TCP\DDoS_TCP[1].csv
Poisoned dataset saved as IOTBOTNET2020_POISONED\dos\dos tcp\DoS_TCP[1].csv
Poisoned dataset saved as IOTBOTNET2020_POISONED\dos\dos tcp\DoS_TCP[2].csv
Poisoned dataset saved as IOTBOTNET2020_POISONED\ddos\DDOS UDP\DDoS_UDP[3].csv


# File Deletion

In [None]:
import os
import random

def deleteFileAttack(dataset=None, fileIndices=None, seed=42):
    """Delete the listed files of one dataset archive from disk.

    Every file in the selected dataset's hard-coded list is removed (the
    list is only shuffled, not subsampled). A line is printed for each file
    saying whether it was deleted or did not exist.

    Args:
        dataset: ``'ciciot'`` or ``'iotbotnet'``. ``None`` picks one of the
            two at random. Any other value makes the function return without
            deleting anything.
        fileIndices: Currently ignored by the implementation.
            NOTE(review): presumably meant to restrict deletion to selected
            entries of the file list -- confirm the intended behaviour.
        seed: Seed applied to the global ``random`` module before any random
            choice, making the dataset pick and deletion order repeatable.

    Returns:
        None.
    """
    random.seed(seed)

    datasets = ['ciciot', 'iotbotnet']

    if dataset is None:
        dataset = random.choice(datasets)

    if dataset == 'ciciot':
        base_path = './ciciot2023/ciciot2023_archive/'
        file_paths = [f'part-{i:05d}-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.xlsx' for i in range(168)]
    elif dataset == 'iotbotnet':
        base_path = './iotbotnet2020_archive/'
        # NOTE(review): verify these names against the archive on disk;
        # entries that are not found are only reported, not treated as errors.
        file_paths = [
            'ddos/ddos HTTP/DDoS_HTTP.csv',
            'ddos/DDOS TCP/DDOS_TCP1.csv',
            'ddos/DDOS TCP/DDOS_TCP2.csv',
            'ddos/DDOS TCP/DDOS_TCP3.csv',
            'ddos/DDOS UDP/DDOS_UDP1.csv',
            'ddos/DDOS UDP/DDOS_UDP2.csv',
            'ddos/DDOS UDP/DDOS_UDP3.csv',
            'dos/dos tcp/DOS_TCP1.csv',
            'dos/dos tcp/DOS_TCP2.csv',
            'dos/dos tcp/DOS_TCP3.csv',
            'dos/dos udp/DOS_UDP1.csv',
            'dos/dos udp/DOS_UDP2.csv',
            'dos/dos udp/DOS_UDP3.csv',
            'dos/dos_http/DoS_HTTP.csv',
            'scan/os/os.csv',
            'scan/service/service.csv',
            'theft/data_exflitration/data_exflitration.csv',
            'theft/keylogging/KeyLogging.csv',
        ]
    else:
        return

    # Sampling the full length yields every file, in a seeded random order.
    num_files = len(file_paths)
    deletion = random.sample(file_paths, num_files)

    for file_path in deletion:
        full_path = os.path.join(base_path, file_path)
        # Try the removal directly instead of checking first, so a file that
        # vanishes between check and removal cannot crash the run.
        try:
            os.remove(full_path)
        except FileNotFoundError:
            print(f"File does not exist: {full_path}")
        else:
            print(f"Deleted file: {full_path}")