# LIBRARIES

In [43]:
import csv
import random
import numpy as np
import pandas as pd

# SENSORS FILES AND PARAMETERS OF THE ATTACK

In [44]:
# Reference value range for each sensor CSV, keyed by the file path.
# These bounds are widened later with the min/max actually found in each file.
sensors_info = {
    'Dataset/Casale di Goito - Idrometro Monte - 2024-08-19.csv':
        dict(min_value=20.411, max_value=26.832),
    'Dataset/Casale di Goito - Idrometro Pressione Valle - 2024-08-19.csv':
        dict(min_value=18.801, max_value=24.797),
    'Dataset/Pozzolo - Idrometro Monte - 2024-08-19.csv':
        dict(min_value=41.3, max_value=46.297),
    'Dataset/Pozzolo - Idrometro Valle - 2024-08-19.csv':
        dict(min_value=42.8, max_value=45.076),
    'Dataset/Salionze edificio regolatore - Idrometro pressione di Monte - 2024-08-19.csv':
        dict(min_value=59.911, max_value=66.794),
    'Dataset/Salionze Mincio - Livello Idrometrico - 2024-08-19.csv':
        dict(min_value=0, max_value=3.854),
}

# Attack parameters
number_attacks = 50      # how many separate attack intervals to inject
attack_duration = 4 * 2  # measurements per attack: 2 hours (presumably 4 readings per hour -- TODO confirm)

# Summarise the size of the planned attack
print(f'Number of attacks: {number_attacks} made of {attack_duration} measurements')
print(f'In total there are {number_attacks * attack_duration} fake measurements')

Number of attacks: 50 made of 8 measurements
In total there are 400 fake measurements


# SELECTION OF SENSORS TO ATTACK

In [45]:
# Every sensor is attacked. The mandatory sensor (the one whose readings drive
# the choice of attack intervals) is placed in the third slot of the list.
mandatory_sensor = 'Dataset/Pozzolo - Idrometro Monte - 2024-08-19.csv'
other_sensors = [name for name in sensors_info.keys() if name != mandatory_sensor]
sensors_to_attack = [
    other_sensors[0],
    other_sensors[1],
    mandatory_sensor,
    other_sensors[2],
    other_sensors[3],
    other_sensors[4],
]

# Two possible scenarios; leave exactly one assignment active.
#attack_scenario = 1 # Random attack
attack_scenario = 2 # "Reply" attack (presumably a replay attack; the label 'Reply' is what the code prints)

# DEFINE THE FUNCTION TO CLEAN THE CSV FILES

In [46]:
def _is_plain_number(text):
    """Return True when ``text`` is an unsigned decimal such as ``'12'`` or ``'12.5'``."""
    # Dropping at most one '.' and requiring digits only rejects '', '-',
    # signed numbers and anything with two dots.
    return text.replace('.', '', 1).isdigit()


def _value_field(raw_row):
    """Return the stripped second ';'-separated field of ``raw_row`` ('' if there is none)."""
    fields = raw_row.split(';')
    return fields[1].strip() if len(fields) > 1 else ''


def _next_valid_value(raw_rows, start, max_valid):
    """Return the first reading at position >= ``start`` strictly inside (0, max_valid), else None."""
    for position in range(start, len(raw_rows)):
        text = _value_field(raw_rows[position])
        if _is_plain_number(text):
            candidate = float(text)
            if 0 < candidate < max_valid:
                return candidate
    return None


def load_and_clean_data(sensor_file, max_valid=80):
    """Load a sensor CSV and replace unusable readings with interpolated ones.

    The file is read with pandas' default separator, keeping only the first
    column and skipping the first line; every remaining row is then split on
    ';' into a date (first field) and a reading (second field).

    A reading is kept as-is when it is an unsigned decimal in
    ``[0, max_valid]``. Otherwise it is replaced:

    * first row: by the next valid reading, or 0 if there is none;
    * any other row: by the mean of the previous (already cleaned) value and
      the next valid reading; if no valid reading follows, the previous value
      is reused.

    "Next valid reading" uses the stricter open interval ``(0, max_valid)``,
    exactly as the original code did (NOTE(review): 0 and ``max_valid`` are
    therefore accepted as readings but never used as a neighbour -- confirm
    this asymmetry is intended).

    Parameters
    ----------
    sensor_file : str
        Path of the CSV file to read.
    max_valid : float, optional
        Largest reading considered plausible (default 80, the value that was
        previously hard-coded).

    Returns
    -------
    pandas.DataFrame
        Columns ``date`` (the raw first field) and ``value`` (cleaned reading),
        one row per data row of the file.
    """
    # Loading dataset
    df = pd.read_csv(sensor_file, usecols=[0], skiprows=1, header=None)

    # A plain list makes the look-ahead scans cheap compared with df.iloc.
    raw_rows = df.iloc[:, 0].tolist()

    dates = []
    numeric_values = []

    for i, raw_row in enumerate(raw_rows):
        dates.append(raw_row.split(';')[0])

        # Whitespace is stripped here as well as in the look-ahead, so a
        # reading like ' 30.0' is treated the same way in both places.
        text = _value_field(raw_row)
        numeric_value = float(text) if _is_plain_number(text) else None

        if numeric_value is None or not 0 <= numeric_value <= max_valid:
            following = _next_valid_value(raw_rows, i + 1, max_valid)
            if i == 0:
                # Nothing precedes the first row: copy the next valid reading.
                numeric_value = 0 if following is None else following
            else:
                prev_value = numeric_values[-1]
                if following is None:
                    following = prev_value
                numeric_value = (prev_value + following) / 2  # Calculate the mean

        numeric_values.append(numeric_value)

    # Creating DataFrame with extracted data
    return pd.DataFrame({'date': dates, 'value': numeric_values})

# DEFINE THE FUNCTION TO CREATE FAKE MEASUREMENTS

In [47]:
def attack_sensor_data(df_cleaned, attack_duration, min_value, max_value, attack_indices, scenario=None):
    """Overwrite intervals of ``df_cleaned['value']`` with fake, lower readings.

    For every start index in ``attack_indices`` the ``attack_duration`` rows
    beginning there are replaced, one by one:

    * scenario 1 ('Random'): a uniform random value from the lowest third of
      ``[min_value, max_value]``, redrawn from
      ``[min_value, min_value + (old - min_value) / 3]`` when it is above the
      old value or closer to it than a third of the sensor range;
    * scenario 2 ('Reply'): a value sampled from the frame's current readings
      that lie in ``[min_value, min_value + (max_value - min_value) / 2.9]``
      and do not exceed the old value. If no such reading exists the row
      keeps its old value (the original code looped forever or hit an
      unbound variable in that situation).

    ``df_cleaned`` is modified in place; the same object is returned.

    Parameters
    ----------
    df_cleaned : pandas.DataFrame
        Frame with ``date`` and ``value`` columns, addressed by row label.
    attack_duration : int
        Number of consecutive rows replaced per attack.
    min_value, max_value : float
        Value range of the sensor.
    attack_indices : iterable of int
        Row label at which each attack starts.
    scenario : int, optional
        1 or 2. When omitted, the notebook-level ``attack_scenario`` is used.

    Returns
    -------
    tuple
        ``(df_cleaned, attacked_rows)`` where ``attacked_rows`` is a list of
        ``(date, old_value, new_value, attack_method)`` tuples.

    Raises
    ------
    ValueError
        If the scenario is neither 1 nor 2.
    """
    if scenario is None:
        scenario = attack_scenario  # fall back to the notebook-level switch

    value_span = max_value - min_value
    if scenario == 1:
        attack_method = 'Random'
        range_limit = min_value + value_span / 3
    elif scenario == 2:
        attack_method = 'Reply'
        range_limit = min_value + value_span / 2.9
    else:
        raise ValueError(f'Unknown attack scenario: {scenario!r} (expected 1 or 2)')

    attacked_rows = []

    for start_index in attack_indices:
        for i in range(start_index, start_index + attack_duration):
            old_value = df_cleaned.at[i, 'value']

            if attack_method == 'Random':
                new_value = round(np.random.uniform(min_value, range_limit), 3)
                # Upper bound of the fallback draw: a third of the way from the
                # sensor minimum up to the old value.
                fallback_top = min_value + (old_value - min_value) / 3
                # Check if the new value is smaller than the old one and not too close
                if new_value < old_value:
                    if old_value - new_value < value_span / 3:
                        new_value = round(np.random.uniform(min_value, fallback_top), 3)
                if new_value > old_value:
                    # The fake value must not exceed the real one: redraw it
                    new_value = round(np.random.uniform(min_value, fallback_top), 3)
            else:
                # Evaluated per row on purpose: earlier replacements are part
                # of the pool, as in the original implementation.
                readings = df_cleaned['value']
                replayable = readings[
                    (readings >= min_value)
                    & (readings <= range_limit)
                    & (readings <= old_value)
                ]
                if replayable.empty:
                    # Nothing suitable to replay: leave the reading untouched.
                    new_value = old_value
                else:
                    # Sampling directly from the filtered pool is equivalent to
                    # resampling until the value is <= old_value, but always ends.
                    new_value = replayable.sample(n=1).iloc[0]

            # Add the attack information to the list
            attacked_rows.append((df_cleaned.at[i, 'date'], old_value, new_value, attack_method))

            # Apply the new value
            df_cleaned.at[i, 'value'] = new_value

    return df_cleaned, attacked_rows

# LOAD, CLEAN, PREPARE THE DATES FOR THE ATTACK AND SAVE TO CSV

In [48]:
def _pick_attack_starts(candidate_indices, wanted, duration):
    """Randomly pick up to ``wanted`` non-overlapping runs of ``duration`` candidate rows.

    A start is valid when it and the following ``duration - 1`` rows are all in
    ``candidate_indices``. Valid starts are computed once and drawn uniformly,
    which matches drawing any candidate and rejecting invalid ones, but stops
    as soon as no valid start is left instead of retrying forever.
    """
    available = set(candidate_indices)
    valid_starts = [
        start for start in candidate_indices
        if all((start + offset) in available for offset in range(duration))
    ]
    chosen = []
    while len(chosen) < wanted and valid_starts:
        start = random.choice(valid_starts)
        chosen.append(start)
        # Two intervals of equal length overlap exactly when their starts are
        # closer than that length.
        valid_starts = [s for s in valid_starts if abs(s - start) >= duration]
    return chosen


# Load and clean every sensor, widening its reference range with the observed one
dataframes = {}

for sensor_file in sensors_to_attack:
    sensor_info = sensors_info[sensor_file]
    max_value_survey = sensor_info['max_value']
    min_value_survey = sensor_info['min_value']
    df_cleaned = load_and_clean_data(sensor_file)
    dataframes[sensor_file] = df_cleaned
    max_value_sensor = df_cleaned['value'].max()
    min_value_sensor = df_cleaned['value'].min()
    sensor_info['max_value'] = max(max_value_sensor, max_value_survey)
    sensor_info['min_value'] = min(min_value_sensor, min_value_survey)

# Randomly select row intervals above the average for the attack on the mandatory sensor.
# NOTE(review): no random seed is set in the visible cells, so every run picks
# different intervals and fake values.
mandatory_df = dataframes[mandatory_sensor]
mean_value = mandatory_df['value'].mean()
high_water_indices = mandatory_df.index[mandatory_df['value'] > mean_value].tolist()
attack_start_indices = _pick_attack_starts(high_water_indices, number_attacks, attack_duration)

# As before, the rows consumed by the chosen intervals are dropped from the list
used_indices = {
    idx
    for start_index in attack_start_indices
    for idx in range(start_index, start_index + attack_duration)
}
high_water_indices = [idx for idx in high_water_indices if idx not in used_indices]

if len(attack_start_indices) < number_attacks:
    print(f"Warning: only {len(attack_start_indices)} of {number_attacks} attack intervals could be placed")

# Print which sensors have been attacked
print(f"Attack scenario: {attack_scenario}, sensors attacked:\n{sensors_to_attack}\n")

attack_start_indices.sort()
print(f"Selected indexes for the attack: {attack_start_indices}")

# Attack each selected sensor with the same row intervals.
# NOTE(review): this assumes all sensor files are row-aligned with the
# mandatory sensor -- confirm.
for sensor_file in sensors_to_attack:
    sensor_params = sensors_info[sensor_file]
    df_cleaned = dataframes[sensor_file]
    df_attacked, attacked_rows = attack_sensor_data(df_cleaned, attack_duration, sensor_params['min_value'], sensor_params['max_value'], attack_start_indices)

    # Save the modified dataset (written to the working directory, not to Dataset/).
    # NOTE(review): the '_2' suffix is fixed and does not follow attack_scenario -- confirm it is intended.
    attacked_sensor_file = sensor_file.replace('.csv', '-attack_2.csv')
    attacked_sensor_file = attacked_sensor_file.replace('Dataset/', '')
    df_attacked.to_csv(attacked_sensor_file, index=False, header=['date', 'value'], sep=';', float_format='%.2f')

    # Print attacked rows
    print(f"Attacks performed for sensor {sensor_file}:")
    for date, old_value, new_value, method in attacked_rows:
        print(f"Date: {date}, Original value: {old_value:.3f}, New value: {new_value:.3f}, Method: {method}")

    # Save the modified dates to a CSV file for each sensor
    csv_filename = sensor_file.replace('Dataset/', '')
    csv_filename = csv_filename.replace('.csv', '-Dates_attacked_2.csv')
    with open(csv_filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Date"])  # Column header
        writer.writerows([[date] for date, _, _, _ in attacked_rows])

    print(f"\nModified dates saved in {csv_filename}")

# Save the name of sensors that have been attacked into a single file.
# NOTE(review): these names keep the 'Dataset/' prefix although the attacked
# files above are written without it -- verify against whatever reads this file.
csv_sensors = "Sensors_attacked_2.csv"
with open(csv_sensors, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Attacked sensors"])  # Column header
    writer.writerows([[sensor_file.replace('.csv', '-attack_2.csv')] for sensor_file in sensors_to_attack])

# Save the attack scenario
csv_attack_scenario = "Attack_Type_2.csv"
with open(csv_attack_scenario, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Attack type"])  # Column header
    writer.writerow([attack_scenario])

Attack scenario: 2, sensors attacked:
['Dataset/Casale di Goito - Idrometro Monte - 2024-08-19.csv', 'Dataset/Casale di Goito - Idrometro Pressione Valle - 2024-08-19.csv', 'Dataset/Pozzolo - Idrometro Monte - 2024-08-19.csv', 'Dataset/Pozzolo - Idrometro Valle - 2024-08-19.csv', 'Dataset/Salionze edificio regolatore - Idrometro pressione di Monte - 2024-08-19.csv', 'Dataset/Salionze Mincio - Livello Idrometrico - 2024-08-19.csv']

Selected indexes for the attack: [9498, 13410, 13456, 13884, 14903, 16705, 17382, 17654, 17973, 17992, 19434, 20675, 21462, 22674, 29375, 32856, 34612, 37270, 37839, 39202, 50206, 50417, 50440, 50942, 51584, 51838, 52328, 56704, 59365, 59983, 60250, 60603, 62291, 66932, 67560, 72125, 72332, 74127, 75151, 75799, 78809, 80555, 82567, 85729, 85913, 87986, 88189, 88581, 89490, 90478]
Attacks performed for sensor Dataset/Casale di Goito - Idrometro Monte - 2024-08-19.csv:
Date: 2022-04-09 22:30, Original value: 19.910, New value: 19.910, Method: Reply
Date: 2022-