# LIBRARIES

In [1]:
import csv
import math
import random
import pandas as pd

# SENSOR FILES AND ATTACK PARAMETERS

In [2]:
# Surveyed range and attack target of every sensor, keyed by the path of its CSV file.
# 'min_value' / 'max_value' / 'target' are refreshed later from the loaded data.
sensors_info = {
    'Dataset/Casale di Goito - Idrometro Monte - 2024-08-19.csv': {'min_value': 20.411, 'max_value': 26.832, 'target': 20.5},
    'Dataset/Casale di Goito - Idrometro Pressione Valle - 2024-08-19.csv': {'min_value': 18.801, 'max_value': 24.797, 'target': 18.9},
    'Dataset/Pozzolo - Idrometro Monte - 2024-08-19.csv': {'min_value': 41.3, 'max_value': 46.297, 'target': 41.4},
    'Dataset/Pozzolo - Idrometro Valle - 2024-08-19.csv': {'min_value': 42.8, 'max_value': 45.076, 'target': 42.9},
    'Dataset/Salionze edificio regolatore - Idrometro pressione di Monte - 2024-08-19.csv': {'min_value': 59.911, 'max_value': 66.794, 'target': 60},
    'Dataset/Salionze Mincio - Livello Idrometrico - 2024-08-19.csv': {'min_value': 0, 'max_value': 3.854, 'target': 0.1}
}

# Attack parameters
number_attacks = 50
attack_duration = 2 * 4  # 2 hours of attack, expressed as a number of measurements

# The attack windows are drawn with the `random` module: seed it here so that
# Restart Kernel -> Run All always selects the same windows. Change the seed
# to generate a different set of attacks.
random_seed = 42
random.seed(random_seed)

print(f'Number of attacks: {number_attacks} made of {attack_duration} measurements')

Number of attacks: 50 made of 8 measurements


# SELECTION OF SENSORS TO ATTACK

In [3]:
# The Pozzolo "Monte" sensor is attacked in every scenario; the remaining
# sensors are only added in scenario 2.
mandatory_sensor = 'Dataset/Pozzolo - Idrometro Monte - 2024-08-19.csv'
other_sensors = [sensor for sensor in sensors_info if sensor != mandatory_sensor]

# 2 possible scenarios:
#   1 -> Pozzolo monte only
#   2 -> all the sensors
attack_scenario = 2

if attack_scenario == 1:  # Pozzolo monte
    sensors_to_attack = [mandatory_sensor]
elif attack_scenario == 2:  # All the sensors (mandatory sensor kept in third position)
    sensors_to_attack = [
        other_sensors[0],
        other_sensors[1],
        mandatory_sensor,
        other_sensors[2],
        other_sensors[3],
        other_sensors[4],
    ]
else:
    # Fail here instead of leaving `sensors_to_attack` undefined (or silently
    # stale from a previous run of this cell).
    raise ValueError(f'Unknown attack_scenario: {attack_scenario!r} (expected 1 or 2)')

# DEFINE THE FUNCTION TO CLEAN THE CSV FILES

In [4]:
# Readings above this value are treated as sensor glitches.
_MAX_PLAUSIBLE_VALUE = 80


def _next_valid_reading(raw_rows, start):
    """Return the first usable reading found at or after position `start`, else None.

    A reading is usable when its text, stripped of surrounding whitespace, is a
    plain unsigned decimal number strictly between 0 and `_MAX_PLAUSIBLE_VALUE`.
    """
    for position in range(start, len(raw_rows)):
        candidate = raw_rows[position].split(';')[1].strip()
        if candidate.replace('.', '', 1).isdigit() and 0 < float(candidate) < _MAX_PLAUSIBLE_VALUE:
            return float(candidate)
    return None


def load_and_clean_data(sensor_file):
    """Load a sensor CSV and return a DataFrame with columns 'date' and 'value'.

    The first line of the file is skipped and only the first comma-separated
    field of every remaining line is read; that field is split on ';' into a
    date (first part) and a reading (second part).

    A reading is kept as-is when it is a plain unsigned decimal number in the
    range [0, 80]. Anything else (empty, '-', non numeric, signed, above 80) is
    replaced:

    * on the first row, by the next usable reading (0 if there is none);
    * on any other row, by the mean of the previous cleaned value and the next
      usable reading (the previous value is used twice if there is none).

    NOTE(review): the current row's reading is tested without stripping
    whitespace and accepts exactly 0 and 80, whereas the look-ahead strips the
    text and requires 0 < value < 80. Both behaviours are kept unchanged here.

    Args:
        sensor_file: path of the CSV file to load.

    Returns:
        pandas.DataFrame with one row per input row: 'date' (str) and 'value' (float).
    """
    # Loading dataset
    df = pd.read_csv(sensor_file, usecols=[0], skiprows=1, header=None)

    # Work on a plain list: the look-ahead scans it repeatedly.
    raw_rows = df.iloc[:, 0].tolist()

    numeric_values = []
    dates = []

    for i, raw_row in enumerate(raw_rows):
        parts = raw_row.split(';')  # '<date>;<value>'
        date_value = parts[0]
        raw_value = parts[1]

        # isdigit() rejects empty strings, '-', signs and any non numeric text
        is_valid = (
            raw_value.replace('.', '', 1).isdigit()
            and 0 <= float(raw_value) <= _MAX_PLAUSIBLE_VALUE
        )

        if is_valid:
            numeric_value = float(raw_value)
        else:
            next_value = _next_valid_reading(raw_rows, i + 1)
            if i == 0:
                # No previous value to average with: take the next usable one
                numeric_value = next_value if next_value is not None else 0
            else:
                prev_value = numeric_values[-1]
                if next_value is None:
                    next_value = prev_value
                numeric_value = (prev_value + next_value) / 2  # Calculate the mean

        numeric_values.append(numeric_value)
        dates.append(date_value)

    # Creating DataFrame with extracted data
    df_cleaned = pd.DataFrame({'date': dates, 'value': numeric_values})

    return df_cleaned

# DEFINE THE FUNCTION TO CREATE FAKE MEASUREMENTS

In [5]:
def attack_sensor_data(df_cleaned, target, attack_indices, attack_duration, max_differences, decrement, csv_intervals):
    """Overwrite readings with forged values that descend to `target` and then stay there.

    For every start index the function walks forward row by row. The first
    forged value is the real reading minus `decrement`; each following one is
    the previous forged value minus `decrement`, never going below `target`.
    Once `target` is reached it is held for
    `max(max_differences[n] - rows_already_forged, 0) + attack_duration`
    further rows (clipped to the end of the data).

    `df_cleaned` is modified in place. Rows are addressed with `.at[i, ...]`
    for `i` in `range(start, len(df_cleaned))`, so a default 0..n-1 index is
    expected.

    The first and last forged date of every attack are written to
    `csv_intervals`. The interval is also written when the data ends before
    `target` is reached, so the file covers every row that was altered.

    Args:
        df_cleaned: DataFrame with 'date' and 'value' columns.
        target: value the forged readings converge to.
        attack_indices: row positions where each attack starts.
        attack_duration: number of rows the target is held for, on top of the
            rows needed to line up with `max_differences`.
        max_differences: per attack, the number of rows reserved for the descent.
        decrement: amount subtracted at every step of the descent.
        csv_intervals: path of the CSV file receiving the "Start Date"/"End Date" rows.

    Returns:
        (df_cleaned, attacked_rows) where attacked_rows is a list of
        (date, original value, forged value, 'gradual' | 'constant') tuples.
    """
    attacked_rows = []

    with open(csv_intervals, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Start Date", "End Date"]) # Header for the CSV

        for index_count, start_index in enumerate(attack_indices):
            first_date = None # For tracking the first date of the attack
            last_date = None # For tracking the last date of the attack
            new_value = None

            for j, i in enumerate(range(start_index, len(df_cleaned)), start=1):
                row_date = df_cleaned.at[i, 'date']
                old_value = df_cleaned.at[i, 'value']

                if j == 1:
                    # Record the first date; the descent starts from the real reading
                    first_date = row_date
                    reference = old_value
                else:
                    # Afterwards it continues from the previous forged value
                    reference = new_value

                if reference > target:
                    new_value = reference - decrement
                    if new_value < target:
                        new_value = target
                else:
                    new_value = target

                df_cleaned.at[i, 'value'] = round(new_value, 3)
                attacked_rows.append((row_date, old_value, new_value, 'gradual' if new_value != target else 'constant'))
                # Every forged row extends the interval, even if the data ends
                # before the target is reached
                last_date = row_date

                # Check if the value has reached the target
                if new_value == target:
                    rows_left = max(max_differences[index_count] - j, 0)
                    # Keeps the value constant once the target is reached
                    hold_end = min(i + 1 + rows_left + attack_duration, len(df_cleaned))
                    for k in range(i + 1, hold_end):
                        old_value = df_cleaned.at[k, 'value']
                        df_cleaned.at[k, 'value'] = target
                        attacked_rows.append((df_cleaned.at[k, 'date'], old_value, target, 'constant'))
                        # Save the last date every time a new row is modified
                        last_date = df_cleaned.at[k, 'date']
                    break

            # Once the attack is done, save the first and the last date on a CSV file
            if first_date and last_date:
                writer.writerow([first_date, last_date])

    return df_cleaned, attacked_rows

# DEFINE THE FUNCTION TO CALCULATE THE DIFFERENCE BETWEEN THE ACTUAL VALUE AND THE TARGET

In [6]:
def calculate_difference_from_target(df_cleaned, target, start_index):
    """Return how far the reading at `start_index` lies above `target`.

    The reading is taken from the 'value' column of `df_cleaned`; 0 is returned
    when it is below the target.
    """
    gap = df_cleaned.at[start_index, 'value'] - target
    return 0 if gap < 0 else gap

# LOAD, CLEAN, PREPARE THE DATES FOR THE ATTACK AND SAVE TO CSV

In [7]:
# Load every selected sensor and widen its surveyed range with the values
# actually found in the cleaned data; the target is re-derived from the new minimum.
dataframes = {}
for sensor_file in sensors_to_attack:
    # Surveyed bounds from the configuration
    sensor_info = sensors_info.get(sensor_file, {})
    max_value_survey, min_value_survey = sensor_info.get('max_value'), sensor_info.get('min_value')

    df_cleaned = load_and_clean_data(sensor_file)
    dataframes[sensor_file] = df_cleaned

    # Bounds observed in the cleaned series
    cleaned_values = df_cleaned['value']
    max_value_sensor, min_value_sensor = cleaned_values.max(), cleaned_values.min()

    widest_max = max(max_value_sensor, max_value_survey)
    widest_min = min(min_value_sensor, min_value_survey)
    sensors_info[sensor_file].update(
        max_value=widest_max,
        min_value=widest_min,
        target=widest_min + 0.1,  # the target sits just above the lowest known value
    )

# Randomly select row intervals above the average for the attack on the mandatory sensor
mandatory_df = dataframes[mandatory_sensor]
mean_value = mandatory_df['value'].mean()
high_water_indices = mandatory_df.index[mandatory_df['value'] > mean_value].tolist()

# A start is usable when the whole window of `attack_duration` rows is above the
# mean. Enumerating the usable starts up front (set membership instead of list
# scans) means the loop below always terminates: it stops as soon as no window
# is left, instead of retrying random picks forever.
available_indices = set(high_water_indices)
candidate_starts = [
    idx for idx in high_water_indices
    if all(idx + offset in available_indices for offset in range(attack_duration))
]

attack_start_indices = []
while len(attack_start_indices) < number_attacks and candidate_starts:
    start_index = random.choice(candidate_starts)
    attack_start_indices.append(start_index)
    # Discard every start whose window would overlap the one just selected
    candidate_starts = [
        idx for idx in candidate_starts
        if idx + attack_duration <= start_index or idx >= start_index + attack_duration
    ]
    # Remove indices used
    high_water_indices = [idx for idx in high_water_indices if idx < start_index or idx >= start_index + attack_duration]

# Print which sensors have been attacked
print(f"Attack scenario: {attack_scenario}, sensors attacked:\n{sensors_to_attack}\n")

attack_start_indices.sort()
print(f"Selected indexes for the attack: {attack_start_indices}\n")

# For every attack, work out how many decrement steps the slowest sensor needs
# to go from its reading at the start index down to its target
decrement = 0.1
max_differences = []
for start_index in attack_start_indices:
    # Steps needed by each sensor (fractional; rounded up once the maximum is known)
    differences = [
        calculate_difference_from_target(
            dataframes[sensor_file],
            sensors_info[sensor_file]['target'],
            start_index,
        ) / decrement
        for sensor_file in sensors_to_attack
    ]
    max_difference = math.ceil(max(differences))
    max_differences.append(max_difference)

attacked_rows_length = []

# Run the same attack windows against every selected sensor and write, per
# sensor: the attack intervals, the forged dataset and the list of forged dates.
# All output files are written without the 'Dataset/' prefix.
for sensor_file in sensors_to_attack:
    csv_attacks_intervals = sensor_file.replace('Dataset/', '').replace('.csv', '-Attacks_Intervals_3.csv')

    sensor_params = sensors_info[sensor_file]
    df_cleaned = dataframes[sensor_file]
    df_attacked, attacked_rows = attack_sensor_data(
        df_cleaned,
        sensor_params['target'],
        attack_start_indices,
        attack_duration,
        max_differences,
        decrement,
        csv_attacks_intervals,
    )

    # Forged dataset, ';'-separated with two decimals
    attacked_sensor_file = sensor_file.replace('.csv', '-attack_3.csv').replace('Dataset/', '')
    df_attacked.to_csv(
        attacked_sensor_file,
        index=False,
        header=['date', 'value'],
        sep=';',
        float_format='%.2f',
    )

    attacked_rows_length.append(len(attacked_rows))

    # Report every forged row
    print(f"Attacks executed for sensor {sensor_file}:")
    for date, old_value, new_value, method in attacked_rows:
        print(f"Date: {date}, Original value: {old_value:.3f}, New value: {new_value:.3f}, Method: {method}")

    # One forged date per line, in a CSV file of its own
    csv_filename = sensor_file.replace('Dataset/', '').replace('.csv', '-Dates_attacked_3.csv')
    with open(csv_filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Date"])  # Column header
        writer.writerows([attacked_row[0]] for attacked_row in attacked_rows)

    print(f"\nModified dates saved in {csv_filename}\n")

print(f'The number of attacked rows is: {attacked_rows_length}')

# List every attacked sensor in a single file.
# NOTE(review): the names written here keep the 'Dataset/' prefix, whereas the
# attacked datasets themselves are saved without it - confirm which form the
# consumer of this file expects.
csv_sensors = "Sensors_attacked_3.csv"
with open(csv_sensors, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Attacked sensors"])  # Column header
    attacked_names = [name.replace('.csv', '-attack_3.csv') for name in sensors_to_attack]
    writer.writerows([name] for name in attacked_names)

# Record which scenario produced these files
csv_attack_scenario = "Attack_Type_3.csv"
with open(csv_attack_scenario, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Attack type"])  # Column header
    writer.writerow([attack_scenario])

19.87
18.901
41.4
42.63
60.011
0.1
Attack scenario: 2, sensors attacked:
['Dataset/Casale di Goito - Idrometro Monte - 2024-08-19.csv', 'Dataset/Casale di Goito - Idrometro Pressione Valle - 2024-08-19.csv', 'Dataset/Pozzolo - Idrometro Monte - 2024-08-19.csv', 'Dataset/Pozzolo - Idrometro Valle - 2024-08-19.csv', 'Dataset/Salionze edificio regolatore - Idrometro pressione di Monte - 2024-08-19.csv', 'Dataset/Salionze Mincio - Livello Idrometrico - 2024-08-19.csv']

Selected indexes for the attack: [9962, 10528, 13256, 14756, 16032, 18694, 19415, 20499, 20961, 21343, 21821, 21960, 24000, 24126, 26988, 29197, 30865, 32242, 32983, 34031, 35792, 37808, 39031, 44914, 46858, 50598, 51023, 51716, 52766, 53783, 54273, 54388, 57780, 58301, 59009, 61110, 65689, 66920, 74464, 75717, 75978, 76073, 78173, 79993, 84504, 85011, 85814, 89336, 89483, 91417]

Attacks executed for sensor Dataset/Casale di Goito - Idrometro Monte - 2024-08-19.csv:
Date: 2022-04-14 18:30, Original value: 20.070, New value