# LIBRARIES

In [341]:
import csv
import random
import numpy as np
import pandas as pd

# SENSOR FILE for THE ATTACK

In [342]:
# Attack configuration: every value a reader may want to tune lives in this cell.
sensor_file = 'Dataset/Pozzolo - Idrometro Monte - 2024-08-19.csv'

number_attacks = 50
attack_duration = 2 * 4  # measurements per attack: 2 hours of attack (2 * 4 readings)
min_value = 41.3         # initial lower bound of the sensor readings
max_value = 46.297       # initial upper bound of the sensor readings

# 2 possible scenarios
attack_scenario = 1  # Random attack
#attack_scenario = 2 # Replay attack (labelled 'Reply' by scenario_settings)


def scenario_settings(scenario, lower, upper):
    """Return ``(range_limit, attack_method)`` for the given attack scenario.

    Parameters
    ----------
    scenario : int
        1 selects the random attack, 2 selects the replay attack.
    lower, upper : float
        Bounds of the sensor readings.

    Returns
    -------
    tuple
        ``range_limit`` is the upper bound for fake measurements: the lower
        third of the [lower, upper] span for scenario 1, the lower half for
        scenario 2. ``attack_method`` is the textual label of the scenario.

    Raises
    ------
    ValueError
        If ``scenario`` is neither 1 nor 2. Without this check the two
        variables would be left undefined (or stale from a previous run).
    """
    span = upper - lower
    if scenario == 1:
        return lower + span / 3, 'Random'
    if scenario == 2:
        # NOTE(review): the label is spelled 'Reply' here; make sure the cell
        # that applies the attack tests for the same spelling.
        return lower + span / 2, 'Reply'
    raise ValueError(
        f'Unknown attack_scenario {scenario!r}: expected 1 (Random) or 2 (Reply)'
    )


range_limit, attack_method = scenario_settings(attack_scenario, min_value, max_value)

print(f'Attacked sensor: {sensor_file}\n')
print(f'Number of attacks: {number_attacks} made of {attack_duration} measurements')
print(f'In total there are {number_attacks * attack_duration} fake measurements')
print(f'Min value: {min_value}')
print(f'Max value: {max_value}')
print(f'Max value for fake measurements: {range_limit}')

Attacked sensor: Dataset/Pozzolo - Idrometro Monte - 2024-08-19.csv

Number of attacks: 50 made of 8 measurements
In total there are 400 fake measurements
Min value: 41.3
Max value: 46.297
Max value for fake measurements: 43.7985


# LOAD, PARSE and CLEAR THE CSV FILE

In [343]:
def _level_field(raw_line):
    """Return the stripped second ';'-separated field of ``raw_line``.

    Returns '' when the line has no second field, so that such a row is
    handled as an invalid reading instead of raising IndexError.
    """
    fields = raw_line.split(';')
    return fields[1].strip() if len(fields) > 1 else ''


def _looks_numeric(text):
    """Return True when ``text`` is made of digits with at most one '.'.

    Signs and exponents are rejected, so a negative reading is never
    considered numeric by this check.
    """
    return text.replace('.', '', 1).isdigit()


def _next_valid_level(raw_lines, start, default):
    """Return the first usable reading at position ``start`` or later.

    A look-ahead reading is usable when it is numeric and strictly between
    0 and 80. ``default`` is returned when no such reading exists.
    """
    for position in range(start, len(raw_lines)):
        text = _level_field(raw_lines[position])
        if _looks_numeric(text) and 0 < float(text) < 80:
            return float(text)
    return default


numeric_values = []
dates = []
valid_count = 0  # Number of rows stored so far (repaired rows are counted too)

# Only the first column is read; each of its cells is split on ';' below.
df = pd.read_csv(sensor_file, usecols=[0], skiprows=1, header=None)  # Loading dataset
raw_lines = df.iloc[:, 0].tolist()

for i, raw_line in enumerate(raw_lines):
    date_value = raw_line.split(';')[0]  # Extract the date
    level_text = _level_field(raw_line)

    # The row's own reading is accepted when it is numeric and within [0, 80].
    # (Look-ahead readings use the stricter open interval (0, 80).)
    if _looks_numeric(level_text) and 0 <= float(level_text) <= 80:
        numeric_value = float(level_text)
    elif i == 0:
        # No previous value to average with: take the first valid value after
        # it, or 0 when the whole file contains none.
        numeric_value = _next_valid_level(raw_lines, i + 1, default=0)
    else:
        # Replace the invalid reading with the mean of the previous stored
        # value and the next valid one (the previous value stands in for the
        # next one when no valid reading follows).
        prev_value = numeric_values[-1]
        next_value = _next_valid_level(raw_lines, i + 1, default=prev_value)
        numeric_value = (prev_value + next_value) / 2  # Calculate the mean

    numeric_values.append(numeric_value)
    dates.append(date_value)
    valid_count += 1

# Widen the configured bounds so that they always contain the observed data.
min_value = min(min_value, min(numeric_values))
max_value = max(max_value, max(numeric_values))

df_cleaned = pd.DataFrame({'date': dates, 'value': numeric_values})

# CALCULATE THE MEAN of THE HEIGHT and FIND MEASUREMENTS GREATER THAN IT

In [344]:
# Mean water height over the cleaned series.
mean_value = df_cleaned['value'].mean()

# Row labels of every measurement strictly above the mean.
above_mean_mask = df_cleaned['value'] > mean_value
high_water_indices = df_cleaned.index[above_mean_mask].tolist()

# Keep only a whole number of attack-sized groups: drop the trailing remainder.
length_high_water_indices = len(high_water_indices) - len(high_water_indices) % attack_duration
high_water_indices = high_water_indices[:length_high_water_indices]

# SELECT a RANGE OF ROWS ABOVE THE MEAN for THE ATTACK

In [345]:
attack_start_indices = []

# A start index is usable when the whole window [start, start + attack_duration)
# fits in the frame and lies entirely above the mean. A set makes that test
# cheap; testing membership against the list rescans it for every index.
above_mean = set(high_water_indices)
candidate_starts = [
    start for start in high_water_indices
    if start + attack_duration <= len(df_cleaned)
    and all(start + offset in above_mean for offset in range(attack_duration))
]

# Draw uniformly among the usable starts. The loop ends as soon as no usable
# window is left, so it cannot spin forever when fewer than number_attacks
# disjoint windows exist.
while len(attack_start_indices) < number_attacks and candidate_starts:
    start_index = random.choice(candidate_starts)
    attack_start_indices.append(start_index)
    # Two windows of equal length overlap exactly when their starts are less
    # than attack_duration apart; discard every start that would overlap.
    candidate_starts = [
        start for start in candidate_starts
        if abs(start - start_index) >= attack_duration
    ]

# Remove indices used
used_indices = {
    idx
    for start in attack_start_indices
    for idx in range(start, start + attack_duration)
}
high_water_indices = [idx for idx in high_water_indices if idx not in used_indices]

if len(attack_start_indices) < number_attacks:
    print(f"Warning: only {len(attack_start_indices)} of {number_attacks} attack windows could be placed")

attack_start_indices.sort()
print(f"Selected indexes for the attack: {attack_start_indices}")

Selected indexes for the attack: [13261, 15696, 16221, 16611, 17148, 17505, 18970, 19024, 20658, 27007, 29000, 30474, 31216, 33820, 33886, 35969, 37610, 37697, 38853, 51374, 51659, 51909, 53616, 54654, 55298, 57205, 57670, 57852, 58448, 59760, 65066, 65236, 65628, 66312, 66463, 68265, 72171, 74871, 75855, 76061, 76707, 77639, 79174, 80256, 81504, 81831, 86190, 90855, 91999, 92066]


# PREPARE ROWS TO BE ATTACKED

In [None]:
attacked_rows = []
attack_methods = []

# The configuration cell labels the second scenario 'Reply'; 'Replay' is
# accepted as well so that either spelling selects the replay branch.
replay_labels = ('Reply', 'Replay')

if attack_method == 'Random':
    # Minimum distance required between a genuine value and its fake replacement.
    min_gap = (max_value - min_value) / 3
elif attack_method in replay_labels:
    # Snapshot of the genuine values inside the limited range, taken once
    # before any row is overwritten, so injected values are never replayed.
    in_limited_range = (df_cleaned['value'] >= min_value) & (df_cleaned['value'] <= range_limit)
    replay_pool = df_cleaned.loc[in_limited_range, 'value'].to_numpy(copy=True)
    if replay_pool.size == 0:
        raise ValueError('Replay attack: no measurement lies in the limited range')
else:
    raise ValueError(f'Unknown attack_method {attack_method!r}')

# NOTE: this cell overwrites df_cleaned in place, so running it twice attacks
# values that have already been replaced.
for start_index in attack_start_indices:
    end_index = start_index + attack_duration
    attack_methods.append((start_index, end_index, attack_method))

    for i in range(start_index, end_index):
        old_value = df_cleaned.at[i, 'value']
        if attack_method == 'Random':
            new_value = round(np.random.uniform(min_value, range_limit), 3)
            # Redraw when the candidate is above the old value, or below it
            # but closer than min_gap. The redraw is taken between the
            # sensor's minimum and one third of the way up to the old value.
            too_close = new_value < old_value and old_value - new_value < min_gap
            if too_close or new_value > old_value:
                new_value = round(np.random.uniform(min_value, min_value + (old_value - min_value) / 3), 3)
        else:
            # Replay a past value that is not bigger than the old one. The
            # pool is filtered up front instead of resampling until one fits,
            # which could loop forever when no such value exists.
            eligible = replay_pool[replay_pool <= old_value]
            if eligible.size == 0:
                raise ValueError(f'Replay attack: no pooled value is <= {old_value} (row {i})')
            new_value = float(np.random.choice(eligible))

        # Add the attack information to the list
        attacked_rows.append((df_cleaned.at[i, 'date'], old_value, new_value, attack_method))

        # Apply the new value
        df_cleaned.at[i, 'value'] = new_value

# SAVE and PRINT THE ATTACKED DATES

In [348]:
# Persist the attacked series: ';'-separated, two decimals, renamed columns.
df_cleaned.to_csv(
    'Pozzolo - Idrometro Monte - 2024-08-19-Attack_1.csv',
    sep=';',
    header=['data', 'valore'],
    index=False,
    float_format='%.2f',
)

# Print of the attacked rows
count_attacks = 0

print("Executed attacks:")
for date, old_value, new_value, method in attacked_rows:
    print(f"Date: {date}, Original value: {old_value:.3f}, New value: {new_value:.3f}, Method: {method}")

# One single-column row per attacked measurement, ready for csv.writer.
dates = [[attacked[0]] for attacked in attacked_rows]

# Saving the modified dates in a CSV file
csv_filename = "Attacked_dates_sensor_Pozzolo_Monte.csv"
with open(csv_filename, mode='w', newline='') as dates_handle:
    csv.writer(dates_handle).writerows([["Data"], *dates])

print(f"\nModified dates saved in {csv_filename}")

# Save the attack scenario
csv_attack_scenario = "Attack_Type_1.csv"
with open(csv_attack_scenario, mode='w', newline='') as scenario_handle:
    # First row is the column header, second row the scenario number.
    csv.writer(scenario_handle).writerows([["Attack type"], [attack_scenario]])

Executed attacks:
Date: 2022-05-19 03:15, Original value: 44.800, New value: 43.630, Method: Reply
Date: 2022-05-19 03:30, Original value: 44.810, New value: 43.690, Method: Reply
Date: 2022-05-19 03:45, Original value: 44.810, New value: 43.500, Method: Reply
Date: 2022-05-19 04:00, Original value: 44.810, New value: 43.640, Method: Reply
Date: 2022-05-19 04:15, Original value: 44.810, New value: 43.560, Method: Reply
Date: 2022-05-19 04:30, Original value: 44.810, New value: 43.560, Method: Reply
Date: 2022-05-19 04:45, Original value: 44.810, New value: 43.530, Method: Reply
Date: 2022-05-19 05:00, Original value: 44.820, New value: 43.240, Method: Reply
Date: 2022-06-13 12:00, Original value: 44.790, New value: 43.610, Method: Reply
Date: 2022-06-13 12:15, Original value: 44.820, New value: 43.380, Method: Reply
Date: 2022-06-13 12:30, Original value: 44.830, New value: 43.610, Method: Reply
Date: 2022-06-13 12:45, Original value: 44.830, New value: 43.350, Method: Reply
Date: 2022