In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import os

In [2]:
def process_swat_data(raw_file, output_file, is_attack=False):
    try:
        if is_attack:
            df = pd.read_csv(raw_file)
        else:
            df = pd.read_csv(raw_file, skiprows=1)

        print(f"\ndf.shape: {df.shape}")
        print("df.columns:", df.columns.tolist())
        
        timestamp_col = df.columns[0]
        df = df.rename(columns={timestamp_col: 'Timestamp'})
        
        df['Timestamp'] = df['Timestamp'].astype(str).str.strip()
        print("\nTimestamp:")
        print(df['Timestamp'].head())
        
        df['Timestamp'] = pd.to_datetime(df['Timestamp'], format='%d/%m/%Y %I:%M:%S %p')
        
        for col in df.columns:
            if col != 'Timestamp' and col != 'Normal/Attack':
                df[col] = pd.to_numeric(df[col], errors='coerce')
        
        df = df.fillna(method='ffill').fillna(method='bfill')
        
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        if len(numeric_cols) > 0:
            scaler = MinMaxScaler()
            df[numeric_cols] = scaler.fit_transform(df[numeric_cols])
        
        df.to_csv(output_file, index=False)
        print(f"Final.df.shape: {df.shape}")
        
        return df
        
    except Exception as e:
        if 'df' in locals():
            print("\nDataInfo:")
            print(df.info())
        raise

In [3]:
def prepare_swat_dataset(normal_raw, attack_raw, output_dir):
    try:
        os.makedirs(output_dir, exist_ok=True)
        normal_df = process_swat_data(
            normal_raw, 
            os.path.join(output_dir, 'SWaT_normal.csv'),
            is_attack=False
        )
        
        attack_df = process_swat_data(
            attack_raw, 
            os.path.join(output_dir, 'SWaT_attack.csv'),
            is_attack=True
        )
        
        print(f"normal_df.shape: {normal_df.shape}")
        print(f"attack_df.shape: {attack_df.shape}")
        
        return normal_df, attack_df
        
    except Exception as e:
        print(f"\nPrepare.Error: {str(e)}")
        raise

In [4]:
if __name__ == "__main__":
    normal_df, attack_df = prepare_swat_dataset(
        normal_raw='SWaT_Dataset_Normal_v1.csv',
        attack_raw='SWaT_Dataset_Attack_v0.csv',
        output_dir='processed_data'
    )


df.shape: (495000, 53)
df.columns: [' Timestamp', 'FIT101', 'LIT101', 'MV101', 'P101', 'P102', 'AIT201', 'AIT202', 'AIT203', 'FIT201', 'MV201', 'P201', 'P202', 'P203', 'P204', 'P205', 'P206', 'DPIT301', 'FIT301', 'LIT301', 'MV301', 'MV302', 'MV303', 'MV304', 'P301', 'P302', 'AIT401', 'AIT402', 'FIT401', 'LIT401', 'P401', 'P402', 'P403', 'P404', 'UV401', 'AIT501', 'AIT502', 'AIT503', 'AIT504', 'FIT501', 'FIT502', 'FIT503', 'FIT504', 'P501', 'P502', 'PIT501', 'PIT502', 'PIT503', 'FIT601', 'P601', 'P602', 'P603', 'Normal/Attack']

Timestamp:
0    22/12/2015 4:30:00 PM
1    22/12/2015 4:30:01 PM
2    22/12/2015 4:30:02 PM
3    22/12/2015 4:30:03 PM
4    22/12/2015 4:30:04 PM
Name: Timestamp, dtype: object


  df = df.fillna(method='ffill').fillna(method='bfill')


Final.df.shape: (495000, 53)

df.shape: (449919, 53)
df.columns: [' Timestamp', 'FIT101', 'LIT101', ' MV101', 'P101', 'P102', ' AIT201', 'AIT202', 'AIT203', 'FIT201', ' MV201', ' P201', ' P202', 'P203', ' P204', 'P205', 'P206', 'DPIT301', 'FIT301', 'LIT301', 'MV301', 'MV302', ' MV303', 'MV304', 'P301', 'P302', 'AIT401', 'AIT402', 'FIT401', 'LIT401', 'P401', 'P402', 'P403', 'P404', 'UV401', 'AIT501', 'AIT502', 'AIT503', 'AIT504', 'FIT501', 'FIT502', 'FIT503', 'FIT504', 'P501', 'P502', 'PIT501', 'PIT502', 'PIT503', 'FIT601', 'P601', 'P602', 'P603', 'Normal/Attack']

Timestamp:
0    28/12/2015 10:00:00 AM
1    28/12/2015 10:00:01 AM
2    28/12/2015 10:00:02 AM
3    28/12/2015 10:00:03 AM
4    28/12/2015 10:00:04 AM
Name: Timestamp, dtype: object


  df = df.fillna(method='ffill').fillna(method='bfill')


Final.df.shape: (449919, 53)
normal_df.shape: (495000, 53)
attack_df.shape: (449919, 53)
