In [1]:
import pandas as pd

# Input CSVs keyed by a human-readable class label. Insertion order is the
# order in which the files are read further down.
# NOTE(review): the labels look like traffic/attack categories - confirm
# against the dataset documentation.
files = {
    "DoS": "binary_DoS.csv",
    "Benign": "binary_benign.csv",
    "Spoofing-GAS": "binary_spoofing-GAS.csv",
    "Spoofing-RPM": "binary_spoofing-RPM.csv",
    "Spoofing-SPEED": "binary_spoofing-SPEED.csv",
    "Spoofing-STEERING_WHEEL": "binary_spoofing-STEERING_WHEEL.csv",
}

# Columns kept from every input file, in output order:
#   ID0 .. ID16                      (17 columns)
#   DATA_<g><i>, g = 0..7, i = 0..16 (8 groups x 17 columns)
# The group digit and the index are concatenated without a separator, so
# group 0 / index 10 is spelled 'DATA_010'.
# NOTE(review): presumably g is a payload byte and i a bit position within the
# binary encoding - confirm against the dataset description.
columns_of_interest = [f'ID{i}' for i in range(17)] + [
    f'DATA_{g}{i}' for g in range(8) for i in range(17)
]

# Read every CSV listed in `files`, keep only `columns_of_interest`, and stack
# the rows into `combined_data`.
#
# Loading is best-effort: a file that is missing, unreadable, or lacks any of
# the required columns is reported on stdout and skipped, so one bad input
# does not abort the whole run.
#
# The per-file frames are collected in a list and concatenated once after the
# loop. Concatenating onto a growing accumulator inside the loop would re-copy
# every previously loaded row on each iteration.
frames = []

# Only the paths are needed here; the labels in `files` are not used.
for filepath in files.values():
    try:
        data = pd.read_csv(filepath)
        if not all(col in data.columns for col in columns_of_interest):
            print(f"Some required columns are missing from {filepath}. Check the column names.")
            continue
        frames.append(data[columns_of_interest])
        print(f"Data from {filepath} added successfully!")
    except FileNotFoundError:
        print(f"File not found: {filepath}. Check the file path.")
        continue
    except Exception as e:
        # Deliberately broad: any other read/parse failure is reported and
        # the remaining files are still processed.
        print(f"An error occurred while loading {filepath}: {e}")
        continue

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when nothing could be loaded; downstream code checks `combined_data.empty`.
combined_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Print summary statistics for the combined data, one row per input column.
# Nothing is printed when no file could be loaded.
if not combined_data.empty:
    # describe() returns one row per statistic; transpose so each input column
    # becomes a row, then discard the 'count' column.
    descriptive_stats = combined_data.describe(include='all').T.drop(columns=['count'])

    # Lift the display limits so the whole table is printed without
    # truncation. These settings stay in effect for the rest of the session.
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)

    print(descriptive_stats)


Data from binary_DoS.csv added successfully!
Data from binary_benign.csv added successfully!
Data from binary_spoofing-GAS.csv added successfully!
Data from binary_spoofing-RPM.csv added successfully!
Data from binary_spoofing-SPEED.csv added successfully!
Data from binary_spoofing-STEERING_WHEEL.csv added successfully!
              mean       std  min  25%  50%  75%  max
ID0       0.000000  0.000000  0.0  0.0  0.0  0.0  0.0
ID1       0.000000  0.000000  0.0  0.0  0.0  0.0  0.0
ID2       0.000000  0.000000  0.0  0.0  0.0  0.0  0.0
ID3       0.000000  0.000000  0.0  0.0  0.0  0.0  0.0
ID4       0.000000  0.000000  0.0  0.0  0.0  0.0  0.0
ID5       0.000000  0.000000  0.0  0.0  0.0  0.0  0.0
ID6       0.169524  0.375214  0.0  0.0  0.0  0.0  1.0
ID7       0.427185  0.494670  0.0  0.0  0.0  1.0  1.0
ID8       0.291531  0.454467  0.0  0.0  0.0  1.0  1.0
ID9       0.131623  0.338081  0.0  0.0  0.0  0.0  1.0
ID10      0.378410  0.484991  0.0  0.0  0.0  1.0  1.0
ID11      0.504272  0.499982  