In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the processed feature table (X) and the processed label table (y)
# from their CSV files.
features_csv = r"1.1-Output/ProcessedFeatures.csv"
labels_csv = r"1.1-Output/ProcessedLabels.csv"

X = pd.read_csv(features_csv)
y = pd.read_csv(labels_csv)

In [3]:
# Distinct values of the 'Label' column, computed once and reused below.
all_labels = y['Label'].unique()
print(f"Total Labels: {len(all_labels)}")

# Split the labels into those whose text contains "attempted"
# (case-insensitive) and those that do not, in a single pass.
matching_labels = []
nonmatching_labels = []
for label in all_labels:
    if "attempted" in str(label).lower():
        matching_labels.append(label)
    else:
        nonmatching_labels.append(label)

print(f"Labels containing 'attempted' ({len(matching_labels)}):")
for label in matching_labels:
    print(label)

# Blank separator between the two listings.
print("\n")

print(f"Labels NOT containing 'attempted' ({len(nonmatching_labels)}):")
for label in nonmatching_labels:
    print(label)


Total Labels: 27
Labels containing 'attempted' (11):
FTP-Patator - Attempted
SSH-Patator - Attempted
DoS Slowloris - Attempted
DoS Slowhttptest - Attempted
DoS Hulk - Attempted
DoS GoldenEye - Attempted
Web Attack - Brute Force - Attempted
Infiltration - Attempted
Web Attack - XSS - Attempted
Web Attack - SQL Injection - Attempted
Botnet - Attempted


Labels NOT containing 'attempted' (16):
BENIGN
FTP-Patator
SSH-Patator
DoS Slowloris
DoS Slowhttptest
DoS Hulk
DoS GoldenEye
Heartbleed
Web Attack - Brute Force
Infiltration
Infiltration - Portscan
Web Attack - XSS
Web Attack - SQL Injection
Botnet
Portscan
DDoS


In [4]:
# Columns removed from the feature frame before scoring.
# NOTE(review): presumably these are port/protocol identifiers and flag
# counters not wanted as model inputs — confirm the rationale.
COLUMNS_TO_DROP = [
    'ICMP Code', 'ICMP Type', 'Src Port', 'Dst Port', 'Protocol',
    'Fwd URG Flags', 'Bwd URG Flags', 'URG Flag Count',
]

# Rebind X instead of mutating in place, and tolerate already-missing columns,
# so re-running this cell is a no-op rather than a KeyError. Trade-off:
# errors="ignore" will also not flag a misspelled column name.
X = X.drop(columns=COLUMNS_TO_DROP, errors="ignore")

In [5]:
# Print a concise summary of the feature frame (index range, column names, dtypes).
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2099971 entries, 0 to 2099970
Data columns (total 76 columns):
 #   Column                      Dtype  
---  ------                      -----  
 0   Flow Duration               int64  
 1   Total Fwd Packet            int64  
 2   Total Bwd packets           int64  
 3   Total Length of Fwd Packet  int64  
 4   Total Length of Bwd Packet  int64  
 5   Fwd Packet Length Max       int64  
 6   Fwd Packet Length Min       int64  
 7   Fwd Packet Length Mean      float64
 8   Fwd Packet Length Std       float64
 9   Bwd Packet Length Max       int64  
 10  Bwd Packet Length Min       int64  
 11  Bwd Packet Length Mean      float64
 12  Bwd Packet Length Std       float64
 13  Flow Bytes/s                float64
 14  Flow Packets/s              float64
 15  Flow IAT Mean               float64
 16  Flow IAT Std                float64
 17  Flow IAT Max                int64  
 18  Flow IAT Min                int64  
 19  Fwd IAT Total        

In [6]:
# Fisher score of each feature j with respect to the class labels:
#
#     F_j = sum_i n_i * (mu_ij - mu_j)^2  /  sum_i n_i * var_ij
#
# where i ranges over classes, n_i is the class size, mu_ij and var_ij are the
# mean and sample variance (ddof=1) of feature j within class i, and mu_j is
# the overall mean of feature j.

# Reduce the label frame to a single Series. Prefer the explicit 'Label'
# column (the name used when the labels are inspected earlier in this
# notebook) over positional selection, so an extra leading column in the CSV
# cannot silently become the target. Falls back to the first column otherwise.
if isinstance(y, pd.DataFrame):
    y = y['Label'] if 'Label' in y.columns else y.iloc[:, 0]

# Ensure target is categorical
y = y.astype('category')

# Per-class statistics for every feature in one grouped pass, instead of
# rebuilding a boolean mask for each (feature, class) pair.
grouped = X.groupby(y, observed=True)
class_sizes = grouped.size()    # n_i, indexed by class
class_means = grouped.mean()    # mu_ij, classes x features
class_vars = grouped.var()      # var_ij (ddof=1), classes x features

# A class with a single sample has an undefined (NaN) sample variance. Treat
# its within-class scatter as zero so one rare class does not turn every
# feature's score into NaN.
class_vars.loc[class_sizes < 2] = 0.0

overall_mean = X.mean()         # mu_j, indexed by feature

# Between-class and within-class scatter per feature. skipna=False keeps any
# remaining NaN visible in the result instead of silently dropping it.
numerator = ((class_means - overall_mean) ** 2).mul(class_sizes, axis=0).sum(skipna=False)
denominator = class_vars.mul(class_sizes, axis=0).sum(skipna=False)

# Avoid divide-by-zero: a feature with zero within-class scatter scores 0.
fisher_series = (numerator / denominator).where(denominator != 0, 0.0)

# Result container: feature name -> Fisher score (in X's column order).
fisher_scores = fisher_series.to_dict()

# Convert to DataFrame and sort
fisher_df = pd.DataFrame(list(fisher_scores.items()), columns=['Feature', 'Fisher Score'])
fisher_df = fisher_df.sort_values(by='Fisher Score', ascending=False)

# Display top features
print(fisher_df)

                       Feature  Fisher Score
11      Bwd Packet Length Mean      4.516691
52        Bwd Segment Size Avg      4.516691
40           Packet Length Std      4.288405
12       Bwd Packet Length Std      4.024574
9        Bwd Packet Length Max      3.435715
..                         ...           ...
46              ACK Flag Count      0.000069
57         Bwd Packet/Bulk Avg      0.000059
2            Total Bwd packets      0.000054
4   Total Length of Bwd Packet      0.000053
56          Bwd Bytes/Bulk Avg      0.000039

[76 rows x 2 columns]


In [7]:
# Show the Fisher-score table (sorted in the previous cell) as the cell's rich output.
fisher_df

Unnamed: 0,Feature,Fisher Score
11,Bwd Packet Length Mean,4.516691
52,Bwd Segment Size Avg,4.516691
40,Packet Length Std,4.288405
12,Bwd Packet Length Std,4.024574
9,Bwd Packet Length Max,3.435715
...,...,...
46,ACK Flag Count,0.000069
57,Bwd Packet/Bulk Avg,0.000059
2,Total Bwd packets,0.000054
4,Total Length of Bwd Packet,0.000053
