## Extracting benign samples

In [1]:
import pandas as pd
import numpy as np

# Read the UNSW-NB15 training split from disk.
df = pd.read_csv("Dataset/UNSW_NB15_training-set.csv")

# Keep only the rows whose 'label' equals 0, i.e. the normal/benign flows.
benign = df.loc[df["label"].eq(0)]


In [3]:
# Preview the benign subset; pandas elides the middle rows and columns of a large frame.
benign

Unnamed: 0,id,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,...,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,attack_cat,label
0,1,0.121478,tcp,-,FIN,6,4,258,172,74.087490,...,1,1,0,0,0,1,1,0,Normal,0
1,2,0.649902,tcp,-,FIN,14,38,734,42014,78.473372,...,1,2,0,0,0,1,6,0,Normal,0
2,3,1.623129,tcp,-,FIN,8,16,364,13186,14.170161,...,1,3,0,0,0,2,6,0,Normal,0
3,4,1.681642,tcp,ftp,FIN,12,12,628,770,13.677108,...,1,3,1,1,0,2,1,0,Normal,0
4,5,0.449454,tcp,-,FIN,10,6,534,268,33.373826,...,1,40,0,0,0,2,39,0,Normal,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115956,115957,0.000000,arp,-,INT,1,0,46,0,0.000000,...,2,2,0,0,0,2,2,1,Normal,0
115957,115958,59.995781,arp,-,INT,2,0,92,0,0.016668,...,2,2,0,0,0,2,2,1,Normal,0
115960,115961,59.995781,arp,-,INT,2,0,92,0,0.016668,...,2,2,0,0,0,2,2,1,Normal,0
115961,115962,59.995781,arp,-,INT,2,0,92,0,0.016668,...,2,2,0,0,0,2,2,1,Normal,0


In [4]:
# Build a synthetic "benign" table from the filtered flows and save a fixed-size
# random sample of it to CSV.
# NOTE(review): the output column names (including the spellings "Protcol" and
# "SeddAddress") look like they mirror an external schema — confirm before renaming.
SEED = 42            # seeds every random column and the final row sample
N_SAMPLES = 51_000   # number of benign rows written to disk

rng = np.random.default_rng(SEED)  # local generator: re-running the cell reproduces the same values
n_rows = len(benign)

# Total bytes transferred in each flow: source bytes plus destination bytes
total_bytes = benign["sbytes"].fillna(0) + benign["dbytes"].fillna(0)

# Assemble the selected and synthesized fields for benign traffic
benign_candidates = pd.DataFrame({
    "Time": benign["dur"].fillna(0),  # Duration of the flow
    # NOTE(review): the default is upper-case "TCP" while the values shown in the
    # data preview are lower-case ("tcp", "arp") — confirm which case is expected.
    "Protcol": benign["proto"].fillna("TCP"),  # Network protocol used (default to TCP if missing)
    # Fill missing states BEFORE casting to str: astype(str) turns NaN into the
    # text "nan", which would yield the flag "n" instead of the default "A".
    # The trailing fillna still covers empty strings, whose first character is NaN.
    "Flag": benign["state"].fillna("A").astype(str).str[0].fillna("A"),
    "Family": ["Benign"] * n_rows,  # Assign 'Benign' label to all rows
    "Clusters": rng.integers(0, 3, size=n_rows),  # Random cluster assignment (0, 1, or 2)
    "SeddAddress": [f"1SAFE{i}" for i in range(n_rows)],  # Synthetic sender address
    "ExpAddress": [f"1EXP{i}" for i in range(n_rows)],  # Synthetic recipient address
    "BTC": 0,  # No bitcoin value for benign samples
    "USD": rng.integers(0, 6, size=n_rows) + rng.random(n_rows),  # Random USD values in [0, 6)
    "Netflow_Bytes": total_bytes,  # Total bytes transferred in the flow
    "IPaddress": benign["id"].astype(str).str[:1],  # Use first character of 'id' as mock IP
    "Threats": ["None"] * n_rows,  # No threats for benign traffic
    "Port": rng.choice([80, 443, 53, 8080], size=n_rows),  # Random common ports
    "Prediction": ["b"] * n_rows,  # Assign prediction label as 'b' for benign
})

# Remove duplicate rows, then fail with a clear message if too few remain to sample from.
benign_unique = benign_candidates.drop_duplicates()
if len(benign_unique) < N_SAMPLES:
    raise ValueError(
        f"Need {N_SAMPLES:,} unique benign rows to sample from, "
        f"but only {len(benign_unique):,} are available"
    )

# Randomly sample the requested number of unique benign entries
benign_final = benign_unique.sample(n=N_SAMPLES, random_state=SEED).reset_index(drop=True)

# Save the final DataFrame to a CSV file
benign_final.to_csv("Dataset/unsw_benign_51k.csv", index=False)
print(f"✅ Generated unsw_benign_51k.csv with {len(benign_final):,} rows")

✅ Generated unsw_benign_51k.csv with 51,000 rows


## Converting the notebook to an executable (.exe)

In [22]:
# Export the Hermes.ipynb notebook to a plain Python script with nbconvert.
!jupyter nbconvert --to script Hermes.ipynb


[NbConvertApp] Converting notebook Hermes.ipynb to script
[NbConvertApp] Writing 40734 bytes to Hermes.py


In [23]:
# Package Hermes.py with PyInstaller as a single-file (--onefile), console-less
# (--windowed) executable, bundling the model, dataset, image and users.json files.
# Each --add-data value is "source;destination"; ';' is the Windows path separator.
# NOTE(review): newer PyInstaller releases document ':' as the separator on all
# platforms — verify before running this build on another OS.
# NOTE(review): users.json is shipped inside the executable — confirm it holds no real credentials.
!pyinstaller --onefile --windowed Hermes.py --add-data "Model/ann_model.keras;Model" --add-data "Model/nb_model.pkl;Model" --add-data "Model/rfc_model.pkl;Model" --add-data "Model/svm_model.pkl;Model" --add-data "Model/gbc_model.pkl;Model" --add-data "Model/xgb_model.pkl;Model" --add-data "Model/cnn2image_model.keras;Model" --add-data "Dataset/model_metrics.pkl;Dataset" --add-data "Dataset/X_test.npy;Dataset" --add-data "image/stats_icon.png;image" --add-data "image/help.png;image" --add-data "image/background.png;image" --add-data "users.json;."


pygame 2.6.1 (SDL 2.28.4, Python 3.12.7)
Hello from the pygame community. https://www.pygame.org/contribute.html
pygame 2.6.1 (SDL 2.28.4, Python 3.12.7)
Hello from the pygame community. https://www.pygame.org/contribute.html
pygame 2.6.1 (SDL 2.28.4, Python 3.12.7)
Hello from the pygame community. https://www.pygame.org/contribute.html


10547 INFO: PyInstaller: 6.14.1, contrib hooks: 2025.5
10547 INFO: Python: 3.12.7 (conda)
10588 INFO: Platform: Windows-11-10.0.26100-SP0
10588 INFO: Python environment: D:\Anaconda_Python
10589 INFO: wrote C:\Users\DEVANSHU K RANJAN\Downloads\Hermes 1.0\Hermes.spec
10602 INFO: Module search paths (PYTHONPATH):
['D:\\Anaconda_Python\\Scripts\\pyinstaller.exe',
 'D:\\Anaconda_Python\\python312.zip',
 'D:\\Anaconda_Python\\DLLs',
 'D:\\Anaconda_Python\\Lib',
 'D:\\Anaconda_Python',
 'C:\\Users\\DEVANSHU K '
 'RANJAN\\AppData\\Roaming\\Python\\Python312\\site-packages',
 'D:\\Anaconda_Python\\Lib\\site-packages',
 'D:\\Anaconda_Python\\Lib\\site-packages\\win32',
 'D:\\Anaconda_Python\\Lib\\site-packages\\win32\\lib',
 'D:\\Anaconda_Python\\Lib\\site-packages\\Pythonwin',
 'D:\\Anaconda_Python\\Lib\\site-packages\\setuptools\\_vendor',
 'C:\\Users\\DEVANSHU K RANJAN\\Downloads\\Hermes 1.0']
15963 INFO: Appending 'datas' from .spec
15964 INFO: checking Analysis
16799 INFO: Building because

In [3]:
# Install PyInstaller into the running kernel's environment (%pip), pinned to the
# release the recorded run resolved so the build can be reproduced.
%pip install pyinstaller==6.14.1

Collecting pyinstaller
  Downloading pyinstaller-6.14.1-py3-none-win_amd64.whl.metadata (8.3 kB)
Collecting altgraph (from pyinstaller)
  Downloading altgraph-0.17.4-py2.py3-none-any.whl.metadata (7.3 kB)
Collecting pefile!=2024.8.26,>=2022.5.30 (from pyinstaller)
  Downloading pefile-2023.2.7-py3-none-any.whl.metadata (1.4 kB)
Collecting pyinstaller-hooks-contrib>=2025.4 (from pyinstaller)
  Downloading pyinstaller_hooks_contrib-2025.5-py3-none-any.whl.metadata (16 kB)
Downloading pyinstaller-6.14.1-py3-none-win_amd64.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ------- -------------------------------- 0.3/1.4 MB ? eta -:--:--
   ----------------------- ---------------- 0.8/1.4 MB 3.3 MB/s eta 0:00:01
   ---------------------------------------- 1.4/1.4 MB 3.2 MB/s eta 0:00:00
Downloading pefile-2023.2.7-py3-none-any.whl (71 kB)
Downloading pyinstaller_hooks_contrib-2025.5-py3-none-any.whl (437 kB)
Downloading altgraph-0.17.4-py2.py3-none-any.whl

In [16]:
# Upgrade scikit-learn in the running kernel's own environment (%pip rather than
# !pip, which may resolve to a different Python), pinned to the version the
# recorded run installed.
%pip install -U scikit-learn==1.7.0


Collecting scikit-learn
  Downloading scikit_learn-1.7.0-cp312-cp312-win_amd64.whl.metadata (14 kB)
Downloading scikit_learn-1.7.0-cp312-cp312-win_amd64.whl (10.7 MB)
   ---------------------------------------- 0.0/10.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/10.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/10.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/10.7 MB ? eta -:--:--
    --------------------------------------- 0.3/10.7 MB ? eta -:--:--
   ----- ---------------------------------- 1.6/10.7 MB 4.9 MB/s eta 0:00:02
   ------ --------------------------------- 1.8/10.7 MB 3.6 MB/s eta 0:00:03
   --------- ------------------------------ 2.6/10.7 MB 4.3 MB/s eta 0:00:02
   ----------- ---------------------------- 3.1/10.7 MB 3.5 MB/s eta 0:00:03
   ----------- ---------------------------- 3.1/10.7 MB 3.5 MB/s eta 0:00:03
   ------------- -------------------------- 3.7/10.7 MB 2.7 MB/s eta 0:00:03
   -------------- ---

  You can safely remove it manually.


In [20]:
import joblib
import os
from sklearn import __version__ as sklearn_version

# Re-serialize each saved model with the library versions installed in this kernel.
# NOTE(review): joblib.load unpickles the file and can execute arbitrary code —
# only run this on model files you created or otherwise trust.
# NOTE(review): loading and re-saving does not retrain anything. scikit-learn's
# model-persistence documentation warns that a model pickled under one version is
# not guaranteed to behave correctly under another — validate predictions afterwards.

# Directory where your models are stored
model_dir = "Model"

# List of model filenames to re-save
model_files = [
    "rfc_model.pkl",
    "svm_model.pkl",
    "nb_model.pkl",
    "gbc_model.pkl",
    "xgb_model.pkl",
]

for file in model_files:
    path = os.path.join(model_dir, file)

    # Guard clause: report and skip files that are not present.
    if not os.path.exists(path):
        print(f"[!] File not found: {file}")
        continue

    # Dump to a sibling temp file and swap it in afterwards, so a failed or
    # interrupted dump cannot leave a truncated pickle in place of the only copy.
    tmp_path = path + ".tmp"
    try:
        # Load model (from old version)
        model = joblib.load(path)
        print(f"[+] Loaded: {file}")

        # Save model (using current version), then replace the original file
        joblib.dump(model, tmp_path)
        os.replace(tmp_path, path)
        print(f"[✔] Re-saved: {file} using scikit-learn {sklearn_version}\n")

    except Exception as e:
        # Best effort: report the failure and continue with the remaining models.
        print(f"[✘] Failed to process {file}: {e}")
    finally:
        # After a successful os.replace the temp file no longer exists; this
        # only removes a leftover from a failed dump.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)



[+] Loaded: rfc_model.pkl
[✔] Re-saved: rfc_model.pkl using scikit-learn 1.7.0

[+] Loaded: svm_model.pkl
[✔] Re-saved: svm_model.pkl using scikit-learn 1.7.0

[+] Loaded: nb_model.pkl
[✔] Re-saved: nb_model.pkl using scikit-learn 1.7.0

[+] Loaded: gbc_model.pkl
[✔] Re-saved: gbc_model.pkl using scikit-learn 1.7.0



https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


[+] Loaded: xgb_model.pkl
[✔] Re-saved: xgb_model.pkl using scikit-learn 1.7.0

