In [1]:
# Use the wget command to download the ZIP file containing all CSVs
!wget http://cicresearch.ca/CICDataset/CIC-IDS-2017/Dataset/CIC-IDS-2017/CSVs/MachineLearningCSV.zip

# The file will be saved in your current working directory in the Colab instance, which is typically /content/

--2025-10-04 07:49:36--  http://cicresearch.ca/CICDataset/CIC-IDS-2017/Dataset/CIC-IDS-2017/CSVs/MachineLearningCSV.zip
Resolving cicresearch.ca (cicresearch.ca)... 205.174.165.80
Connecting to cicresearch.ca (cicresearch.ca)|205.174.165.80|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 235102953 (224M) [application/zip]
Saving to: ‘MachineLearningCSV.zip’


2025-10-04 07:49:47 (22.4 MB/s) - ‘MachineLearningCSV.zip’ saved [235102953/235102953]



In [2]:
 # Unzip the downloaded file
!unzip MachineLearningCSV.zip

Archive:  MachineLearningCSV.zip
   creating: MachineLearningCVE/
  inflating: MachineLearningCVE/Wednesday-workingHours.pcap_ISCX.csv  
  inflating: MachineLearningCVE/Tuesday-WorkingHours.pcap_ISCX.csv  
  inflating: MachineLearningCVE/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv  
  inflating: MachineLearningCVE/Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv  
  inflating: MachineLearningCVE/Monday-WorkingHours.pcap_ISCX.csv  
  inflating: MachineLearningCVE/Friday-WorkingHours-Morning.pcap_ISCX.csv  
  inflating: MachineLearningCVE/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv  
  inflating: MachineLearningCVE/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv  


In [2]:
import pandas as pd
import os

# Define the base directory where the CSV files are located
data_dir = '/content/MachineLearningCVE/'

# Get a list of all CSV files in that directory
# We use endswith('.csv') to ensure we only target the data files.
all_files = [os.path.join(data_dir, f) for f in os.listdir(data_dir) if f.endswith('.csv')]

print(f"Found {len(all_files)} files to process.")
# print(all_files) # Uncomment this to see the list of file paths

Found 8 files to process.


In [3]:
import pandas as pd
import os
import numpy as np

# Define the base directory where the CSV files are located (using your provided path)
data_dir = '/content/MachineLearningCVE/'

# Get a list of all CSV files in that directory
all_files = [os.path.join(data_dir, f) for f in os.listdir(data_dir) if f.endswith('.csv')]

print(f"Found {len(all_files)} files to process.\n")

# Initialize an empty list to hold the DataFrames
list_of_dfs = []

# --- 1. Read and Concatenate ---
for file_path in all_files:
    try:
        # Read the CSV file
        # The 'low_memory=False' argument is often needed for large network traffic datasets
        df_temp = pd.read_csv(file_path, low_memory=False)

        # Add the temporary DataFrame to our list
        list_of_dfs.append(df_temp)

        print(f"Loaded: {os.path.basename(file_path)} with shape {df_temp.shape}")

    except Exception as e:
        print(f"Error loading {os.path.basename(file_path)}: {e}")

# Concatenate all DataFrames in the list into one master DataFrame
df_master = pd.concat(list_of_dfs, ignore_index=True)

print("\n--- Concatenation Complete ---")
print(f"Final shape of the raw master DataFrame: {df_master.shape}")

# --- 2. Data Clean-up (Crucial for CIC-IDS-2017) ---

# A. Clean up column names: remove leading/trailing spaces
df_master.columns = df_master.columns.str.strip()

# B. Handle Infinity and NaN values
# The dataset contains 'Infinity' values, which can cause issues in model training.
# Replace infinite values with NaN
df_master.replace([np.inf, -np.inf], np.nan, inplace=True)

# Fill any remaining NaN values (often caused by the infinity replacement) with 0.
# You might choose a different imputation strategy later, but 0 is a common starting point.
df_master.fillna(0, inplace=True)

print("\n--- Data Clean-up Complete ---")
print(f"Number of missing values (NaN/Inf) remaining: {df_master.isna().sum().sum()}")
print(f"Final shape of the cleaned master DataFrame: {df_master.shape}")
print("\nFirst 5 rows of the combined DataFrame:")
print(df_master.head())

Found 8 files to process.

Loaded: Monday-WorkingHours.pcap_ISCX.csv with shape (529918, 79)
Loaded: Wednesday-workingHours.pcap_ISCX.csv with shape (692703, 79)
Loaded: Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv with shape (170366, 79)
Loaded: Tuesday-WorkingHours.pcap_ISCX.csv with shape (445909, 79)
Loaded: Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv with shape (225745, 79)
Loaded: Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv with shape (286467, 79)
Loaded: Friday-WorkingHours-Morning.pcap_ISCX.csv with shape (191033, 79)
Loaded: Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv with shape (288602, 79)

--- Concatenation Complete ---
Final shape of the raw master DataFrame: (2830743, 79)

--- Data Clean-up Complete ---
Number of missing values (NaN/Inf) remaining: 0
Final shape of the cleaned master DataFrame: (2830743, 79)

First 5 rows of the combined DataFrame:
   Destination Port  Flow Duration  Total Fwd Packets  Total Backward Packets  \
0   

In [4]:
from sklearn.preprocessing import LabelEncoder

# 1. Non-Target Categorical Features (e.g., 'Protocol')
# Let's inspect data types to confirm the categorical columns.
# We'll stick to 'Protocol' as an example for one-hot encoding if it's present and non-numeric.

# You can run: print(df_master.dtypes) to confirm which columns are 'object' type.

# One-hot encode the 'Protocol' column (if it exists and is an object type)
# Note: For this dataset, 'Protocol' is often already numeric, but we include this step as a general practice.
if 'Protocol' in df_master.columns and df_master['Protocol'].dtype == 'object':
    df_master = pd.get_dummies(df_master, columns=['Protocol'], prefix='Protocol')
    print("One-hot encoded 'Protocol' column.")

# 2. Target Label Encoding
# The 'Label' column contains attack types (Benign, DDoS, etc.)
# We must convert these strings into integers.

# Ensure the column name is correct (it was 'Label' after stripping whitespace)
label_column = 'Label'

le = LabelEncoder()
df_master[label_column + '_encoded'] = le.fit_transform(df_master[label_column])

print(f"Original unique labels: {df_master[label_column].unique()}")
print(f"Encoded labels: {df_master[label_column + '_encoded'].unique()}")

# You can save the mapping to understand which number corresponds to which attack
label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print("\nLabel Mapping:", label_mapping)

Original unique labels: ['BENIGN' 'DoS slowloris' 'DoS Slowhttptest' 'DoS Hulk' 'DoS GoldenEye'
 'Heartbleed' 'Web Attack � Brute Force' 'Web Attack � XSS'
 'Web Attack � Sql Injection' 'FTP-Patator' 'SSH-Patator' 'DDoS'
 'PortScan' 'Bot' 'Infiltration']
Encoded labels: [ 0  6  5  4  3  8 12 14 13  7 11  2 10  1  9]

Label Mapping: {'BENIGN': np.int64(0), 'Bot': np.int64(1), 'DDoS': np.int64(2), 'DoS GoldenEye': np.int64(3), 'DoS Hulk': np.int64(4), 'DoS Slowhttptest': np.int64(5), 'DoS slowloris': np.int64(6), 'FTP-Patator': np.int64(7), 'Heartbleed': np.int64(8), 'Infiltration': np.int64(9), 'PortScan': np.int64(10), 'SSH-Patator': np.int64(11), 'Web Attack � Brute Force': np.int64(12), 'Web Attack � Sql Injection': np.int64(13), 'Web Attack � XSS': np.int64(14)}


In [5]:
from sklearn.model_selection import train_test_split

# Define the Features (X) and the Target (y)
# X is all columns EXCEPT the original 'Label' column and the new target column.
# y is the new numerical target column.

# Drop the original string label column as it's no longer needed
X = df_master.drop([label_column, label_column + '_encoded'], axis=1)

# Set the encoded label as the target variable
y = df_master[label_column + '_encoded']

# Split the data into training and testing sets (e.g., 70% train, 30% test)
# random_state ensures reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

print("\n--- Dataset Splitting Complete ---")
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")


--- Dataset Splitting Complete ---
X_train shape: (1981520, 78)
X_test shape: (849223, 78)
y_train shape: (1981520,)
y_test shape: (849223,)


In [6]:
from sklearn.preprocessing import StandardScaler

# 1. Initialize the StandardScaler
# This calculates the mean and standard deviation ONLY from the training data.
scaler = StandardScaler()

# 2. Fit the scaler on the training data and transform it
X_train_scaled = scaler.fit_transform(X_train)
print("X_train scaled successfully.")

# 3. Transform the test data using the *fitted* scaler
# DO NOT call .fit() on the test data!
X_test_scaled = scaler.transform(X_test)
print("X_test scaled successfully.")

# Convert back to DataFrame for better usability/column reference (optional but recommended)
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns)

print("\n--- Scaling Complete ---")
print(f"X_train_scaled shape: {X_train_scaled.shape}")
print(f"X_test_scaled shape: {X_test_scaled.shape}")

X_train scaled successfully.
X_test scaled successfully.

--- Scaling Complete ---
X_train_scaled shape: (1981520, 78)
X_test_scaled shape: (849223, 78)


In [7]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import xgboost as xgb
import time # To measure training time

In [9]:
print("--- Training XGBoost Classifier ---")

# Use unscaled data (X_train) as tree-based models are scale-invariant
X_train_xgb = X_train
X_test_xgb = X_test

start_time = time.time()
xgb_model = xgb.XGBClassifier(
    objective='multi:softmax', # For multi-class classification
    num_class=len(y_train.unique()),
    n_estimators=100, # Number of boosting rounds (trees)
    random_state=42,
    eval_metric='mlogloss'
)

# Train the model
xgb_model.fit(X_train_xgb, y_train)
training_time = time.time() - start_time
print(f"Training Time: {training_time:.2f} seconds")

# Make predictions on the test set
y_pred_xgb = xgb_model.predict(X_test_xgb)

# Evaluate
print("\nEvaluation:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_xgb):.4f}")
print(classification_report(y_test, y_pred_xgb, zero_division=0))

--- Training XGBoost Classifier ---
Training Time: 685.24 seconds

Evaluation:
Accuracy: 0.9990
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    681929
           1       0.95      0.76      0.84       590
           2       1.00      1.00      1.00     38408
           3       1.00      1.00      1.00      3088
           4       1.00      1.00      1.00     69322
           5       0.99      0.99      0.99      1650
           6       0.99      1.00      1.00      1739
           7       1.00      1.00      1.00      2381
           8       1.00      1.00      1.00         3
           9       1.00      0.73      0.84        11
          10       0.99      1.00      1.00     47679
          11       1.00      1.00      1.00      1769
          12       0.75      0.88      0.81       452
          13       0.75      0.50      0.60         6
          14       0.53      0.32      0.39       196

    accuracy                          

In [10]:
print("--- Training Random Forest Classifier ---")

# Use unscaled data (X_train)
X_train_rf = X_train
X_test_rf = X_test

start_time = time.time()
rf_model = RandomForestClassifier(
    n_estimators=100, # Number of trees in the forest
    random_state=42,
    n_jobs=-1 # Use all available cores for speed
)

# Train the model
rf_model.fit(X_train_rf, y_train)
training_time = time.time() - start_time
print(f"Training Time: {training_time:.2f} seconds")

# Make predictions
y_pred_rf = rf_model.predict(X_test_rf)

# Evaluate
print("\nEvaluation:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_rf):.4f}")
print(classification_report(y_test, y_pred_rf, zero_division=0))

--- Training Random Forest Classifier ---
Training Time: 1306.77 seconds

Evaluation:
Accuracy: 0.9985
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    681929
           1       0.90      0.76      0.82       590
           2       1.00      1.00      1.00     38408
           3       1.00      1.00      1.00      3088
           4       1.00      1.00      1.00     69322
           5       0.99      0.99      0.99      1650
           6       0.99      1.00      0.99      1739
           7       1.00      1.00      1.00      2381
           8       1.00      1.00      1.00         3
           9       1.00      0.73      0.84        11
          10       0.99      0.99      0.99     47679
          11       1.00      1.00      1.00      1769
          12       0.75      0.83      0.79       452
          13       0.50      0.33      0.40         6
          14       0.46      0.32      0.37       196

    accuracy                   

In [None]:
import joblib
import os

# --- 1. Mount Google Drive (Required for Colab) ---
# If you are still in Colab, you need to re-run this:
from google.colab import drive
drive.mount('/content/drive')

# Define the folder path in your Google Drive where you want to save the models
SAVE_DIR = '/content/drive/MyDrive/IDS_Models/'
os.makedirs(SAVE_DIR, exist_ok=True)

# --- 2. Save Each Model ---

# Save XGBoost
joblib.dump(xgb_model, os.path.join(SAVE_DIR, 'xgb_cicids2017.joblib'))


print("All models saved successfully to your Google Drive!")

Mounted at /content/drive
All models saved successfully to your Google Drive!


In [None]:
joblib.dump(rf_model, os.path.join(SAVE_DIR, 'rf_cicids2017.joblib'))


['/content/drive/MyDrive/IDS_Models/rf_cicids2017.joblib']

In [1]:
from sklearn.utils import resample
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd # Ensure pandas is imported
import numpy as np # Ensure numpy is imported


# --- Step 1: Convert to float32 to save memory ---
# This is already done globally for X in a previous cell, but ensuring here.
# X is defined in cell PRhKf-vUcvZD

# Combine X + y for sampling
# Ensure X and y are defined before this step.
# X is defined in cell PRhKf-vUcvZD
# y is defined in cell PRhKf-vUcvZD
if 'X' not in locals() or 'y' not in locals():
    print("X and y are not defined. Please run the data splitting cell (PRhKf-vUcvZD) first.")
else:
    # Convert to float32 to save memory
    X_temp = X.astype(np.float32)

    # Combine X + y for sampling
    df_small = pd.concat([X_temp, y], axis=1)

    # --- Step 2: Sample per class ---
    # Define the desired sample size per class
    sample_size_per_class = 50000 # You can adjust this value (e.g., 10000, 5000, etc.)

    df_balanced = df_small.groupby(y.name, group_keys=False).apply(
        lambda x: x.sample(n=min(len(x), sample_size_per_class), random_state=42)
    )

    print("Balanced subset shape:", df_balanced.shape)
    print("Class distribution:\n", df_balanced[y.name].value_counts())

    # Separate back into X, y
    X_bal = df_balanced.drop(columns=[y.name])
    y_bal = df_balanced[y.name]

    # --- Step 3: Train/test split ---
    X_train, X_test, y_train, y_test = train_test_split(
        X_bal, y_bal, test_size=0.3, stratify=y_bal, random_state=42
    )

    # --- Step 4: Scaling ---
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # --- Step 5: Train KNN ---
    # This part is for KNN training and can be kept or removed depending on your focus.
    # If you only want to run SVM, you can comment out or remove this section.
    # knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)
    # print("\n--- Training KNN on Sampled Balanced Subset ---")
    # knn.fit(X_train_scaled, y_train)

    # y_pred = knn.predict(X_test_scaled)

    # --- Step 6: Evaluation ---
    # This part is for KNN evaluation and can be kept or removed.
    # print("\n--- Classification Report ---")
    # print(classification_report(y_test, y_pred, target_names=le.classes_))

    # print("\nConfusion Matrix:")
    # print(confusion_matrix(y_test, y_pred))

X and y are not defined. Please run the data splitting cell (PRhKf-vUcvZD) first.


In [None]:
import os
import joblib

# Set a save directory in your Google Drive
SAVE_DIR = "/content/drive/MyDrive/models"
os.makedirs(SAVE_DIR, exist_ok=True)

# Suppose your trained model is `knn` (or `rf_model` if RandomForest)
joblib.dump(knn, os.path.join(SAVE_DIR, "knn_cicids2017.joblib"))

print("✅ Model saved successfully!")


✅ Model saved successfully!


In [8]:
# ============================
# 🧠 Lightweight SVM Pipeline
# ============================

import os
import joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report

# ====== 1️⃣ Sampling (safe for Colab) ======
# Limit to 2,000 samples per class
svm_sample = (
    df_master.groupby("Label_encoded", group_keys=False)
    .apply(lambda x: x.sample(min(len(x), 2000), random_state=42))
    .reset_index(drop=True)
)

print(f"Sampled {len(svm_sample)} rows for SVM training.")

# ====== 2️⃣ Feature/Target split ======
X_svm = svm_sample.drop(["Label", "Label_encoded"], axis=1)
y_svm = svm_sample["Label_encoded"]

# ====== 3️⃣ Train/Test split ======
X_train_svm, X_test_svm, y_train_svm, y_test_svm = train_test_split(
    X_svm, y_svm, test_size=0.3, random_state=42, stratify=y_svm
)

# ====== 4️⃣ Scale features ======
scaler_svm = StandardScaler()
X_train_svm = scaler_svm.fit_transform(X_train_svm.astype("float32"))
X_test_svm = scaler_svm.transform(X_test_svm.astype("float32"))

# ====== 5️⃣ Train Linear SVM ======
svm_model = LinearSVC(C=1.0, max_iter=2000, random_state=42)
svm_model.fit(X_train_svm, y_train_svm)

# ====== 6️⃣ Evaluate ======
y_pred_svm = svm_model.predict(X_test_svm)
print("\n✅ SVM Evaluation Results:")
print("Accuracy:", accuracy_score(y_test_svm, y_pred_svm))
print(classification_report(y_test_svm, y_pred_svm))

# ====== 7️⃣ Save model to Drive ======
SAVE_DIR = "/content/drive/MyDrive/IDS_Models"
os.makedirs(SAVE_DIR, exist_ok=True)

joblib.dump(svm_model, os.path.join(SAVE_DIR, "linear_svm_cicids2017.joblib"))
joblib.dump(scaler_svm, os.path.join(SAVE_DIR, "linear_svm_scaler.joblib"))

print(f"\n📁 Model and scaler saved to: {SAVE_DIR}")


  .apply(lambda x: x.sample(min(len(x), 2000), random_state=42))


Sampled 22193 rows for SVM training.

✅ SVM Evaluation Results:
Accuracy: 0.9321117452688495
              precision    recall  f1-score   support

           0       0.96      0.78      0.86       600
           1       0.90      0.98      0.94       590
           2       0.98      1.00      0.99       600
           3       0.99      0.97      0.98       600
           4       0.95      0.99      0.97       600
           5       0.96      0.99      0.97       600
           6       0.96      0.96      0.96       600
           7       0.98      0.99      0.99       600
           8       0.75      1.00      0.86         3
           9       0.83      0.45      0.59        11
          10       0.98      1.00      0.99       600
          11       0.91      0.99      0.95       600
          12       0.68      0.91      0.78       452
          13       0.00      0.00      0.00         6
          14       1.00      0.03      0.05       196

    accuracy                           0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [9]:
# ============================
# ⚙️ Logistic Regression Model
# ============================

import os
import joblib
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# ====== 1️⃣ Sampling (reuse SVM strategy) ======
logit_sample = (
    df_master.groupby("Label_encoded", group_keys=False)
    .apply(lambda x: x.sample(min(len(x), 2000), random_state=42))
    .reset_index(drop=True)
)

print(f"Sampled {len(logit_sample)} rows for Logistic Regression.")

# ====== 2️⃣ Features & Target ======
X_logit = logit_sample.drop(["Label", "Label_encoded"], axis=1)
y_logit = logit_sample["Label_encoded"]

# ====== 3️⃣ Train/Test split ======
X_train_logit, X_test_logit, y_train_logit, y_test_logit = train_test_split(
    X_logit, y_logit, test_size=0.3, random_state=42, stratify=y_logit
)

# ====== 4️⃣ Scale the data ======
scaler_logit = StandardScaler()
X_train_logit = scaler_logit.fit_transform(X_train_logit.astype("float32"))
X_test_logit = scaler_logit.transform(X_test_logit.astype("float32"))

# ====== 5️⃣ Train Logistic Regression ======
# 'lbfgs' solver supports multinomial (multiclass) classification
logit_model = LogisticRegression(
    multi_class="multinomial", solver="lbfgs", max_iter=500, n_jobs=-1
)
logit_model.fit(X_train_logit, y_train_logit)

# ====== 6️⃣ Evaluate ======
y_pred_logit = logit_model.predict(X_test_logit)

print("\n✅ Logistic Regression Evaluation Results:")
print("Accuracy:", accuracy_score(y_test_logit, y_pred_logit))
print(classification_report(y_test_logit, y_pred_logit))

# ====== 7️⃣ Save model & scaler ======
SAVE_DIR = "/content/drive/MyDrive/IDS_Models"
os.makedirs(SAVE_DIR, exist_ok=True)

joblib.dump(logit_model, os.path.join(SAVE_DIR, "logistic_cicids2017.joblib"))
joblib.dump(scaler_logit, os.path.join(SAVE_DIR, "logistic_scaler.joblib"))

print(f"\n📁 Model and scaler saved to: {SAVE_DIR}")


Sampled 22193 rows for Logistic Regression.


  .apply(lambda x: x.sample(min(len(x), 2000), random_state=42))



✅ Logistic Regression Evaluation Results:
Accuracy: 0.9294082306999099
              precision    recall  f1-score   support

           0       0.98      0.80      0.88       600
           1       0.91      0.98      0.95       590
           2       0.98      1.00      0.99       600
           3       0.99      0.97      0.98       600
           4       0.94      0.99      0.97       600
           5       0.97      0.94      0.96       600
           6       0.94      0.96      0.95       600
           7       0.98      0.99      0.99       600
           8       1.00      1.00      1.00         3
           9       0.86      0.55      0.67        11
          10       0.97      0.99      0.98       600
          11       0.90      0.99      0.94       600
          12       0.68      0.91      0.78       452
          13       0.00      0.00      0.00         6
          14       0.71      0.03      0.05       196

    accuracy                           0.93      6658
   macro

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
