In [1]:
import pandas as pd

In [4]:
import pickle
import ast
import numpy as np

# ----------------------------
# Load trained artifacts
# ----------------------------
def _load_pickle(path):
    """Return the object deserialized from the pickle file at *path*."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# NOTE: unpickling can execute arbitrary code, so these files must come from
# a trusted source (ideally the same environment that trained the model).
model = _load_pickle("final_rf_model.pkl")
vectorizer = _load_pickle("tfidf_vectorizer.pkl")
label_encoder = _load_pickle("attack_type_encoder.pkl")

# ----------------------------
# Parse single CSV-like line
# ----------------------------
def parse_input(line: str):
    """Split one CSV-like line into a list of field strings.

    Every double-quote character toggles "inside quotes" mode and is itself
    discarded. A comma only separates fields while outside quotes; inside
    quotes it is kept as part of the current field. The final field is always
    emitted, so an empty line yields ``[""]``.
    """
    fields = []
    buffer = []
    quoted = False

    for ch in line:
        if ch == '"':
            # Quote characters only flip the state; they never reach the output.
            quoted = not quoted
        elif ch == "," and not quoted:
            fields.append("".join(buffer))
            buffer = []
        else:
            buffer.append(ch)

    # Flush whatever follows the last separator (possibly an empty string).
    fields.append("".join(buffer))
    return fields


# ----------------------------
# Build feature vector
# ----------------------------
def build_features(parsed):
    """Turn a parsed request line into model inputs.

    Args:
        parsed: Sequence of field strings for one request. The positions read
            here are 2 (method), 3 (url), 4 (path), 5 (body), 7 (body length),
            8 (entropy), 9 (special-character count) and 10 (DDoS flag).
            Adjust the indexes ONLY if the training column order was different.

    Returns:
        A ``(tfidf_features, numeric_features)`` tuple: the output of the
        module-level ``vectorizer.transform`` for the combined text payload,
        and a 1x4 NumPy array ``[[body_len, entropy, special_chars, is_ddos]]``.

    Raises:
        IndexError: If ``parsed`` has fewer than 11 fields.
        ValueError: If a numeric field cannot be converted to ``float``.
    """
    method = parsed[2]
    url = parsed[3]
    path = parsed[4]
    body = parsed[5]

    body_len = float(parsed[7])
    entropy = float(parsed[8])
    special_chars = float(parsed[9])
    # Compare case-insensitively and ignore surrounding whitespace so that
    # "true" / "TRUE" / " True" are not silently treated as False.
    is_ddos = int(parsed[10].strip().lower() == "true")

    # Combine text features (same logic as training)
    text_payload = f"{method} {url} {path} {body}"

    tfidf_features = vectorizer.transform([text_payload])

    numeric_features = np.array(
        [[body_len, entropy, special_chars, is_ddos]]
    )

    return tfidf_features, numeric_features


# ----------------------------
# Predict function
# ----------------------------
def predict_attack(line):
    """Classify one raw request line.

    The line is parsed with ``parse_input``, converted to features with
    ``build_features``, and scored by the module-level ``model``; the
    predicted class is mapped back to its name with ``label_encoder``.

    Returns:
        ``(attack_name, confidence)`` where ``confidence`` is the highest
        class probability expressed as a percentage rounded to 2 decimals.
    """
    text_matrix, numeric_matrix = build_features(parse_input(line))

    # Densify the text features so both parts can be joined column-wise
    # into a single feature row.
    dense_text = text_matrix.toarray()
    X = np.hstack([dense_text, numeric_matrix])

    predicted_label = model.predict(X)[0]
    top_probability = model.predict_proba(X).max()

    decoded = label_encoder.inverse_transform([predicted_label])
    confidence_pct = round(top_probability * 100, 2)

    return decoded[0], confidence_pct


# ----------------------------
# Run
# ----------------------------
if __name__ == "__main__":
    # Show an example of the expected input format before reading a line.
    # The backslash before the comma is written as "\\," because "\," is an
    # invalid escape sequence (SyntaxWarning on recent Python versions); the
    # printed text is unchanged.
    print("2025-12-24T09:17:16.000000Z,\"('127.0.0.1'\\, 31580)\",GET,http://192.168.122.170/dvwa/vulnerabilities/xss_r/?msg=Nice website,/dvwa/vulnerabilities/xss_r/,msg=Nice website,NORMAL,16.0,1.332,2.0,False,[1 0 0 0 0 0 0]")
    user_input = input().strip()

    attack, confidence = predict_attack(user_input)

    print("\n=== Prediction Result ===")
    print(f"Attack Type : {attack}")
    print(f"Confidence  : {confidence}%")


  print("2025-12-24T09:17:16.000000Z,\"('127.0.0.1'\, 31580)\",GET,http://192.168.122.170/dvwa/vulnerabilities/xss_r/?msg=Nice website,/dvwa/vulnerabilities/xss_r/,msg=Nice website,NORMAL,16.0,1.332,2.0,False,[1 0 0 0 0 0 0]")
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  print("2025-12-24T09:17:16.000000Z,\"('127.0.0.1'\, 31580)\",GET,http://192.168.122.170/dvwa/vulnerabilities/xss_r/?msg=Nice website,/dvwa/vulnerabilities/xss_r/,msg=Nice website,NORMAL,16.0,1.332,2.0,False,[1 0 0 0 0 0 0]")


UnpicklingError: STACK_GLOBAL requires str

In [5]:
%pip uninstall -y numpy scikit-learn
%pip install numpy==1.23.5 scikit-learn==1.2.2


Found existing installation: numpy 2.3.2
Uninstalling numpy-2.3.2:
  Successfully uninstalled numpy-2.3.2
Found existing installation: scikit-learn 1.7.1
Uninstalling scikit-learn-1.7.1:
  Successfully uninstalled scikit-learn-1.7.1
Note: you may need to restart the kernel to use updated packages.
Collecting numpy==1.23.5
  Downloading numpy-1.23.5.tar.gz (10.7 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.7/10.7 MB[0m [31m453.7 kB/s[0m  [33m0:00:23[0m [36m0:00:01[0m[36m0:00:02[0m:11[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[31mERROR: Exception:
Traceback (most recent call last):
  File "/home/matrix/Documents/vscode/PROJECT/venv/lib/python3.13/site-packages/pip/_internal/cli/base_command.py", line 107, in _run_wrapper
    status = _inner_run()
  File "/home/matrix/Documents/vscode/PROJECT/venv/lib/python3.13/site-packages/pip/_internal/cli/base_command.py", line 98

In [6]:
import joblib
import numpy as np
import re

# -----------------------------------
# Load trained artifacts
# NOTE(review): joblib persistence is pickle-based, so loading is only as
# safe as the source of these files -- load trusted artifacts only.
# The three .joblib files must already exist in the working directory.
# -----------------------------------
model = joblib.load("final_rf_model.joblib")
vectorizer = joblib.load("tfidf_vectorizer.joblib")
label_encoder = joblib.load("attack_type_encoder.joblib")

# -----------------------------------
# Robust CSV-line parser
# -----------------------------------
def parse_csv_line(line) -> list[str]:
    """Split a single CSV record into its fields.

    Uses the standard-library CSV reader so that empty fields keep their
    position (``a,,b`` -> ``['a', '', 'b']``), quoted empty fields come back
    as ``''`` rather than ``None``, and commas inside double quotes stay part
    of the field.

    Args:
        line: One CSV-formatted record without embedded line breaks.

    Returns:
        The list of field strings; an empty line yields ``[]``.

    Raises:
        ValueError: If the CSV reader rejects the line as malformed.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import csv

    try:
        # The reader expects an iterable of lines; feed it exactly one.
        return next(csv.reader([line]), [])
    except csv.Error as err:
        raise ValueError("Invalid input format") from err


# -----------------------------------
# Feature Engineering
# -----------------------------------
def build_features(fields):
    """Assemble the single-row feature matrix for one parsed request.

    The positions below must match the column order used in training:

        0  timestamp
        1  client_ip
        2  method
        3  url
        4  path
        5  body
        6  attack_type (ignored during prediction)
        7  body_len
        8  entropy
        9  special_char_count
        10 is_ddos

    Returns:
        A 2-D NumPy array with one row: the dense output of the module-level
        ``vectorizer`` for the text payload followed by the four numeric
        features ``body_len, entropy, special_chars, is_ddos``.
    """
    method = fields[2]
    url = fields[3]
    path = fields[4]
    body = fields[5]

    # Numeric columns arrive as strings; the flag is matched case-insensitively.
    numeric_row = [
        float(fields[7]),
        float(fields[8]),
        float(fields[9]),
        int(fields[10].lower() == "true"),
    ]

    # Text payload (same logic as training)
    text_payload = f"{method} {url} {path} {body}"
    text_block = vectorizer.transform([text_payload]).toarray()

    numeric_block = np.array([numeric_row])

    # Both blocks are 2-D with one row, so join them column-wise.
    return np.concatenate((text_block, numeric_block), axis=1)


# -----------------------------------
# Prediction
# -----------------------------------
def predict_attack(line):
    """Predict the attack type for one CSV-formatted request line.

    Returns:
        ``(attack_name, confidence)`` where ``confidence`` is the largest
        class probability as a percentage rounded to two decimals.

    Raises:
        ValueError: If the line parses into fewer than 11 fields.
    """
    columns = parse_csv_line(line)

    # Feature extraction reads up to index 10, so shorter rows are rejected.
    if not len(columns) >= 11:
        raise ValueError("Invalid input format")

    features = build_features(columns)

    label_id = model.predict(features)[0]
    best_probability = model.predict_proba(features).max()

    names = label_encoder.inverse_transform([label_id])
    percent = round(best_probability * 100, 2)

    return names[0], percent


# -----------------------------------
# CLI
# -----------------------------------
if __name__ == "__main__":
    # Prompt for one request line on stdin, classify it, and report the result.
    print("\nPaste full request line:")
    line = input().strip()

    attack, confidence = predict_attack(line)

    # A single print with a newline separator emits the same three lines.
    print(
        "\n========== RESULT ==========",
        f"Attack Type : {attack}",
        f"Confidence  : {confidence}%",
        sep="\n",
    )


FileNotFoundError: [Errno 2] No such file or directory: 'final_rf_model.joblib'