# Init Test Environment


In [14]:
import logging
import os
from rxoms.utils import rxoms_utils as rutils
import joblib
import os
import pandas as pd


# Path fragments appended to a base directory to locate the configuration
# files and the trained-model folders.
DEFAULT_CONFIG_FOLDER = "/configuration/mltool/"
IF_MODEL_FOLDER = "/IF_models"
LOF_MODEL_FOLDER = "/LOF_models"

# Second argument handed to rutils.get_parent_directory below
# (presumably the number of directory levels to climb -- confirm in rxoms_utils).
DEFAULT_PATH_LEVEL = 2

# RXOMS_PATH is derived from the current working directory; no environment
# variable is read here, so the notebook must be started from its own folder.
RXOMS_PATH = rutils.get_parent_directory(os.getcwd(), DEFAULT_PATH_LEVEL)

# Timestamped INFO-level logging for the remaining cells.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s:%(levelname)s -- %(message)s",
)
logging.info(f"RXOMS_PATH: {RXOMS_PATH}")

2024-10-17 15:38:40,195:INFO -- RXOMS_PATH: /home/tringuyen/workspace/RXOMS


# Prediction on simulation data
- Each flow is predicted by a separately trained ML model.
- The data is loaded from the [artifact](artifact/) folder.
- Prediction is saved to the [anomaly_data](anomaly_data/) folder.

In [15]:
# Score every configured flow with its own pair of pre-trained models
# (one on byte-count features, one on packet-count features) and attach the
# scores / labels to that flow's data frame.
config_path = str(RXOMS_PATH) + DEFAULT_CONFIG_FOLDER + "flow.yaml"
config = rutils.load_config(config_path)
logging.info(f"flow_config: {config}")

# Feature columns fed to the models. They are the same for every flow, so
# they are built once instead of on every loop iteration.
byte_feature = ["byte_count_Average_norm", "runtime"]
packet_feature = ["packet_count_Average_norm", "runtime"]

for flow in config["flow_list"]:
    # File-name form of the flow identifier, computed once per flow:
    # "(1, 1, '00:00:00:00:00:04')" -> "(1_1_'00:00:00:00:00:04')".
    flow_tag = str(flow).replace(", ", "_")

    # Generate model paths (relative to the notebook's working directory)
    model_path_predict_byte = (
        f"./artifact{LOF_MODEL_FOLDER}/model_predict_byte_for_flow_{flow_tag}.pkl"
    )
    model_path_predict_packet = (
        f"./artifact{LOF_MODEL_FOLDER}/model_predict_packet_for_flow_{flow_tag}.pkl"
    )

    # Load models.
    # SECURITY: joblib.load unpickles the file and can execute arbitrary code;
    # only load artifacts produced by a trusted training run, ideally with the
    # same scikit-learn version that saved them.
    byte_model = joblib.load(model_path_predict_byte)
    packet_model = joblib.load(model_path_predict_packet)

    flow_data_path = f"./flow_data/flow_{flow_tag}.csv"
    flow_data = pd.read_csv(flow_data_path)

    # Select each feature frame once and reuse it for both scoring calls.
    byte_inputs = flow_data[byte_feature]
    packet_inputs = flow_data[packet_feature]

    # Continuous scores first, then the models' labels.
    # NOTE(review): presumably scikit-learn outlier detectors, where lower
    # scores are more abnormal and predict() yields -1 for outliers -- confirm
    # against the training notebook.
    flow_data["byte_scores"] = byte_model.decision_function(byte_inputs)
    flow_data["packet_scores"] = packet_model.decision_function(packet_inputs)
    flow_data["byte_anomaly"] = byte_model.predict(byte_inputs)
    flow_data["packet_anomaly"] = packet_model.predict(packet_inputs)

    # NOTE(review): this path uses "anomal_data" while the notebook text refers
    # to an "anomaly_data/" folder -- confirm which directory is intended
    # before re-enabling the save below.
    anomal_data_path = f"./anomal_data/flow_{flow_tag}.csv"

    # Save anomaly detection result (disabled; uncomment to append to the CSV,
    # writing the header only when the file does not exist yet)
    # flow_data.to_csv(anomal_data_path, mode="a", header=not os.path.exists(anomal_data_path))

2024-10-17 15:38:42,501:INFO -- flow_config: {'flow_list': ["(1, 1, '00:00:00:00:00:04')", "(1, 2, '00:00:00:00:00:04')", "(1, 3, '00:00:00:00:00:04')", "(1, 4, '00:00:00:00:00:01')", "(1, 4, '00:00:00:00:00:02')", "(1, 4, '00:00:00:00:00:03')", "(1, 4, '00:00:00:00:00:06')", "(1, 4, '00:00:00:00:00:07')", "(1, 4, '00:00:00:00:00:08')", "(1, 5, '00:00:00:00:00:04')", "(2, 1, '00:00:00:00:00:02')", "(2, 1, '00:00:00:00:00:07')", "(2, 1, '00:00:00:00:00:08')", "(2, 2, '00:00:00:00:00:02')", "(2, 2, '00:00:00:00:00:07')", "(2, 2, '00:00:00:00:00:08')", "(2, 3, '00:00:00:00:00:04')", "(2, 3, '00:00:00:00:00:05')", "(2, 4, '00:00:00:00:00:04')", "(2, 4, '00:00:00:00:00:05')", "(2, 5, '00:00:00:00:00:04')", "(2, 5, '00:00:00:00:00:05')"]}
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintaina