In [2]:
from loglead.loaders.raw import RawLoader
from loglead.enhancers import EventLogEnhancer, SequenceEnhancer
import polars as pl
import glob, os


In [3]:
log_dir = "../data/lo2-token/light-oauth2-data-1719592986/"
log_files = glob.glob(os.path.join(log_dir, "*.log"))

all_dfs = []

for log_file in log_files:
    loader = RawLoader(
        filename=log_file,
        timestamp_pattern=r"^(\d{1,2}:\d{2}:\d{2}\.\d{3})",
        timestamp_format="%H:%M:%S.%f",
        missing_timestamp_action="merge"
    )
    loader.load()

    # Custom timestamp extraction and padding
    df = loader.df.with_columns([
        pl.col("m_message")
        .str.extract(loader.timestamp_pattern, group_index=1)
        .str.pad_end(12, "0")
        .str.strptime(pl.Datetime, "%H:%M:%S.%f", strict=False)
        .alias("m_timestamp"),

        pl.col("m_message")
        .str.replace(loader.timestamp_pattern, "")
        .alias("m_message"),

        pl.lit(os.path.basename(log_file)).alias("source_file")  # keep file origin
    ])

    # Reorder columns
    df = df.select(["m_timestamp", "m_message", "source_file"])

    all_dfs.append(df)

# Combine all logs
df = pl.concat(all_dfs)

# Enrich with parsed fields
df = df.with_columns([
    pl.col("m_message").str.extract(r"\[([^\]]+)\]", group_index=1).alias("thread"),
    pl.col("m_message").str.extract(r"\] +(\S+)", group_index=1).alias("request_id"),
    pl.col("m_message").str.extract(r"\] +\S+ +(\w+)", group_index=1).alias("level"),
    pl.col("m_message").str.extract(r"\w+ +(\S+ +<init>)", group_index=1).alias("class_method"),
    pl.col("m_message").str.extract(r"<init> - (.*)", group_index=1).alias("log_text")
])

df = df.fill_null("")


  .str.strptime(pl.Datetime, "%H:%M:%S.%f", strict=False)


In [4]:
enhancer = EventLogEnhancer(df)
df = enhancer.normalize()
df = enhancer.parse_spell()
df = enhancer.words()
df = enhancer.trigrams()
df = enhancer.parse_drain()
df = enhancer.parse_tip()
df = enhancer.parse_pliplom()
df = enhancer.parse_iplom()
df = enhancer.parse_brain()
df = enhancer.length()


2025-03-24 14:49:58.962292: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742820599.036731    4632 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742820599.056785    4632 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-24 14:49:59.227225: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  df_part = df_part.with_columns(pl.col("events").list.to_struct()).unnest("events")


In [5]:
from loglead.anomaly_detection import AnomalyDetector
anomaly_detector = AnomalyDetector(
    item_list_col="e_words",
    numeric_cols=["e_words_len", "e_trigrams_len", "e_chars_len", "e_lines_len", "e_event_id_len"],
    store_scores=True,
    print_scores=False,  # ← disable printing evaluation
    auc_roc=False        # ← avoid computing ROC/AUC without labels
)


In [6]:
anomaly_detector.test_train_split(df, test_frac=0.9, shuffle=True)
anomaly_detector.train_IsolationForest(n_estimators=100, contamination="auto")





In [7]:
df = anomaly_detector.predict()


In [10]:
df

row_nr,m_timestamp,m_message,source_file,thread,request_id,level,class_method,log_text,e_message_normalized,e_event_spell_id,e_template_spell,e_words,e_words_len,e_trigrams,e_trigrams_len,e_event_drain_id,e_event_tip_id,e_event_pliplom_id,e_event_iplom_id,e_event_brain_id,e_chars_len,e_lines_len,e_event_id_len,pred_ano
u32,datetime[μs],str,str,str,str,str,str,str,str,str,str,list[str],u32,list[str],u32,str,str,str,str,str,u32,u32,i32,i64
55719,,""" [XNIO-1 task-2] E61-h-vfSqus…","""TOKEN_code_challenge_too_short…","""XNIO-1 task-2""","""E61-h-vfSqus_AkkBGBerA""","""DEBUG""","""c.n.openapi.ApiNormalisedPath …","""path = /oauth2/token, base pat…",""" [XNIO-<NUM> task-<NUM>] E61-…","""c109ca39""","""[XNIO-<NUM> task-<NUM>] * DEBU…","["""", ""[XNIO-1"", … ""null""]",18,"["" [X"", ""NIO"", … "" nu""]",131,"""e18""","""e117""","""e5""","""e54efadb""","""E5""",133,0,1,0
21646,,""" [XNIO-1 task-4] 5-28x-M8SL6p…","""TOKEN_register_service_400_ser…","""XNIO-1 task-4""","""5-28x-M8SL6pm7tbQQDe5Q""","""DEBUG""","""c.n.openapi.ApiNormalisedPath …","""normalised = /oauth2/token""",""" [XNIO-<NUM> task-<NUM>] <NUM…","""faa506fe""","""[XNIO-<NUM> task-<NUM>] * DEBU…","["""", ""[XNIO-1"", … ""/oauth2/token""]",12,"["" [X"", ""NIO"", … ""tok""]",110,"""e8""","""e294""","""e7e1""","""c9988eed""","""E12""",112,0,1,0
27467,,""" [XNIO-1 task-2] WlcAqVj6TLOU…","""TOKEN_update_service_404_servi…","""XNIO-1 task-2""","""WlcAqVj6TLOU2424jtOeLQ""","""DEBUG""","""c.n.openapi.ApiNormalisedPath …","""normalised = /oauth2/token""",""" [XNIO-<NUM> task-<NUM>] WlcA…","""faa506fe""","""[XNIO-<NUM> task-<NUM>] * DEBU…","["""", ""[XNIO-1"", … ""/oauth2/token""]",12,"["" [X"", ""NIO"", … ""tok""]",110,"""e8""","""e294""","""e7e1""","""e54efadb""","""E12""",112,0,1,0
42501,,""" [XNIO-1 task-2] xeK0B9SpQkiB…","""TOKEN_authorization_code_respo…","""XNIO-1 task-2""","""xeK0B9SpQkiBhVWZY1zCaQ""","""DEBUG""","""c.n.openapi.ApiNormalisedPath …","""path =/oauth2/token""",""" [XNIO-<NUM> task-<NUM>] xeK0…","""d3e7d490""","""[XNIO-<NUM> task-<NUM>] * DEBU…","["""", ""[XNIO-1"", … ""=/oauth2/token""]",11,"["" [X"", ""NIO"", … ""oke""]",103,"""e5""","""e225""","""e19e1""","""9cf2f45d""","""E8""",105,0,1,0
57137,,""" [XNIO-1 task-1] cjtXnrT8SECe…","""TOKEN_update_client_404_client…","""XNIO-1 task-1""","""cjtXnrT8SECeC37BI5_hAg""","""DEBUG""","""""","""""",""" [XNIO-<NUM> task-<NUM>] cjtX…","""68bb8a40""","""[XNIO-<NUM> task-<NUM>] * DEBU…","["""", ""[XNIO-1"", … ""authorization)""]",15,"["" [X"", ""NIO"", … ""ion""]",292,"""e20""","""e129""","""e3e1""","""9cf2f45d""","""E2""",294,0,1,0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
81049,,""" [hz._hzInstance_1_dev.partiti…","""TOKEN_code_verifier_missing_pk…","""hz._hzInstance_1_dev.partition…","""DEBUG""","""c""","""""","""""",""" [hz._hzInstance_<NUM>_dev.par…","""9bf2a03b""","""[hz._hzInstance_<NUM>_dev.part…","["""", ""[hz._hzInstance_1_dev.partition-operation.thread-14]"", … ""Store:0e718336-e1ab-4eed-b002-3d1ea851d2cd""]",9,"["" [h"", ""z._"", … ""2cd""]",147,"""e3""","""e4""","""e8""","""e54efadb""","""E7""",149,0,1,0
32469,,""" [XNIO-1 task-4] ww4GNVz-Q5uE…","""TOKEN_get_client_404_no_client…","""XNIO-1 task-4""","""ww4GNVz-Q5uE1ykqEzq6jQ""","""DEBUG""","""c.n.openapi.ApiNormalisedPath …","""path = /oauth2/token, base pat…",""" [XNIO-<NUM> task-<NUM>] ww4G…","""c109ca39""","""[XNIO-<NUM> task-<NUM>] * DEBU…","["""", ""[XNIO-1"", … ""null""]",18,"["" [X"", ""NIO"", … "" nu""]",131,"""e4""","""e117""","""e5""","""9cf2f45d""","""E5""",133,0,1,0
87517,,""" [XNIO-1 task-4] g-BWMLGBT_mH…","""TOKEN_get_service_page_400_no_…","""XNIO-1 task-4""","""g-BWMLGBT_mHUL0H77ytvQ""","""INFO""","""""","""""",""" [XNIO-<NUM> task-<NUM>] g-BW…","""6e0da366""","""[XNIO-<NUM> task-<NUM>] * INFO…","["""", ""[XNIO-1"", … ""/config""]",19,"["" [X"", ""NIO"", … ""fig""]",156,"""e16""","""e186""","""e15e1""","""5fea72f6""","""E4""",158,0,1,0
49600,,""" [XNIO-1 task-2] G_YpkU-0T6qX…","""TOKEN_delete_token_404.log""","""XNIO-1 task-2""","""G_YpkU-0T6qXpueHJmCbBw""","""DEBUG""","""c.n.openapi.ApiNormalisedPath …","""path = /oauth2/token, base pat…",""" [XNIO-<NUM> task-<NUM>] G_Yp…","""c109ca39""","""[XNIO-<NUM> task-<NUM>] * DEBU…","["""", ""[XNIO-1"", … ""null""]",18,"["" [X"", ""NIO"", … "" nu""]",131,"""e18""","""e117""","""e5""","""987e1ed3""","""E5""",133,0,1,0


In [8]:
anomalies = df.filter(pl.col("pred_ano") == 1)
