<a href="https://colab.research.google.com/github/priandoyo/smartrs/blob/main/smartrc1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# ============================================================
# AI-ASSISTED SIEM WORKSHOP – SIMPLE ANOMALY DETECTION
# Audience: IT Auditor
# Concept: NIST CSF (Identify – Detect – Respond)
# ============================================================

# ==============================
# 1. INSTALL & IMPORT LIBRARIES
# ==============================
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest

# ==============================
# 2. CREATE SYNTHETIC WINDOWS LOG DATA
# (Normally logs come from SIEM / Windows Event Viewer)
# ==============================

# Hand-written sample of Windows-style log events used as the input table.
# Each row is a list in the order:
#   [Timestamp, User, Activity, Host, EventID]
# Timestamps are plain strings; EventID is an integer.
data = [
    ["2026-01-01 08:00","UserA","LoginSuccess","Workstation1",4624],
    ["2026-01-01 08:05","UserA","FileAccess","Workstation1",4663],
    ["2026-01-01 08:10","UserB","LoginSuccess","Workstation2",4624],
    ["2026-01-01 08:20","UserC","LoginFail","Workstation3",4625],
    ["2026-01-01 08:21","UserC","LoginFail","Workstation3",4625],
    ["2026-01-01 08:22","UserC","LoginFail","Workstation3",4625],
    ["2026-01-01 08:30","Admin1","PrivilegeChange","Server1",4672],
    ["2026-01-01 08:45","UserD","LoginSuccess","Workstation4",4624],
    ["2026-01-01 09:00","UserE","FileDelete","Server2",4660],
    ["2026-01-01 09:10","UserF","LoginSuccess","Workstation5",4624],
    ["2026-01-01 09:15","UserF","LoginSuccess","Workstation5",4624],
    ["2026-01-01 09:30","UserG","LoginFail","Workstation6",4625],
    ["2026-01-01 09:31","UserG","LoginFail","Workstation6",4625],
    ["2026-01-01 09:32","UserG","LoginFail","Workstation6",4625],
    ["2026-01-01 10:00","Admin2","PrivilegeChange","Server3",4672],
    ["2026-01-01 10:05","UserH","FileAccess","Workstation7",4663],
    ["2026-01-01 10:10","UserI","LoginSuccess","Workstation8",4624],
    ["2026-01-01 10:20","UserJ","FileAccess","Server2",4663],
    ["2026-01-01 10:25","UserK","LoginFail","Workstation9",4625],
    ["2026-01-01 10:26","UserK","LoginFail","Workstation9",4625],
    ["2026-01-01 10:27","UserK","LoginFail","Workstation9",4625],
    ["2026-01-01 11:00","Admin3","PrivilegeChange","Server4",4672],
    ["2026-01-01 11:05","UserL","FileAccess","Workstation10",4663],
    ["2026-01-01 11:15","UserM","LoginSuccess","Workstation11",4624],
    ["2026-01-01 11:25","UserN","FileDelete","Server1",4660],
    ["2026-01-01 11:30","UserO","LoginSuccess","Workstation12",4624],
    ["2026-01-01 11:35","UserP","LoginFail","Workstation13",4625],
    ["2026-01-01 11:36","UserP","LoginFail","Workstation13",4625],
    ["2026-01-01 11:37","UserP","LoginFail","Workstation13",4625],
    ["2026-01-01 12:00","Admin4","PrivilegeChange","Server5",4672],
]

# Column names follow the field order used inside every `data` row.
log_columns = ["Timestamp", "User", "Activity", "Host", "EventID"]
df = pd.DataFrame(data, columns=log_columns)

# Preview the first five rows so the audience sees the raw log layout.
print("=== SAMPLE WINDOWS LOG DATA ===")
display(df.head(5))

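# ==============================
# 3. LABELING (REFERENCE ONLY – NOT USED TO TRAIN THE MODEL)
# Auditor Logic (every single event with one of these IDs is flagged):
# - LoginFail (4625)
# - Privilege Change (4672)
# - FileDelete (4660)
# The label is only shown next to the model output for comparison;
# it is not part of the features given to the model.
# ==============================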

def label_attack(row):
    """Return 1 if the row's EventID is on the suspicious list, else 0.

    Args:
        row: Any mapping-like object that supports ``row["EventID"]``
            (for example a DataFrame row passed by ``df.apply(..., axis=1)``).

    Returns:
        int: 1 for event IDs 4625, 4672 or 4660; 0 for everything else.
    """
    # Rule-based check: a single matching event is enough to be flagged.
    suspicious_event_ids = (4625, 4672, 4660)
    return int(row["EventID"] in suspicious_event_ids)

# Run the rule once per log row; with axis="columns" each row is handed
# to label_attack as a Series indexed by column name.
row_labels = df.apply(label_attack, axis="columns")
df["Label_Attack"] = row_labels

# Preview the first five rows including the new label column.
print("\n=== LABELED DATA (1 = suspicious) ===")
display(df.head(5))

# ==============================
# 4. FEATURE ENGINEERING
# (AI cannot read text → must convert to numbers)
# ==============================

# The event ID is already a number, so it is copied unchanged.
df["EventID_Num"] = df["EventID"]

# User and host names are text: replace each distinct name by the integer
# code pandas assigns to it when the column is treated as categorical.
for text_column, code_column in (("User", "User_Code"), ("Host", "Host_Code")):
    df[code_column] = df[text_column].astype("category").cat.codes

# Only these numeric columns are handed to the model.
feature_columns = ["EventID_Num", "User_Code", "Host_Code"]
features = df[feature_columns]

# ==============================
# 5. AI / ML MODEL – ISOLATION FOREST
# Industry Term:
# "UEBA / Anomaly Detection Engine"
# ==============================

# contamination=0.25 sets the share of rows the model is expected to treat
# as outliers; random_state=42 is a fixed seed passed to the model.
model = IsolationForest(contamination=0.25, random_state=42)

# fit_predict returns one value per row; -1 is the value treated as
# "anomaly" here, anything else as normal.
raw_prediction = model.fit_predict(features)

# Store the result as 1 = anomaly, 0 = normal so it lines up with Label_Attack.
df["Anomaly"] = [1 if flag == -1 else 0 for flag in raw_prediction]

# ==============================
# 6. OUTPUT RESULT
# ==============================

# Show the rule-based label and the model flag side by side for every log row.
report_columns = [
    "Timestamp",
    "User",
    "Activity",
    "EventID",
    "Label_Attack",
    "Anomaly",
]
print("\n=== DETECTION RESULT ===")
display(df[report_columns])

# ==============================
# 7. SIMPLE INTERPRETATION FOR AUDITORS
# ==============================

# Row count of the log table and number of rows flagged with Anomaly == 1
# (the column only holds 0/1, so its sum is the count of flagged rows).
total = df.shape[0]
anomalies = df["Anomaly"].sum()

# Each entry is printed on its own line, in this order.
summary_lines = (
    "\n=== SUMMARY ===",
    f"Total Logs: {total}",
    f"AI Detected Anomalies: {anomalies}",
    "\nExplanation:",
    "- Anomaly = behavior different from normal pattern.",
    "- This simulates AI-assisted SIEM detection.",
    "- In NIST CSF, this belongs to DETECT function.",
)
for summary_line in summary_lines:
    print(summary_line)


=== SAMPLE WINDOWS LOG DATA ===


Unnamed: 0,Timestamp,User,Activity,Host,EventID
0,2026-01-01 08:00,UserA,LoginSuccess,Workstation1,4624
1,2026-01-01 08:05,UserA,FileAccess,Workstation1,4663
2,2026-01-01 08:10,UserB,LoginSuccess,Workstation2,4624
3,2026-01-01 08:20,UserC,LoginFail,Workstation3,4625
4,2026-01-01 08:21,UserC,LoginFail,Workstation3,4625



=== LABELED DATA (1 = suspicious) ===


Unnamed: 0,Timestamp,User,Activity,Host,EventID,Label_Attack
0,2026-01-01 08:00,UserA,LoginSuccess,Workstation1,4624,0
1,2026-01-01 08:05,UserA,FileAccess,Workstation1,4663,0
2,2026-01-01 08:10,UserB,LoginSuccess,Workstation2,4624,0
3,2026-01-01 08:20,UserC,LoginFail,Workstation3,4625,1
4,2026-01-01 08:21,UserC,LoginFail,Workstation3,4625,1



=== DETECTION RESULT ===


Unnamed: 0,Timestamp,User,Activity,EventID,Label_Attack,Anomaly
0,2026-01-01 08:00,UserA,LoginSuccess,4624,0,1
1,2026-01-01 08:05,UserA,FileAccess,4663,0,0
2,2026-01-01 08:10,UserB,LoginSuccess,4624,0,0
3,2026-01-01 08:20,UserC,LoginFail,4625,1,0
4,2026-01-01 08:21,UserC,LoginFail,4625,1,0
5,2026-01-01 08:22,UserC,LoginFail,4625,1,0
6,2026-01-01 08:30,Admin1,PrivilegeChange,4672,1,1
7,2026-01-01 08:45,UserD,LoginSuccess,4624,0,0
8,2026-01-01 09:00,UserE,FileDelete,4660,1,1
9,2026-01-01 09:10,UserF,LoginSuccess,4624,0,0



=== SUMMARY ===
Total Logs: 30
AI Detected Anomalies: 8

Explanation:
- Anomaly = behavior different from normal pattern.
- This simulates AI-assisted SIEM detection.
- In NIST CSF, this belongs to DETECT function.
