In [119]:
import pandas as pd
import numpy as np
import random
from datetime import datetime

#### Enterprise A processes

In [120]:
# Enterprise A: parent process -> {child process: weight}.
# select_process_and_parent(company_type="A") looks up the row of the parent it
# picked and passes the row's values to random.choices as weights, so each
# number is the relative likelihood of that child under that parent.
# Every row below adds up to 1.0.
PARENT_CHILD_MATRIX_A = {
    "explorer.exe": {
        "notepad.exe": 0.50,
        "cmd.exe": 0.15,
        "powershell.exe": 0.05,
        "python.exe": 0.05,
        "wscript.exe": 0.05,
        "cscript.exe": 0.05,
        "rundll32.exe": 0.10,
        "regsvr32.exe": 0.03,
        "mshta.exe": 0.02,
    },
    "cmd.exe": {
        "cmd.exe": 0.40,
        "powershell.exe": 0.20,
        "python.exe": 0.10,
        "notepad.exe": 0.05,
        "wscript.exe": 0.10,
        "cscript.exe": 0.05,
        "rundll32.exe": 0.05,
        "regsvr32.exe": 0.03,
        "mshta.exe": 0.02,
    },
    "powershell.exe": {
        "cmd.exe": 0.25,
        "powershell.exe": 0.25,
        "python.exe": 0.20,
        "notepad.exe": 0.10,
        "wscript.exe": 0.05,
        "cscript.exe": 0.05,
        "rundll32.exe": 0.05,
        "regsvr32.exe": 0.03,
        "mshta.exe": 0.02,
    },
    "python.exe": {
        "python.exe": 0.50,
        "powershell.exe": 0.20,
        "cmd.exe": 0.15,
        "notepad.exe": 0.05,
        "rundll32.exe": 0.05,
        "wscript.exe": 0.03,
        "cscript.exe": 0.02,
    },
    "wscript.exe": {
        "cmd.exe": 0.40,
        "powershell.exe": 0.25,
        "python.exe": 0.15,
        "rundll32.exe": 0.10,
        "mshta.exe": 0.10,
    },
    "cscript.exe": {
        "cmd.exe": 0.35,
        "powershell.exe": 0.25,
        "rundll32.exe": 0.20,
        "mshta.exe": 0.10,
        "python.exe": 0.10,
    },
    "rundll32.exe": {
        "cmd.exe": 0.25,
        "powershell.exe": 0.25,
        "python.exe": 0.25,
        "rundll32.exe": 0.15,
        "mshta.exe": 0.10,
    },
    "regsvr32.exe": {
        "cmd.exe": 0.40,
        "powershell.exe": 0.30,
        "mshta.exe": 0.20,
        "python.exe": 0.10,
    },
    "mshta.exe": {
        "powershell.exe": 0.40,
        "cmd.exe": 0.30,
        "rundll32.exe": 0.20,
        "python.exe": 0.10,
    },
    # The four parents below have no entry in CHILD_TARGET_DIST_A, so
    # select_process_and_parent gives each of them the 0.01 fallback weight
    # when choosing the parent.
    "services.exe": {
        "cmd.exe": 0.40,
        "powershell.exe": 0.30,
        "python.exe": 0.20,
        "rundll32.exe": 0.10,
    },
    "svchost.exe": {
        "cmd.exe": 0.30,
        "powershell.exe": 0.30,
        "python.exe": 0.20,
        "rundll32.exe": 0.20,
    },
    "winlogon.exe": {
        "cmd.exe": 0.30,
        "powershell.exe": 0.30,
        "rundll32.exe": 0.25,
        "mshta.exe": 0.15,
    },
    "taskeng.exe": {
        "cmd.exe": 0.30,
        "powershell.exe": 0.30,
        "python.exe": 0.20,
        "rundll32.exe": 0.10,
        "mshta.exe": 0.10,
    }
}


In [121]:
# Enterprise A: per-process weights; the values add up to 1.0.
# select_process_and_parent(company_type="A") reads this table to weight the
# choice of the *parent* process: parents of PARENT_CHILD_MATRIX_A that are
# missing here fall back to 0.01, and the weights are then re-normalized.
# NOTE(review): "notepad.exe" is not a key of PARENT_CHILD_MATRIX_A, so its
# entry is never read by that function. The name suggests a target
# distribution for child processes — confirm the intended use.
CHILD_TARGET_DIST_A = {
    "explorer.exe":   0.42,
    "notepad.exe":    0.18,
    "cmd.exe":        0.12,
    "powershell.exe": 0.04,
    "python.exe":     0.03,
    "wscript.exe":    0.06,
    "cscript.exe":    0.05,
    "rundll32.exe":   0.06,
    "regsvr32.exe":   0.02,
    "mshta.exe":      0.02,
}

#### Enterprise B processes

In [122]:
# Enterprise B: parent process -> {child process: weight}.
# select_process_and_parent(company_type="B") looks up the row of the parent it
# picked and passes the row's values to random.choices as weights, so each
# number is the relative likelihood of that child under that parent.
# Every row below adds up to 1.0.
PARENT_CHILD_MATRIX_B = {
    "explorer.exe": {
        "notepad.exe": 0.40,
        "cmd.exe": 0.15,
        "powershell.exe": 0.10,
        "python.exe": 0.10,
        "wscript.exe": 0.05,
        "cscript.exe": 0.05,
        "rundll32.exe": 0.10,
        "regsvr32.exe": 0.03,
        "mshta.exe": 0.02,
    },
    "cmd.exe": {
        "powershell.exe": 0.30,
        "cmd.exe": 0.20,
        "python.exe": 0.15,
        "notepad.exe": 0.05,
        "wscript.exe": 0.10,
        "cscript.exe": 0.05,
        "rundll32.exe": 0.10,
        "regsvr32.exe": 0.03,
        "mshta.exe": 0.02,
    },
    "powershell.exe": {
        "powershell.exe": 0.30,
        "cmd.exe": 0.20,
        "python.exe": 0.20,
        "notepad.exe": 0.05,
        "wscript.exe": 0.05,
        "cscript.exe": 0.05,
        "rundll32.exe": 0.10,
        "regsvr32.exe": 0.03,
        "mshta.exe": 0.02,
    },
    "python.exe": {
        "python.exe": 0.40,
        "powershell.exe": 0.20,
        "cmd.exe": 0.15,
        "notepad.exe": 0.05,
        "rundll32.exe": 0.10,
        "wscript.exe": 0.05,
        "cscript.exe": 0.05,
    },
    "wscript.exe": {
        "cmd.exe": 0.40,
        "powershell.exe": 0.25,
        "python.exe": 0.15,
        "rundll32.exe": 0.10,
        "mshta.exe": 0.10,
    },
    "cscript.exe": {
        "cmd.exe": 0.35,
        "powershell.exe": 0.25,
        "rundll32.exe": 0.20,
        "mshta.exe": 0.10,
        "python.exe": 0.10,
    },
    "rundll32.exe": {
        "cmd.exe": 0.25,
        "powershell.exe": 0.25,
        "python.exe": 0.25,
        "rundll32.exe": 0.15,
        "mshta.exe": 0.10,
    },
    "regsvr32.exe": {
        "cmd.exe": 0.35,
        "powershell.exe": 0.25,
        "mshta.exe": 0.20,
        "python.exe": 0.20,
    },
    "mshta.exe": {
        "powershell.exe": 0.40,
        "cmd.exe": 0.30,
        "rundll32.exe": 0.20,
        "python.exe": 0.10,
    },
    # The four parents below have no entry in CHILD_TARGET_DIST_B, so
    # select_process_and_parent gives each of them the 0.01 fallback weight
    # when choosing the parent.
    "services.exe": {
        "cmd.exe": 0.30,
        "powershell.exe": 0.30,
        "python.exe": 0.20,
        "rundll32.exe": 0.20,
    },
    "svchost.exe": {
        "cmd.exe": 0.25,
        "powershell.exe": 0.25,
        "python.exe": 0.20,
        "rundll32.exe": 0.30,
    },
    "winlogon.exe": {
        "cmd.exe": 0.25,
        "powershell.exe": 0.25,
        "rundll32.exe": 0.30,
        "mshta.exe": 0.20,
    },
    "taskeng.exe": {
        "cmd.exe": 0.25,
        "powershell.exe": 0.35,
        "python.exe": 0.20,
        "rundll32.exe": 0.10,
        "mshta.exe": 0.10,
    }
}


In [123]:
# Enterprise B: per-process weights; the values add up to 1.0.
# select_process_and_parent(company_type="B") reads this table to weight the
# choice of the *parent* process: parents of PARENT_CHILD_MATRIX_B that are
# missing here fall back to 0.01, and the weights are then re-normalized.
# NOTE(review): "notepad.exe" is not a key of PARENT_CHILD_MATRIX_B, so its
# entry is never read by that function. The name suggests a target
# distribution for child processes — confirm the intended use.
CHILD_TARGET_DIST_B = {
    "explorer.exe":    0.3,
    "notepad.exe":     0.14,
    "cmd.exe":         0.12,
    "powershell.exe":  0.12,
    "python.exe":      0.1,
    "wscript.exe":     0.06,
    "cscript.exe":     0.05,
    "rundll32.exe":    0.06,
    "regsvr32.exe":    0.03,
    "mshta.exe":       0.02
}

#### Enterprise C processes

In [124]:
# Enterprise C: parent process -> {child process: weight}.
# select_process_and_parent(company_type="C") looks up the row of the parent it
# picked and passes the row's values to random.choices as weights, so each
# number is the relative likelihood of that child under that parent.
# Every row adds up to 1.0 except "explorer.exe" (see the note on that row).
PARENT_CHILD_MATRIX_C = {
    # NOTE(review): this row adds up to 0.95, not 1.0. Sampling still works
    # because random.choices treats weights as relative, but the effective
    # probabilities are these values divided by 0.95 — confirm which entry was
    # meant to carry the missing 0.05.
    "explorer.exe": {
        "notepad.exe": 0.25,
        "cmd.exe": 0.15,
        "powershell.exe": 0.20,
        "python.exe": 0.15,
        "wscript.exe": 0.05,
        "cscript.exe": 0.04,
        "rundll32.exe": 0.08,
        "regsvr32.exe": 0.02,
        "mshta.exe": 0.01,
    },
    "cmd.exe": {
        "powershell.exe": 0.30,
        "cmd.exe": 0.10,
        "python.exe": 0.20,
        "wscript.exe": 0.10,
        "cscript.exe": 0.05,
        "rundll32.exe": 0.15,
        "regsvr32.exe": 0.05,
        "mshta.exe": 0.05,
        "notepad.exe": 0.00,  # zero weight: listed, but never selected
    },
    "powershell.exe": {
        "cmd.exe": 0.20,
        "powershell.exe": 0.20,
        "python.exe": 0.20,
        "wscript.exe": 0.10,
        "cscript.exe": 0.05,
        "rundll32.exe": 0.15,
        "regsvr32.exe": 0.05,
        "mshta.exe": 0.05,
        "notepad.exe": 0.00,  # zero weight: listed, but never selected
    },
    "python.exe": {
        "powershell.exe": 0.10,
        "cmd.exe": 0.10,
        "python.exe": 0.40,
        "notepad.exe": 0.10,
        "rundll32.exe": 0.10,
        "wscript.exe": 0.05,
        "cscript.exe": 0.05,
        "regsvr32.exe": 0.05,
        "mshta.exe": 0.05,
    },
    "wscript.exe": {
        "cmd.exe": 0.40,
        "powershell.exe": 0.20,
        "python.exe": 0.10,
        "rundll32.exe": 0.15,
        "regsvr32.exe": 0.05,
        "mshta.exe": 0.10,
    },
    "cscript.exe": {
        "cmd.exe": 0.35,
        "powershell.exe": 0.25,
        "rundll32.exe": 0.15,
        "regsvr32.exe": 0.10,
        "mshta.exe": 0.15,
    },
    "rundll32.exe": {
        "regsvr32.exe": 0.25,
        "powershell.exe": 0.20,
        "cmd.exe": 0.20,
        "mshta.exe": 0.10,
        "python.exe": 0.25,
    },
    "regsvr32.exe": {
        "cmd.exe": 0.35,
        "powershell.exe": 0.25,
        "mshta.exe": 0.10,
        "rundll32.exe": 0.30,
    },
    "mshta.exe": {
        "powershell.exe": 0.30,
        "cmd.exe": 0.20,
        "rundll32.exe": 0.25,
        "regsvr32.exe": 0.25,
    },
    # The four parents below have no entry in CHILD_TARGET_DIST_C, so
    # select_process_and_parent gives each of them the 0.01 fallback weight
    # when choosing the parent.
    "services.exe": {
        "cmd.exe": 0.20,
        "powershell.exe": 0.25,
        "python.exe": 0.20,
        "rundll32.exe": 0.20,
        "mshta.exe": 0.15,
    },
    "svchost.exe": {
        "cmd.exe": 0.20,
        "powershell.exe": 0.25,
        "python.exe": 0.15,
        "rundll32.exe": 0.20,
        "mshta.exe": 0.20,
    },
    "winlogon.exe": {
        "cmd.exe": 0.25,
        "powershell.exe": 0.25,
        "rundll32.exe": 0.25,
        "mshta.exe": 0.25,
    },
    "taskeng.exe": {
        "cmd.exe": 0.20,
        "powershell.exe": 0.30,
        "python.exe": 0.20,
        "rundll32.exe": 0.15,
        "mshta.exe": 0.15,
    }
}


In [125]:
# Enterprise C: per-process weights; the values add up to 1.0.
# select_process_and_parent(company_type="C") reads this table to weight the
# choice of the *parent* process: parents of PARENT_CHILD_MATRIX_C that are
# missing here fall back to 0.01, and the weights are then re-normalized.
# NOTE(review): "notepad.exe" is not a key of PARENT_CHILD_MATRIX_C, so its
# entry is never read by that function. The name suggests a target
# distribution for child processes — confirm the intended use.
CHILD_TARGET_DIST_C = {
    "explorer.exe":   0.24,
    "notepad.exe":    0.12,
    "cmd.exe":        0.14,
    "powershell.exe": 0.18,
    "python.exe":     0.14,
    "wscript.exe":    0.06,
    "cscript.exe":    0.04,
    "rundll32.exe":   0.05,
    "regsvr32.exe":   0.02,
    "mshta.exe":      0.01
}

#### Synthetic dataset generation

In [126]:
# --- Simulation parameters ---
X = 1000  # number of rows to generate

# Entity pools sampled uniformly for every row: host_01..host_03, user_01..user_10.
N_HOSTS = 3
N_USERS = 10
hosts = [f"host_{i:02d}" for i in range(1, N_HOSTS + 1)]
users = [f"user_{i:02d}" for i in range(1, N_USERS + 1)]

# Each role paired with its sampling weight. "Service" has weight 0, so it is
# listed as a valid role but is never drawn.
_ROLE_WEIGHTS = {"Admin": 0.1, "Standard": 0.9, "Service": 0}
roles = list(_ROLE_WEIGHTS)
roles_probs = list(_ROLE_WEIGHTS.values())

# process & parent_process
def select_process_and_parent(company_type="A"):
    """Draw one (process_name, parent_process) pair for an enterprise profile.

    Args:
        company_type: "A", "B" or "C"; selects which PARENT_CHILD_MATRIX_* /
            CHILD_TARGET_DIST_* pair is used.

    Returns:
        Tuple ``(process_name, parent_process)``. The parent is drawn first,
        then the child is drawn from that parent's row of the matrix.

    Raises:
        ValueError: if ``company_type`` is not one of the three profiles.
    """
    if company_type == "A":
        matrix, weight_source = PARENT_CHILD_MATRIX_A, CHILD_TARGET_DIST_A
    elif company_type == "B":
        matrix, weight_source = PARENT_CHILD_MATRIX_B, CHILD_TARGET_DIST_B
    elif company_type == "C":
        matrix, weight_source = PARENT_CHILD_MATRIX_C, CHILD_TARGET_DIST_C
    else:
        raise ValueError("Invalid company type")

    # Parent candidates are exactly the keys of the parent -> child matrix.
    candidates = list(matrix)

    # Weight each candidate by its CHILD_TARGET_DIST entry (0.01 when it has
    # none), then rescale so the weights add up to 1.
    raw_weights = [weight_source.get(name, 0.01) for name in candidates]
    weight_sum = sum(raw_weights)
    parent_weights = [w / weight_sum for w in raw_weights]
    (parent_process,) = random.choices(candidates, weights=parent_weights, k=1)

    # The child is drawn from the selected parent's row of the matrix.
    row = matrix[parent_process]
    (process_name,) = random.choices(list(row), weights=list(row.values()), k=1)

    return process_name, parent_process

# Building blocks for synthetic command lines: a verb prefix plus a target noun
# form the command, a folder plus an extension form its argument.
base_commands = [
    "Get-",
    "Set-",
    "New-",
    "Remove-",
    "Start-",
    "Stop-",
]
targets = [
    "Process",
    "Service",
    "File",
    "Registry",
    "Job",
]
folders = [
    "/usr/bin",
    "/tmp",
    "C:\\Windows\\Temp",
    "C:\\Users\\Public",
]
extensions = [
    ".txt",
    ".log",
    ".py",
    ".ps1",
]

# Weights for the execution_result column, in the order (success, failure).
exec_probs = [0.98, 0.02]

# Probability that a generated row is labelled as an anomaly.
anom_prob = 0.05

# --- Helper functions ---
def random_timestamp(is_anomaly, no_weekend=True):
    """Return a random ``datetime`` in December 2025 for one simulated event.

    Args:
        is_anomaly: If true, the hour is drawn from 18-23 / 0-4 with
            probability 0.8 and from 8-18 otherwise. If false, the hour is
            drawn from working hours only.
        no_weekend: If true (default), the enterprise is treated as inactive
            on weekends: Saturdays are never returned, and benign events that
            fall on a Friday use the short working day (hours 8-14). If
            false, every day of the month is eligible and Fridays use the
            regular working hours.

    Returns:
        A ``datetime`` whose minute and second are drawn uniformly from 0-59.
    """
    # Rejection-sample the day. Only a Saturday is rejected, and only when
    # weekends are inactive; with no_weekend=False every day must be accepted,
    # otherwise this loop would never terminate.
    while True:
        day = random.randint(1, 31)
        weekday = datetime(2025, 12, day).weekday()  # Monday == 0, Saturday == 5
        if not (no_weekend and weekday == 5):
            break

    if is_anomaly:
        if random.random() > 0.2:
            # 80% of anomalies: evening and night hours.
            hour = random.choice(list(range(18, 24)) + list(range(0, 5)))
        else:
            # 20% of anomalies: regular working hours.
            hour = random.randint(8, 18)
    elif weekday == 4 and no_weekend:
        # Benign event on a Friday with inactive weekends: short working day.
        hour = random.randint(8, 14)
    else:
        # Benign event on a regular working day.
        hour = random.randint(8, 18)

    minute = random.randint(0, 59)
    second = random.randint(0, 59)
    return datetime(2025, 12, day, hour, minute, second)

def generate_command(is_anomaly):
    """Build a synthetic command line and count the pieces it was built from.

    A target length is drawn first (40-100 characters for anomalies, 10-50
    otherwise). "<verb><target> <folder><extension> " pieces are then appended
    until that length is reached, and the text is cut to the target length and
    stripped of surrounding whitespace.

    Args:
        is_anomaly: selects the longer length range when true.

    Returns:
        Tuple ``(command_text, num_args)`` where ``num_args`` is the number of
        pieces appended, including one that the final cut may have shortened.
    """
    low, high = (40, 100) if is_anomaly else (10, 50)
    target_length = random.randint(low, high)

    pieces = []
    built_length = 0
    while built_length < target_length:
        verb_target = random.choice(base_commands) + random.choice(targets)
        path_arg = random.choice(folders) + random.choice(extensions)
        piece = f"{verb_target} {path_arg} "
        pieces.append(piece)
        built_length += len(piece)

    # Cut back to the drawn length if the last piece overshot it.
    text = "".join(pieces)[:target_length]
    return text.strip(), len(pieces)

# --- Data generation ---
# One dict per simulated event; the DataFrame is built once after the loop.
data = []

for _ in range(X):
    # The label is drawn first because it shapes both the command and the timestamp.
    is_anomaly = random.random() < anom_prob
    command_text, num_arguments = generate_command(is_anomaly)
    # The enterprise profile is fixed to "A" here; "B" and "C" are also accepted.
    process_name, parent_process = select_process_and_parent(company_type="A")
    # (Placeholder: an optional light adjustment based on process_name could go here.)

    # Random draws, made in the same order as the columns they fill.
    event_time = random_timestamp(is_anomaly)
    host_id = random.choice(hosts)
    user_id = random.choice(users)
    (user_role,) = random.choices(roles, weights=roles_probs, k=1)
    (execution_result,) = random.choices(["success", "failure"], weights=exec_probs, k=1)

    row = {
        "timestamp": event_time.strftime("%d/%m/%Y %H:%M:%S"),
        "host_id": host_id,
        "user_id": user_id,
        "user_role": user_role,
        "process_name": process_name,
        "parent_process": parent_process,
        "command_text": command_text,
        # Script type is the process name without its ".exe" suffix.
        "script_type": process_name.replace(".exe", ""),
        "command_length": len(command_text),
        "num_arguments": num_arguments,
        "execution_result": execution_result,
        "is_anomaly": "suspicious" if is_anomaly else "benign"
    }
    data.append(row)

# Assemble all rows into a single DataFrame.
df = pd.DataFrame(data)

# Write the dataset to CSV without the index column.
df.to_csv("simulated_commands2.csv", index=False)