In [None]:
import pandas as pd
import numpy as np
from datetime import datetime

# Data Analysis - CSV


Data loading

In [None]:
# --- Data Loading ---
# Reads the Windows event log export into `df`, tidies the column names and
# previews the first rows. Any failure is reported with a printed message
# instead of a traceback, so `df` stays undefined when loading fails.
try:
    # Location of the log export
    # (upload ch10_windows_events.csv to the Colab environment beforehand)
    csv_file_path = 'ch10_windows_events.csv'

    # Parsing options: treat the usual "empty" markers as missing values
    read_options = dict(
        encoding='utf-8',
        low_memory=False,
        na_values=['', 'N/A', 'null', 'NULL'],
    )
    df = pd.read_csv(csv_file_path, **read_options)

    # Drop surrounding whitespace and stray double quotes from the header names
    stripped_names = df.columns.str.strip()
    df.columns = stripped_names.str.replace('"', '')

    print(f"Successfully loaded {len(df)} records.")
    # Preview the first rows as a sanity check of the load
    display(df.head())

except FileNotFoundError:
    print(f"Error: The file '{csv_file_path}' was not found.")
    print("Please make sure the file is uploaded to your Colab session.")
except Exception as load_error:
    print(f"An error occurred while loading the file: {load_error}")

Successfully loaded 5804 records.


Unnamed: 0,@timestamp,@sourcetype,LogonProcessName,EventRecordID,ProcessID,IpAddress,Computer,EventID,TargetUserName,TargetDomainName,TargetSid,LogonType,WorkstationName,CommandLine,NewProcessId,NewProcessName,ParentProcessName,ProcessId
0,1701741000.0,XmlWinEventLog:Security,,74262,4,,winhost01,4688,,,,,,"""C:\Program Files\SplunkUniversalForwarder\bin...",0x884,C:\Program Files\SplunkUniversalForwarder\bin\...,C:\Program Files\SplunkUniversalForwarder\bin\...,0x121c
1,1701741000.0,XmlWinEventLog:Security,,74263,4,,winhost01,4688,,,,,,"""C:\Program Files\SplunkUniversalForwarder\bin...",0xc40,C:\Program Files\SplunkUniversalForwarder\bin\...,C:\Program Files\SplunkUniversalForwarder\bin\...,0x121c
2,1701741000.0,XmlWinEventLog:Security,,1033421,584,,apidevdc01.example.com,4776,ADMINISTRATOR,,,,,,,,,
3,1701741000.0,XmlWinEventLog:Security,NtLmSsp,1033422,584,195.130.73.252,apidevdc01.example.com,4625,ADMINISTRATOR,,,3.0,,,,,,
4,1701741000.0,XmlWinEventLog:Security,,1033423,4,,apidevdc01.example.com,4703,admin,EXAMPLE,,,,,,,,0x153c


Search that breaks down the events by fields Computer and EventID

In [None]:
# Count api_admin events per (Computer, EventID) pair.
if 'df' not in locals():
    print("DataFrame 'df' not found. Please run the data loading cell (Cell 1) first.")
else:
    # Rows whose target account is exactly 'api_admin'
    is_api_admin = df['TargetUserName'] == 'api_admin'
    api_admin_events = df[is_api_admin]

    if api_admin_events.empty:
        print("No records found for TargetUserName: 'api_admin'")
    else:
        # One row per (Computer, EventID) combination with its number of events
        grouped_results = (
            api_admin_events
            .groupby(['Computer', 'EventID'])
            .size()
            .reset_index(name='Count')
        )

        print("Grouped results for TargetUserName: 'api_admin'")
        display(grouped_results)

Grouped results for TargetUserName: 'api_admin'


Unnamed: 0,Computer,EventID,Count
0,apidevdc01.example.com,4625,4
1,apidevdc01.example.com,4776,4
2,winhost01,4648,4


Book output:

| Computer               | EventID | count |
|------------------------|---------|-------|
| apidevdc01.example.com | 4625    | 3     |
| apidevdc01.example.com | 4776    | 3     |
| winhost01              | 4648    | 3     |




In [None]:
# Count distinct command lines in EventID 4688 records on winhost01,
# leaving out everything launched from the Splunk forwarder.
if 'df' not in locals():
    print("DataFrame 'df' not found. Please run the data loading cell (Cell 1) first.")
else:
    # Restrict to the host and event ID of interest
    on_winhost01 = df['Computer'] == 'winhost01'
    is_event_4688 = df['EventID'] == 4688
    winhost_4688 = df[on_winhost01 & is_event_4688]

    # Flag rows whose NewProcessName mentions the Splunk forwarder;
    # na=False makes rows with a missing name count as "not Splunk"
    from_splunk = winhost_4688['NewProcessName'].str.contains('SplunkUniversalForwarder', na=False)
    filtered_events = winhost_4688[~from_splunk]

    if filtered_events.empty:
        print("No matching events found after filtering.")
    else:
        # Occurrences per command line, most frequent first
        commandline_counts = (
            filtered_events
            .groupby(['CommandLine'])
            .size()
            .reset_index(name='Count')
            .sort_values(by='Count', ascending=False)
        )

        print("Command Line execution counts on 'winhost01' for EventID 4688 (Splunk excluded):")
        display(commandline_counts)

Command Line execution counts on 'winhost01' for EventID 4688 (Splunk excluded):


Unnamed: 0,CommandLine,Count
15,"""C:\Windows\System32\CredentialUIBroker.exe"" N...",5
54,C:\Windows\system32\DllHost.exe /Processid:{E1...,4
64,C:\Windows\system32\wbem\wmiprvse.exe -secured...,3
36,"""C:\Windows\system32\whoami.exe""",3
75,taskhostw.exe,3
...,...,...
72,rdpclip,1
74,sihost.exe,1
76,taskhostw.exe USER,1
77,taskhostw.exe {222A245B-E637-4AE9-A93F-A59CA11...,1


In [None]:
# Build a chronological, human-readable timeline of the EventID 4688 records
# on winhost01 (Splunk forwarder processes excluded).
#
# Side effect: df['@timestamp'] is converted in place from epoch seconds to
# pandas datetimes, and later cells print these datetime values.
#    - unit='s' for seconds; if your values are in ms, use unit='ms'
#    - errors='coerce' turns unparsable values into NaT instead of raising
df['@timestamp'] = pd.to_datetime(df['@timestamp'], unit='s', errors='coerce')

# Same filters as the command-line count cell
mask = (
    (df['Computer'] == 'winhost01') &
    (df['EventID'] == 4688) &
    (~df['NewProcessName']
         .str.contains('SplunkUniversalForwarder', na=False))
)

# Sort on the real datetime values BEFORE formatting them. Sorting the formatted
# strings would only compare whole seconds, so events that share a second could
# end up in arbitrary order. kind='stable' keeps the original row order for
# events with exactly equal timestamps.
filtered = (
    df.loc[mask, ['@timestamp', 'CommandLine']]
    .sort_values(by='@timestamp', kind='stable')
)

# Format to YYYY-MM-DD-HH:MM:SS (display only; sub-second precision is dropped here)
filtered['@timestamp'] = filtered['@timestamp'].dt.strftime('%Y-%m-%d-%H:%M:%S')

# Report
print(f"Found {len(filtered)} matching events (timestamps humanized).")
display(filtered)

Found 100 matching events (timestamps humanized).


Unnamed: 0,@timestamp,CommandLine
4812,2023-12-05-01:16:02,"""C:\Program Files (x86)\Microsoft\Edge\Applica..."
3628,2023-12-05-01:29:30,"""C:\Program Files (x86)\Microsoft\Edge\Applica..."
3025,2023-12-05-01:46:07,"""C:\Windows\System32\sihclient.exe"""
3026,2023-12-05-01:46:07,\??\C:\Windows\system32\conhost.exe 0xffffffff...
1499,2023-12-05-02:08:48,"""C:\Windows\system32\whoami.exe"""
...,...,...
1334,2023-12-05-02:37:59,C:\Windows\system32\wbem\wmiprvse.exe -secured...
1340,2023-12-05-02:38:06,C:\Windows\system32\wbem\wmiprvse.exe -Embedding
1418,2023-12-05-02:38:59,C:\Windows\System32\InstallAgent.exe -Embedding
1417,2023-12-05-02:38:59,taskhostw.exe


Identical output to the one in the book for consistency.

In [None]:
# Show a slice of the timeline that starts at a given index label.
ANCHOR_LABEL = 2204   # index label of the first event to show
WINDOW_ROWS = 17      # maximum rows in the slice, anchor event included

# get_indexer gives the *positional* location of the label, or -1 when the
# label is not in the index
pos = filtered.index.get_indexer([ANCHOR_LABEL])[0]

if pos == -1:
    # Without this check, iloc[-1 : 16] would silently return an unrelated slice
    print(f"Index label {ANCHOR_LABEL} was not found in the filtered events.")
else:
    # Anchor row plus the rows that follow it (fewer if the frame ends earlier)
    window = filtered.iloc[pos : pos + WINDOW_ROWS].copy()

    # Derive the counts from the actual slice so the message cannot disagree with it
    print(f"Showing event at index={ANCHOR_LABEL} and the next {len(window) - 1} events (total {len(window)} rows):")
    display(window)


Showing event at index=2204 and the next 15 events (total 17 rows):


Unnamed: 0,@timestamp,CommandLine
2204,2023-12-05-02:20:21,"""C:\Users\user01\Downloads\mimikatz.exe"""
2337,2023-12-05-02:22:31,"""C:\Windows\system32\whoami.exe"""
2371,2023-12-05-02:22:48,"""C:\Users\user01\Downloads\mimikatz.exe"" privi..."
2372,2023-12-05-02:22:49,C:\Windows\system32\wbem\wmiprvse.exe -secured...
2480,2023-12-05-02:24:13,consent.exe 1200 468 00000226C6FA6F50
2481,2023-12-05-02:24:14,atbroker.exe
2484,2023-12-05-02:24:14,"""C:\Windows\System32\Sethc.exe"" /Accessibility..."
2486,2023-12-05-02:24:17,atbroker.exe
2606,2023-12-05-02:26:27,"""C:\Windows\system32\ipconfig.exe"""
645,2023-12-05-02:28:52,"""C:\Windows\system32\ARP.EXE"" -a"


# Exercises

## 1. What is the local IP address of the AD domain controller, apidevdc01.example.com?

In [None]:
# Exercise 1: count events per 10.x.x.x address recorded on the domain controller.
if 'df' in locals():
    # Use a string view of IpAddress for the prefix test WITHOUT writing it back
    # to df: assigning astype(str) to the column would permanently turn missing
    # values into the literal text 'nan' for every later cell.
    ip_as_text = df['IpAddress'].astype(str)

    # Build the mask: events on the DC whose address starts with '10.'
    mask = (
        (df['Computer'] == 'apidevdc01.example.com') &
        (ip_as_text.str.startswith('10.', na=False))
    )

    # Apply filter
    ipAddress_df = df.loc[mask, ['Computer', 'IpAddress', '@timestamp', 'CommandLine', 'EventID']]

    if not ipAddress_df.empty:
        # Group by IpAddress and count rows, most frequent address first
        grouped_results = (
            ipAddress_df
            .groupby('IpAddress')
            .size()
            .reset_index(name='Count')
            .sort_values(by='Count', ascending=False)
        )

        print("Counts of events by IP address (10.*.*.*) on 'apidevdc01.example.com':")
        display(grouped_results)
    else:
        print("No records found for Computer = 'apidevdc01.example.com' with IP starting 10.*.*.*")
else:
    print("DataFrame 'df' not found. Please run the data loading cell (Cell 1) first.")


Counts of events by IP address (10.*.*.*) on 'apidevdc01.example.com':


Unnamed: 0,IpAddress,Count
0,10.128.0.24,18
1,10.128.0.25,4


## 2. The adversary created an account on the compromised host, winhost01. What is the account name?

Reminder:

* Event 4624 - An account was successfully logged on.
* Event 4720 - A user account was created.
* Event 4732 - A member was added to a security-enabled local group.




In [None]:
# Exercise 2: find the account created on winhost01.
if 'df' in locals():
    # Event ID 4720 ("A user account was created") recorded on the compromised host
    adversary_events = df[(df['Computer'] == 'winhost01') & (df['EventID'] == 4720)]

    if not adversary_events.empty:
        # Show the TargetUserName value of each matching event
        display(adversary_events['TargetUserName'])

    else:
        # Message describes the filter actually applied above
        print("No Event ID 4720 records found on winhost01")
else:
    print("DataFrame 'df' not found. Please run the data loading cell (Cell 1) first.")

Unnamed: 0,TargetUserName
1019,api_test


In [None]:
# Show every column of the winhost01 / EventID 4720 rows selected into
# adversary_events by the previous cell (that cell must have run successfully).
display(adversary_events)

Unnamed: 0,@timestamp,@sourcetype,LogonProcessName,EventRecordID,ProcessID,IpAddress,Computer,EventID,TargetUserName,TargetDomainName,TargetSid,LogonType,WorkstationName,CommandLine,NewProcessId,NewProcessName,ParentProcessName,ProcessId
1019,2023-12-05 02:34:14.163714886,XmlWinEventLog:Security,,75059,624,,winhost01,4720,api_test,WINHOST01,WINHOST01\api_test,,,,,,,


It seems this result lacks some of the information that the book's result shows.

The event contains the following information:

     o  Who created the user account (user01) -> No

     o  The new account’s name (api_test) -> OK

     o  Where the account was created (winhost01) -> OK

## 3. What local user group was the account added to?

Searching events containing api_test grouped by Computer and EventID

In [None]:
# Exercise 3 (overview): which Event IDs occur on winhost01, and which of them
# carry 'api_test' in TargetUserName.
if 'df' in locals():
    # All events recorded on the compromised host winhost01
    group_events = df[
        (df['Computer'] == 'winhost01')
    ]

    if not group_events.empty:
        # Count occurrences grouped by Computer and EventID
        event_counts = group_events.groupby(['Computer', 'EventID']).size().reset_index(name='Count')
        print("Event counts by Computer and EventID:")
        display(event_counts)

        # Events whose TargetUserName contains 'api_test' (case-insensitive;
        # na=False treats rows without a TargetUserName as non-matching)
        api_test_group_events = group_events[
            group_events['TargetUserName'].str.contains('api_test', na=False, case=False)
        ]

        # Show the api_test breakdown instead of discarding the filtered frame
        if not api_test_group_events.empty:
            api_test_counts = (
                api_test_group_events
                .groupby(['Computer', 'EventID'])
                .size()
                .reset_index(name='Count')
            )
            print("\nEvent counts where TargetUserName contains 'api_test':")
            display(api_test_counts)
        else:
            print("\nNo events on winhost01 have a TargetUserName containing 'api_test'")

    else:
        # This branch means the host filter matched nothing at all
        print("No events found on winhost01")
else:
    print("DataFrame 'df' not found. Please run the data loading cell (Cell 1) first.")


Event counts by Computer and EventID:


Unnamed: 0,Computer,EventID,Count
0,winhost01,4624,8
1,winhost01,4625,1018
2,winhost01,4648,6
3,winhost01,4672,7
4,winhost01,4688,640
5,winhost01,4720,1
6,winhost01,4722,1
7,winhost01,4724,1
8,winhost01,4728,1
9,winhost01,4732,2


Event 4728: A member was added to a security-enabled global group

Event 4732: A member was added to a security-enabled local group

In [None]:
# Exercise 3 (analysis): correlate Event ID 4732 (member added to a
# security-enabled local group) on winhost01 with the creation of api_test.
if 'df' in locals():
    on_winhost01 = df['Computer'] == 'winhost01'

    # Event ID 4732 records on the compromised host
    group_events = df[on_winhost01 & (df['EventID'] == 4732)]

    if not group_events.empty:
        print("All Event ID 4732 events on winhost01:")
        print("=" * 50)

        # Display all available columns for these events to understand the structure
        display(group_events)

        print("\nAnalyzing group assignment events:")
        print("=" * 40)

        # When was api_test created (Event ID 4720)?
        account_creation = df[
            on_winhost01 &
            (df['EventID'] == 4720) &
            (df['TargetUserName'] == 'api_test')
        ]

        if not account_creation.empty:
            creation_time = account_creation['@timestamp'].iloc[0]
            print(f"api_test account was created at: {creation_time}")

            # Keep only group assignments logged at or after the account creation;
            # earlier ones cannot concern an account that did not exist yet.
            assignments_after_creation = group_events[group_events['@timestamp'] >= creation_time]

            print("\nGroup assignment events on winhost01:")
            # In the 4732 rows displayed above, TargetUserName holds the group name
            for group_name, event_time, record_id in zip(
                assignments_after_creation['TargetUserName'],
                assignments_after_creation['@timestamp'],
                assignments_after_creation['EventRecordID'],
            ):
                print(f"- Group: {group_name}")
                print(f"  Timestamp: {event_time}")
                print(f"  Event Record ID: {record_id}")
                print()

            # Derive the conclusion from the data instead of printing it unconditionally.
            # NOTE(review): the loaded columns do not name the member that was added,
            # so the link to api_test rests on timing only.
            assigned_groups = assignments_after_creation['TargetUserName'].dropna().unique().tolist()

            print("Based on the threat hunting context:")
            if 'Administrators' in assigned_groups:
                print("The adversary likely added the api_test account to the 'Administrators' group")
                print("for privilege escalation purposes.")
            elif assigned_groups:
                print(f"Groups modified after api_test was created: {', '.join(map(str, assigned_groups))}")
            else:
                print("No group assignment was recorded after the api_test account was created.")
        else:
            print("No Event ID 4720 record for api_test on winhost01; group assignments cannot be correlated.")

    else:
        print("No Event ID 4732 records found on winhost01")
else:
    print("DataFrame 'df' not found. Please run the data loading cell (Cell 1) first.")

All Event ID 4732 events on winhost01:


Unnamed: 0,@timestamp,@sourcetype,LogonProcessName,EventRecordID,ProcessID,IpAddress,Computer,EventID,TargetUserName,TargetDomainName,TargetSid,LogonType,WorkstationName,CommandLine,NewProcessId,NewProcessName,ParentProcessName,ProcessId
1023,2023-12-05 02:34:14.177087069,XmlWinEventLog:Security,,75063,624,,winhost01,4732,Users,Builtin,BUILTIN\Users,,,,,,,
1045,2023-12-05 02:34:33.446849108,XmlWinEventLog:Security,,75071,624,,winhost01,4732,Administrators,Builtin,BUILTIN\Administrators,,,,,,,



Analyzing group assignment events:
api_test account was created at: 2023-12-05 02:34:14.163714886

Group assignment events on winhost01:
- Group: Users
  Timestamp: 2023-12-05 02:34:14.177087069
  Event Record ID: 75063

- Group: Administrators
  Timestamp: 2023-12-05 02:34:33.446849108
  Event Record ID: 75071

Based on the threat hunting context:
The adversary likely added the api_test account to the 'Administrators' group
for privilege escalation purposes.
