In [None]:
# One-time setup on a fresh environment: uncomment and run, then restart the kernel.
# %pip install python-evtx==0.8.1 lxml


Collecting python-evtx
  Downloading python_evtx-0.8.1-py3-none-any.whl.metadata (6.0 kB)
Collecting hexdump>=3.3 (from python-evtx)
  Downloading hexdump-3.3.zip (12 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hDownloading python_evtx-0.8.1-py3-none-any.whl (26 kB)
Building wheels for collected packages: hexdump
  Building wheel for hexdump (setup.py) ... [?25ldone
[?25h  Created wheel for hexdump: filename=hexdump-3.3-py3-none-any.whl size=8893 sha256=ae18f6ee982898245bcbc4f1040ad6476864a7f54b138808eb0f8c28e4da58b5
  Stored in directory: /Users/mandylee/Library/Caches/pip/wheels/5e/b9/b5/227a20e7e8bbdb2b17e46a087c0f0119059ee65fe8374cac18
Successfully built hexdump
Installing collected packages: hexdump, python-evtx
Successfully installed hexdump-3.3 python-evtx-0.8.1
Note: you may need to restart the kernel to use updated packages.


In [None]:
#Convert downloaded EVTX file to XML
import os
from Evtx.Evtx import Evtx
from lxml import etree

# Directory holding the downloaded .evtx files. Set the EVTX_FOLDER environment
# variable to point somewhere else instead of editing this machine-specific default.
folder = os.environ.get("EVTX_FOLDER", "/Users/mandylee/Documents/CI/New Test")

# Convert every EVTX file in `folder` into a sibling XML file whose root
# <Events> element contains one <Event> child per readable record.
for filename in sorted(os.listdir(folder)):
    # Split off the real extension so that only the suffix is swapped; a plain
    # str.replace would also rewrite ".evtx" appearing in the middle of a name.
    stem, ext = os.path.splitext(filename)
    if ext.lower() != ".evtx":  # accept .EVTX as well
        continue

    evtx_path = os.path.join(folder, filename)
    xml_path = os.path.join(folder, stem + ".xml")

    root = etree.Element("Events")
    skipped = 0
    first_error = None

    with Evtx(evtx_path) as log:
        for record in log.records():
            try:
                root.append(etree.fromstring(record.xml()))
            except Exception as exc:
                # Best effort: one unreadable record must not abort the whole
                # file, but keep count so the loss is visible to the analyst.
                skipped += 1
                if first_error is None:
                    first_error = exc

    etree.ElementTree(root).write(xml_path, pretty_print=True, encoding="utf-8")
    print(f"Converted {filename} → {xml_path}")
    if skipped:
        print(f"  Warning: skipped {skipped} unreadable record(s); first error: {first_error!r}")

Converted ID4-OpenSSH brutforce with valid users.evtx → /Users/mandylee/Documents/CI/New Test/ID4-OpenSSH brutforce with valid users.xml
Converted ID4625-OpenSSH brutforce with valid users.evtx → /Users/mandylee/Documents/CI/New Test/ID4625-OpenSSH brutforce with valid users.xml


In [None]:
#Convert XML file to CSV

import xml.etree.ElementTree as ET
import pandas as pd

xml_file = "ID4625-OpenSSH brutforce with valid users.xml"

# Namespace handling: every element in the converted file lives in the
# Windows event schema namespace, so lookups must be namespace-qualified.
ns = {'ns': 'http://schemas.microsoft.com/win/2004/08/events/event'}


def _flatten_event(event, ns):
    """Flatten one <Event> element into a single-level dict (one CSV row).

    Parameters
    ----------
    event : xml.etree.ElementTree.Element
        An <Event> element.
    ns : dict
        Prefix-to-URI mapping in which 'ns' is the event schema namespace.

    Returns
    -------
    dict
        * For each child of <System>: every attribute is stored under
          "<tag>_<attribute>", and non-blank text is stored under "<tag>".
        * For each <EventData>/<Data>: the stripped text (or '') is stored
          under its Name attribute. A <Data> element without a Name is stored
          under "Data_<position>" so unnamed values neither overwrite each
          other nor end up under a None key.
    """
    event_data = {}

    # System fields
    system = event.find('ns:System', ns)
    if system is not None:
        for elem in system:
            tag = elem.tag.split('}')[-1]  # remove namespace
            for k, v in elem.attrib.items():
                event_data[f"{tag}_{k}"] = v
            if elem.text and elem.text.strip():
                event_data[tag] = elem.text.strip()

    # EventData fields
    eventdata = event.find('ns:EventData', ns)
    if eventdata is not None:
        for index, data in enumerate(eventdata.findall('ns:Data', ns)):
            name = data.attrib.get('Name') or f"Data_{index}"
            value = data.text.strip() if data.text else ''
            event_data[name] = value

    return event_data


tree = ET.parse(xml_file)
root = tree.getroot()

# One flattened dict per event; pandas aligns the differing key sets into
# columns and leaves missing cells empty.
rows = [_flatten_event(event, ns) for event in root.findall('ns:Event', ns)]

df = pd.DataFrame(rows)
df.to_csv("ID4625-OpenSSH brutforce with valid users.csv", index=False, encoding='utf-8')

print("XML successfully converted")
print("Columns extracted:", len(df.columns))
print(df.head())



XML successfully converted
Columns extracted: 57
                         Provider_Name  \
0  Microsoft-Windows-Security-Auditing   
1  Microsoft-Windows-Security-Auditing   
2  Microsoft-Windows-Security-Auditing   
3  Microsoft-Windows-Security-Auditing   
4  Microsoft-Windows-Security-Auditing   

                            Provider_Guid EventID_Qualifiers EventID Version  \
0  {54849625-5478-4994-a5ba-3e3b0328c30d}                       4688       2   
1  {54849625-5478-4994-a5ba-3e3b0328c30d}                       4648       0   
2  {54849625-5478-4994-a5ba-3e3b0328c30d}                       4624       2   
3  {54849625-5478-4994-a5ba-3e3b0328c30d}                       4688       2   
4  {54849625-5478-4994-a5ba-3e3b0328c30d}                       4648       0   

  Level   Task Opcode            Keywords            TimeCreated_SystemTime  \
0     0  13312      0  0x8020000000000000  2021-05-21 20:43:07.085039+00:00   
1     0  12544      0  0x8020000000000000  2021-05-21 20:43

In [None]:
# Define and test LLM
import os
from getpass import getpass

from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage

# SECURITY: never store the API key in the notebook. Read it from the
# environment and, if it is not set, prompt for it without echoing the input.
# The key that used to be hard-coded in this cell is exposed in the notebook's
# history and must be revoked in the OpenAI dashboard.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Chat model shared by the analysis cells below.
llm = ChatOpenAI(
    model="gpt-4o",
    # temperature=0
)

# Smoke test: a trivial question confirms the key and model are usable
# before the (much larger) log-analysis prompt is sent.
response = llm.invoke([
    HumanMessage(content="What century are we in?")
])
print(response.content)

We are in the 21st century.


In [None]:
#Feed logs in CSV format into LLM and ask it to try identifying policy gaps as a check. 

import pandas as pd
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Read windows logs.
# dtype=str + keep_default_na=False keep every cell exactly as written in the
# CSV. With pandas' default type inference, empty cells become "nan" and
# integer columns containing gaps are converted to floats (e.g. "4688.0"),
# which would hand the model distorted evidence.
csv_path = "ID4625-OpenSSH brutforce with valid users.csv"
logs_df = pd.read_csv(csv_path, dtype=str, keep_default_na=False)

# Serialise the logs as one line per event: "column=value; column=value; ...".
# Columns that are empty for a given event are left out; the CSV holds the
# union of fields across all event types, so most cells of any row are blank
# and would only add noise and tokens to the prompt.
log_features = logs_df.columns.tolist()
all_logs = "\n".join(
    "; ".join(f"{c}={v}" for c, v in zip(log_features, row) if v != "")
    for row in logs_df.itertuples(index=False, name=None)
)

# LLM prompt
prompt = ChatPromptTemplate.from_template(
    """You are a cybersecurity expert doing a post incident review for a companys admin personnels windows computer. 
    Review these windows event logs and identify security policy gaps with detailed evidence. 

Logs:
{all_logs}

A:
- Generate 5 security policy gaps.
- For each policy gap, include detailed evidence: such as event IDs, timestamp, actions, IP addresses, and other relevant information from the logs to support the policy gap identification. 
"""
)

# Build generation chain: fill the template, call the model, return plain text.
chain = prompt | llm | StrOutputParser()
analysis = chain.invoke({"all_logs": all_logs})

print("Log Analysis:")
print(analysis)

Log Analysis:
Analyzing the provided Windows event logs, the following security policy gaps have been identified:

1. **Lack of Multi-Factor Authentication (MFA) for Remote Access**:
   - **Evidence**: The logs show multiple logon attempts and failures using the OpenSSH service. Event IDs 4624, 4625, 4688, and 4648 all signify attempts to authenticate or execute processes remotely.
   - **Details**: 
     - Event ID 4625 at `2021-05-21 20:43:22` indicates a failed login attempt with account name `admmig@offsec.lan`.
     - Repeated failed login attempts (Event IDs: 1865227, 1865228, 1865229, 1865230, 1865231) suggest an effort to compromise the credentials.
   - **Policy Gap**: Adding MFA would add an additional layer of security to prevent unauthorized access even if the passwords are compromised.

2. **Insufficient Monitoring of Failed Login Attempts**:
   - **Evidence**: There are multiple failed login attempts in quick succession (Event ID 4625). These failed logins are not being f