From 4771a314f3158396666b5daeeaf1376aee3c3f66 Mon Sep 17 00:00:00 2001 From: nasbench Date: Tue, 7 Oct 2025 20:51:04 +0200 Subject: [PATCH 1/7] add fsutil symlinkevaluation dataset --- .../fsutil_symlink_eval/fsutil_symlink_eval.log | 3 +++ .../fsutil_symlink_eval/fsutil_symlink_eval.yml | 15 +++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.log create mode 100644 datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml diff --git a/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.log b/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.log new file mode 100644 index 00000000..4ae91ee9 --- /dev/null +++ b/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.log @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055f282a2c4f1396db511c10ed4016241072fcbfb78e8de64f6614601c554109 +size 1949 diff --git a/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml b/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml new file mode 100644 index 00000000..cc7cd0aa --- /dev/null +++ b/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml @@ -0,0 +1,15 @@ +author: Nasreddine Bencherchali, Splunk +id: 169da367-87f4-413d-b0c9-b8c13e01d489 +date: '2025-10-07' +description: Generated datasets for fsutil SymlinkEvaluation set in attack range. +environment: attack_range +directory: fsutil_symlink_eval +mitre_technique: +- T1222.001 +dataset: +- name: fsutil_symlink_eval.log + path: /datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.log + sourcetypes: XmlWinEventLog + source: 'XmlWinEventLog:Microsoft-Windows-Sysmon/Operational' +references: +- https://learn.microsoft.com/windows-server/administration/windows-commands/fsutil-behavior From e799e5828385e5340572fce2f7b9528034a68cae Mon Sep 17 00:00:00 2001 From: ljstella Date: Tue, 7 Oct 2025 15:13:29 -0400 Subject: [PATCH 2/7] Don't checkout things we don't need --- .github/workflows/validate.yml | 184 ++++++++++++++++----------------- 1 file changed, 91 insertions(+), 93 deletions(-) diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index acce3ce8..1d9a0bc5 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -2,7 +2,7 @@ name: Validate Attack Data on: pull_request: - branches: [ master, main ] + branches: [master, main] types: [opened, synchronize, reopened] paths: - 'datasets/**/*.yml' @@ -11,7 +11,7 @@ on: - 'bin/dataset_schema.json' - 'bin/requirements.txt' push: - branches: [ master, main ] + branches: [master, main] paths: - 'datasets/**/*.yml' - 'datasets/**/*.yaml' @@ -27,97 +27,95 @@ permissions: jobs: validate-attack-data: runs-on: ubuntu-latest - + steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - lfs: true - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.9' - cache: 'pip' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r bin/requirements.txt - - # Validate all YAML files - - name: Validate all YAML files - run: | - python bin/validate.py - env: - PYTHONPATH: ${{ github.workspace }}/bin - - # PR-specific success/failure handling - - name: Comment PR on validation failure - if: failure() && github.event_name == 'pull_request' - uses: actions/github-script@v7 - with: - script: | - const { owner, repo, number } = context.issue; - - const body = `❌ **Attack Data Validation Failed** - - The YAML files in this PR do not pass validation. Please check the workflow logs for detailed error messages and fix the issues before merging. - - [View workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})`; - - await github.rest.issues.createComment({ - owner, - repo, - issue_number: number, - body: body - }); - - - name: Comment PR on validation success - if: success() && github.event_name == 'pull_request' - uses: actions/github-script@v7 - with: - script: | - const { owner, repo, number } = context.issue; - - const body = `✅ **Attack Data Validation Passed** - - All YAML files in this PR have been successfully validated against the schema. - - Ready for review and merge! 🚀`; - - await github.rest.issues.createComment({ - owner, - repo, - issue_number: number, - body: body - }); - - # Push-specific failure handling (create issue) - - name: Create issue on validation failure (Push) - if: failure() && github.event_name == 'push' - uses: actions/github-script@v7 - with: - script: | - const title = `🚨 Attack Data Validation Failed - ${new Date().toISOString().split('T')[0]}`; - const body = `**Validation failed on push to ${context.ref}** - - Commit: ${context.sha} - - The YAML files in the datasets directory do not pass validation. This indicates that invalid data has been merged into the main branch. - - **Action Required:** - 1. Review the [failed workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) - 2. Fix the validation errors - 3. Create a hotfix PR to resolve the issues - `; - - await github.rest.issues.create({ - owner: context.repo.owner, - repo: context.repo.repo, - title: title, - body: body, - labels: ['bug', 'validation-failure', 'high-priority'] - }); + - name: Checkout repository + uses: actions/checkout@v4 + with: + lfs: false + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.9' + cache: 'pip' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r bin/requirements.txt + + # Validate all YAML files + - name: Validate all YAML files + run: | + python bin/validate.py + env: + PYTHONPATH: ${{ github.workspace }}/bin + + # PR-specific success/failure handling + - name: Comment PR on validation failure + if: failure() && github.event_name == 'pull_request' + uses: actions/github-script@v7 + with: + script: | + const { owner, repo, number } = context.issue; + + const body = `❌ **Attack Data Validation Failed** + + The YAML files in this PR do not pass validation. Please check the workflow logs for detailed error messages and fix the issues before merging. + + [View workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})`; + + await github.rest.issues.createComment({ + owner, + repo, + issue_number: number, + body: body + }); + + - name: Comment PR on validation success + if: success() && github.event_name == 'pull_request' + uses: actions/github-script@v7 + with: + script: | + const { owner, repo, number } = context.issue; + + const body = `✅ **Attack Data Validation Passed** + + All YAML files in this PR have been successfully validated against the schema. + + Ready for review and merge! 🚀`; + + await github.rest.issues.createComment({ + owner, + repo, + issue_number: number, + body: body + }); + + # Push-specific failure handling (create issue) + - name: Create issue on validation failure (Push) + if: failure() && github.event_name == 'push' + uses: actions/github-script@v7 + with: + script: | + const title = `🚨 Attack Data Validation Failed - ${new Date().toISOString().split('T')[0]}`; + const body = `**Validation failed on push to ${context.ref}** + + Commit: ${context.sha} + + The YAML files in the datasets directory do not pass validation. This indicates that invalid data has been merged into the main branch. + **Action Required:** + 1. Review the [failed workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) + 2. Fix the validation errors + 3. Create a hotfix PR to resolve the issues + `; + await github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title: title, + body: body, + labels: ['bug', 'validation-failure', 'high-priority'] + }); From f6d29951f89c67e0b9789d4c1b13d6cee3c82dbe Mon Sep 17 00:00:00 2001 From: nasbench Date: Tue, 7 Oct 2025 21:25:21 +0200 Subject: [PATCH 3/7] update yml --- .../T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml b/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml index cc7cd0aa..0759b0d9 100644 --- a/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml +++ b/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml @@ -11,5 +11,3 @@ dataset: path: /datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.log sourcetypes: XmlWinEventLog source: 'XmlWinEventLog:Microsoft-Windows-Sysmon/Operational' -references: -- https://learn.microsoft.com/windows-server/administration/windows-commands/fsutil-behavior From 47b4ebb3bd3c1242ba495328278b9188d30bc976 Mon Sep 17 00:00:00 2001 From: ljstella Date: Tue, 7 Oct 2025 15:31:07 -0400 Subject: [PATCH 4/7] New filter --- .gitattributes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index a6e9819c..70a90aae 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,3 @@ -*.json filter=lfs diff=lfs merge=lfs -text *.log filter=lfs diff=lfs merge=lfs -text *.log text encoding=utf-8 +datasets/**/*.json filter=lfs diff=lfs merge=lfs -text From 1ef0d6223cd49f476ebb111b74050f34ccf413d2 Mon Sep 17 00:00:00 2001 From: ljstella Date: Tue, 7 Oct 2025 15:31:37 -0400 Subject: [PATCH 5/7] Touch the schema --- bin/dataset_schema.json | 94 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 3 deletions(-) diff --git a/bin/dataset_schema.json b/bin/dataset_schema.json index 6acc39cf..8dc422af 100644 --- a/bin/dataset_schema.json +++ b/bin/dataset_schema.json @@ -1,3 +1,91 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:97085370d23378475c243e900bfeb0b462b849ff3e2b4f38fec5547177c91a3b -size 2274 +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Attack Data Dataset Schema", + "description": "JSON Schema for validating YAML dataset files in the attack_data project", + "type": "object", + "required": [ + "author", + "id", + "date", + "description", + "environment", + "datasets" + ], + "properties": { + "author": { + "type": "string", + "minLength": 1, + "description": "Author(s) of the dataset" + }, + "id": { + "type": "string", + "format": "uuid", + "description": "UUID identifier for the dataset" + }, + "date": { + "type": "string", + "description": "Date of the dataset" + }, + "description": { + "type": "string", + "minLength": 1, + "description": "Description of the dataset" + }, + "environment": { + "type": "string", + "minLength": 1, + "description": "Environment where the dataset was created" + }, + "directory": { + "type": "string", + "minLength": 1, + "description": "Directory name for the dataset" + }, + "mitre_technique": { + "type": "array", + "items": { + "type": "string", + "pattern": "^T\\d{4}(\\.\\d{3})*$" + }, + "description": "List of MITRE ATT&CK technique IDs (can be empty)" + }, + "datasets": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": [ + "name", + "path", + "source", + "sourcetype" + ], + "properties": { + "name": { + "type": "string", + "minLength": 1, + "description": "Name of the dataset" + }, + "path": { + "type": "string", + "minLength": 1, + "description": "Path to the dataset file" + }, + "source": { + "type": "string", + "minLength": 1, + "description": "Source of the data" + }, + "sourcetype": { + "type": "string", + "minLength": 1, + "description": "Type of the data source" + } + }, + "additionalProperties": false + }, + "description": "List of datasets (must contain at least one dataset)" + } + }, + "additionalProperties": false +} From 678fc4d1de947baf703208c019ef936e1f954948 Mon Sep 17 00:00:00 2001 From: nasbench Date: Tue, 7 Oct 2025 22:28:35 +0200 Subject: [PATCH 6/7] fix yamls --- .../T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml | 2 +- datasets/ollama/ollama_server_data.yml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml b/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml index 0759b0d9..a4c74f5d 100644 --- a/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml +++ b/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml @@ -6,7 +6,7 @@ environment: attack_range directory: fsutil_symlink_eval mitre_technique: - T1222.001 -dataset: +datasets: - name: fsutil_symlink_eval.log path: /datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.log sourcetypes: XmlWinEventLog diff --git a/datasets/ollama/ollama_server_data.yml b/datasets/ollama/ollama_server_data.yml index 677c6df2..8d752247 100644 --- a/datasets/ollama/ollama_server_data.yml +++ b/datasets/ollama/ollama_server_data.yml @@ -3,6 +3,7 @@ id: 9d1f6bd1-754a-481c-ad54-5a837f86d12b date: '2025-10-05' description: Logs from Ollama server, contain errors, system messages, http calls, methods and endpoint uris. mitre_technique: [] +environment: attack_range datasets: - name: ollama_server_data path: /datasets/ollama/server.log From a5010ccc342df264107a2b178b25b988b1e66f86 Mon Sep 17 00:00:00 2001 From: nasbench Date: Tue, 7 Oct 2025 22:29:44 +0200 Subject: [PATCH 7/7] add missing sourcetype field --- .../T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml b/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml index a4c74f5d..40158fc7 100644 --- a/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml +++ b/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml @@ -9,5 +9,5 @@ mitre_technique: datasets: - name: fsutil_symlink_eval.log path: /datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.log - sourcetypes: XmlWinEventLog + sourcetype: XmlWinEventLog source: 'XmlWinEventLog:Microsoft-Windows-Sysmon/Operational'