diff --git a/.gitattributes b/.gitattributes
index a6e9819c..70a90aae 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,3 @@
-*.json filter=lfs diff=lfs merge=lfs -text
 *.log filter=lfs diff=lfs merge=lfs -text
 *.log text encoding=utf-8
+datasets/**/*.json filter=lfs diff=lfs merge=lfs -text
diff --git a/bin/dataset_schema.json b/bin/dataset_schema.json
index 6acc39cf..8dc422af 100644
--- a/bin/dataset_schema.json
+++ b/bin/dataset_schema.json
@@ -1,3 +1,91 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:97085370d23378475c243e900bfeb0b462b849ff3e2b4f38fec5547177c91a3b
-size 2274
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Attack Data Dataset Schema",
+  "description": "JSON Schema for validating YAML dataset files in the attack_data project",
+  "type": "object",
+  "required": [
+    "author",
+    "id",
+    "date",
+    "description",
+    "environment",
+    "datasets"
+  ],
+  "properties": {
+    "author": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Author(s) of the dataset"
+    },
+    "id": {
+      "type": "string",
+      "format": "uuid",
+      "description": "UUID identifier for the dataset"
+    },
+    "date": {
+      "type": "string",
+      "description": "Date of the dataset"
+    },
+    "description": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Description of the dataset"
+    },
+    "environment": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Environment where the dataset was created"
+    },
+    "directory": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Directory name for the dataset"
+    },
+    "mitre_technique": {
+      "type": "array",
+      "items": {
+        "type": "string",
+        "pattern": "^T\\d{4}(\\.\\d{3})?$"
+      },
+      "description": "List of MITRE ATT&CK technique IDs, e.g. T1222 or T1222.001 (can be empty)"
+    },
+    "datasets": {
+      "type": "array",
+      "minItems": 1,
+      "items": {
+        "type": "object",
+        "required": [
+          "name",
+          "path",
+          "source",
+          "sourcetype"
+        ],
+        "properties": {
+          "name": {
+            "type": "string",
+            "minLength": 1,
+            "description": "Name of the dataset"
+          },
+          "path": {
+            "type": "string",
+            "minLength": 1,
+            "description": "Path to the dataset file"
+          },
+          "source": {
+            "type": "string",
+            "minLength": 1,
+            "description": "Source of the data"
+          },
+          "sourcetype": {
+            "type": "string",
+            "minLength": 1,
+            "description": "Type of the data source"
+          }
+        },
+        "additionalProperties": false
+      },
+      "description": "List of datasets (must contain at least one dataset)"
+    }
+  },
+  "additionalProperties": false
+}
diff --git a/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.log b/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.log
new file mode 100644
index 00000000..4ae91ee9
--- /dev/null
+++ b/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.log
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:055f282a2c4f1396db511c10ed4016241072fcbfb78e8de64f6614601c554109
+size 1949
diff --git a/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml b/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml
new file mode 100644
index 00000000..40158fc7
--- /dev/null
+++ b/datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.yml
@@ -0,0 +1,13 @@
+author: Nasreddine Bencherchali, Splunk
+id: 169da367-87f4-413d-b0c9-b8c13e01d489
+date: '2025-10-07'
+description: Generated datasets for fsutil SymlinkEvaluation set in attack range.
+environment: attack_range
+directory: fsutil_symlink_eval
+mitre_technique:
+- T1222.001
+datasets:
+- name: fsutil_symlink_eval.log
+  path: /datasets/attack_techniques/T1222.001/fsutil_symlink_eval/fsutil_symlink_eval.log
+  sourcetype: XmlWinEventLog
+  source: 'XmlWinEventLog:Microsoft-Windows-Sysmon/Operational'
diff --git a/datasets/ollama/ollama_server_data.yml b/datasets/ollama/ollama_server_data.yml
index 677c6df2..8d752247 100644
--- a/datasets/ollama/ollama_server_data.yml
+++ b/datasets/ollama/ollama_server_data.yml
@@ -3,6 +3,7 @@ id: 9d1f6bd1-754a-481c-ad54-5a837f86d12b
 date: '2025-10-05'
 description: Logs from Ollama server, contain errors, system messages, http calls, methods and endpoint uris.
 mitre_technique: []
+environment: attack_range
 datasets:
 - name: ollama_server_data
   path: /datasets/ollama/server.log