Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified docs/notebooks/data/ip_entities.pkl
Binary file not shown.
Binary file modified docs/notebooks/data/linux_proc_test.pkl
Binary file not shown.
Binary file modified docs/notebooks/data/procs_with_cluster.pkl
Binary file not shown.
Binary file modified docs/notebooks/data/sent_incidents.pkl
Binary file not shown.
Binary file modified docs/notebooks/data/win_proc_test.pkl
Binary file not shown.
190 changes: 190 additions & 0 deletions msticpy/common/archive_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
"""Safe archive extraction utilities to prevent path traversal attacks."""

from __future__ import annotations

import logging
import sys
import tarfile
import zipfile
from pathlib import Path

from .exceptions import MsticpyUserError

logger: logging.Logger = logging.getLogger(__name__)


def validate_archive_member_path(
    member_name: str,
    dest_dir: str | Path,
) -> Path:
    """
    Validate that an archive member path does not escape dest_dir.

    Three checks are applied in order: the name must not be absolute,
    must not contain a ``..`` component, and - once joined onto the
    resolved destination and resolved itself - must still lie inside
    the destination directory.

    Parameters
    ----------
    member_name : str
        The archive member name/path to validate.
    dest_dir : str or Path
        The intended extraction destination directory.

    Returns
    -------
    Path
        The resolved target path within dest_dir.

    Raises
    ------
    MsticpyUserError
        If the member path is absolute, contains a parent directory
        reference, or resolves to a location outside dest_dir.

    """
    member_path = Path(member_name)
    # The explicit "/" test also rejects POSIX-style rooted names that
    # Path.is_absolute() may not flag on the running platform (pathlib
    # requires a drive as well as a root on Windows).
    if member_path.is_absolute() or member_name.startswith("/"):
        raise MsticpyUserError(
            f"Archive member has an absolute path '{member_name}'.",
            "This may indicate a malicious archive (path traversal attack).",
            title="Unsafe archive member path",
        )
    # Any ".." component is refused outright, even if the final path
    # would still end up inside dest_dir.
    if ".." in member_path.parts:
        raise MsticpyUserError(
            f"Archive member contains parent directory reference: '{member_name}'.",
            "This may indicate a malicious archive (path traversal attack).",
            title="Unsafe archive member path",
        )
    # Final containment check on fully resolved paths; resolve() follows
    # symlinks that already exist on disk.
    dest = Path(dest_dir).resolve()
    target = (dest / member_name).resolve()
    if not target.is_relative_to(dest):
        raise MsticpyUserError(
            f"Archive member path escapes the destination directory: '{member_name}'.",
            f"Resolved path '{target}' is outside '{dest}'.",
            "This may indicate a malicious archive (path traversal attack).",
            title="Unsafe archive member path",
        )
    logger.debug("Validated archive member path: %s", member_name)
    return target


def safe_tar_extract(
    tar: tarfile.TarFile,
    member: tarfile.TarInfo,
    dest_dir: str | Path,
) -> None:
    """
    Safely extract a single tar archive member after path validation.

    Symlinks and hardlinks have their target validated, any member
    that is not a regular file, directory or link is refused, and the
    member name itself is validated before extraction. The tarfile
    "data" extraction filter is applied whenever the running
    interpreter provides it.

    Parameters
    ----------
    tar : tarfile.TarFile
        The open tar archive.
    member : tarfile.TarInfo
        The tar member to extract.
    dest_dir : str or Path
        The destination directory for extraction.

    Raises
    ------
    MsticpyUserError
        If the member path is unsafe, the member is a symlink/hardlink
        pointing outside dest_dir, or the member is of an unsupported
        type (anything other than file, directory or link).

    """
    if member.issym() or member.islnk():
        _validate_tar_link(member, dest_dir)
    elif not (member.isreg() or member.isdir()):
        raise MsticpyUserError(
            "Archive contains an unsupported member"
            f" type: '{member.name}'"
            f" (type={member.type!r}).",
            "Only regular files and directories are allowed.",
            title="Unsafe archive member type",
        )
    validate_archive_member_path(member.name, dest_dir)
    # Extraction filters exist on 3.12+ and on patched security releases
    # of older versions; the presence of tarfile.data_filter is the
    # documented runtime feature test for those back-ports.
    if sys.version_info >= (3, 12) or hasattr(tarfile, "data_filter"):
        tar.extract(member, dest_dir, filter="data")
    else:
        tar.extract(member, dest_dir)


def safe_zip_extract(
    zip_file: zipfile.ZipFile,
    file_name: str,
    dest_dir: str | Path,
) -> None:
    """
    Extract one zip member, refusing names that escape dest_dir.

    The member name is validated first; nothing is extracted if that
    validation raises.

    Parameters
    ----------
    zip_file : zipfile.ZipFile
        The open zip archive.
    file_name : str
        The name of the file to extract.
    dest_dir : str or Path
        The destination directory for extraction.

    Raises
    ------
    MsticpyUserError
        If the member path would escape the destination directory.

    """
    # Validation raises on an unsafe name, so extract() is never reached
    # for it; the returned resolved path is not needed here.
    validate_archive_member_path(member_name=file_name, dest_dir=dest_dir)
    zip_file.extract(file_name, path=dest_dir)


def _validate_tar_link(
member: tarfile.TarInfo,
dest_dir: str | Path,
) -> None:
"""
Validate that a symlink or hardlink target is within dest_dir.

Parameters
----------
member : tarfile.TarInfo
The tar member (symlink or hardlink) to validate.
dest_dir : str or Path
The destination directory for extraction.

Raises
------
MsticpyUserError
If the link target escapes the destination directory.

"""
dest = Path(dest_dir).resolve()
link_target = member.linkname
if Path(link_target).is_absolute():
raise MsticpyUserError(
"Archive contains a link with an absolute"
f" target: '{member.name}'"
f" -> '{link_target}'.",
"This may indicate a malicious archive (path traversal attack).",
title="Unsafe archive link target",
)
# Resolve link target relative to the member's directory
member_dir = (dest / member.name).resolve().parent
resolved_link = (member_dir / link_target).resolve()
if not resolved_link.is_relative_to(dest):
raise MsticpyUserError(
"Archive contains a link that escapes the"
f" destination: '{member.name}'"
f" -> '{link_target}'.",
f"Resolved link target '{resolved_link}' is outside '{dest}'.",
"This may indicate a malicious archive (path traversal attack).",
title="Unsafe archive link target",
)
8 changes: 6 additions & 2 deletions msticpy/context/azure/sentinel_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,9 @@ def create_analytic_rule( # pylint: disable=too-many-arguments, too-many-locals
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
raise HttpResponseError(err_msg)
logger.info("Analytic Created.")
return response.json().get("name")
Expand Down Expand Up @@ -355,7 +357,9 @@ def delete_analytic_rule(
case httpx.codes.CONFLICT:
raise ResourceExistsError()
case _:
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
raise HttpResponseError(err_msg)
logger.info("Analytic Deleted.")

Expand Down
8 changes: 2 additions & 6 deletions msticpy/context/azure/sentinel_bookmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,7 @@ def create_bookmark( # noqa:PLR0913
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
raise HttpResponseError(err_msg)

def delete_bookmark(
Expand Down Expand Up @@ -185,9 +183,7 @@ def delete_bookmark(
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
raise HttpResponseError(err_msg)

def _get_bookmark_id(self: Self, bookmark: str) -> str:
Expand Down
12 changes: 9 additions & 3 deletions msticpy/context/azure/sentinel_incidents.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,9 @@ def get_incident( # noqa:PLR0913
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
raise HttpResponseError(err_msg)

incident_df: pd.DataFrame = _azs_api_result_to_df(response)
Expand Down Expand Up @@ -334,7 +336,9 @@ def update_incident(
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
raise HttpResponseError(err_msg)
logger.info("Incident updated.")
return response.json().get("name")
Expand Down Expand Up @@ -429,7 +433,9 @@ def create_incident( # pylint: disable=too-many-arguments, too-many-locals, too
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
raise HttpResponseError(err_msg)
if bookmarks:
for mark in bookmarks:
Expand Down
20 changes: 15 additions & 5 deletions msticpy/context/azure/sentinel_ti.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,9 @@ def create_indicator( # pylint:disable=too-many-arguments, too-many-locals #noq
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
raise HttpResponseError(err_msg)
if not silent:
logger.info("Indicator created.")
Expand Down Expand Up @@ -336,7 +338,9 @@ def get_indicator(self: Self, indicator_id: str) -> dict:
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
raise HttpResponseError(err_msg)
return response.json()

Expand Down Expand Up @@ -436,7 +440,9 @@ def update_indicator( # pylint:disable=too-many-arguments,too-many-locals #noqa
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
raise HttpResponseError(err_msg)
logger.info("Indicator updated.")

Expand Down Expand Up @@ -501,7 +507,9 @@ def delete_indicator(self: Self, indicator_id: str) -> None:
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
raise HttpResponseError(err_msg)
logger.info("Indicator deleted.")

Expand Down Expand Up @@ -610,7 +618,9 @@ def query_indicators( # pylint:disable=too-many-arguments, too-many-locals, too
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
raise HttpResponseError(err_msg)
return _azs_api_result_to_df(response)

Expand Down
8 changes: 4 additions & 4 deletions msticpy/context/azure/sentinel_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,9 @@ def _list_items( # noqa:PLR0913 #pylint: disable=too-many-locals
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
raise HttpResponseError(err_msg)
j_resp: dict[str, Any] = response.json()
results: list[pd.DataFrame] = [results_df]
Expand Down Expand Up @@ -362,9 +364,7 @@ def parse_resource_id(res_id: str) -> dict[str, Any]:
"""Extract components from workspace resource ID."""
if not res_id.startswith("/"):
res_id = f"/{res_id}"
res_id_parts: dict[str, str] = cast(
dict[str, str], az_tools.parse_resource_id(res_id)
)
res_id_parts: dict[str, str] = cast(dict[str, str], az_tools.parse_resource_id(res_id))
workspace_name: str | None = None
if (
res_id_parts.get("namespace") == "Microsoft.OperationalInsights"
Expand Down
12 changes: 9 additions & 3 deletions msticpy/context/azure/sentinel_watchlists.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,9 @@ def create_watchlist( # noqa: PLR0913
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
raise HttpResponseError(err_msg)

logger.info("Watchlist created.")
Expand Down Expand Up @@ -352,7 +354,9 @@ def delete_watchlist(
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
raise HttpResponseError(err_msg)
logger.info("Watchlist %s deleted", watchlist_name)

Expand Down Expand Up @@ -413,7 +417,9 @@ def delete_watchlist_item(
case httpx.codes.NOT_MODIFIED:
raise ResourceNotModifiedError()
case _:
err_msg = f"Received HTTP return code {response.status_code}: {response.text}"
err_msg = (
f"Received HTTP return code {response.status_code}: {response.text}"
)
raise HttpResponseError(err_msg)

logger.info("Item deleted from %s", watchlist_name)
Expand Down
3 changes: 1 addition & 2 deletions msticpy/context/azure/sentinel_workspaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,8 +335,7 @@ def _extract_resource_id(
raw_res_id: str = uri_match.groupdict()["res_id"]
raw_res_id = parse.unquote(raw_res_id)
res_components: dict[str, Any] = {
key: str(value)
for key, value in az_tools.parse_resource_id(raw_res_id).items()
key: str(value) for key, value in az_tools.parse_resource_id(raw_res_id).items()
}
try:
resource_id: str = cls._normalize_resource_id(res_components)
Expand Down
Loading
Loading