Skip to content

Commit

Permalink
feat: secureli 374 exclude binary files from secrets scans (#423)
Browse files Browse the repository at this point in the history
[secureli-374](#374)

<!-- Include general description here -->
Excludes binary files from scans.

## Changes
<!-- A detailed list of changes -->
* Added logic to detect binary files and skip them during scans
* Updated launch.json configs to avoid deprecated settings

## Testing
<!--
Mention updated tests and any manual testing performed.
Are aspects not yet tested or not easily testable?
Feel free to include screenshots if appropriate.
 -->
* All existing tests pass; some needed updating
* Added unit tests to test detection of binary files to be excluded

## Clean Code Checklist
<!-- This is here to support you. Some/most checkboxes may not apply to
your change -->
- [x] Meets acceptance criteria for issue
- [ ] New logic is covered with automated tests
- [x] Appropriate exception handling added
- [ ] Thoughtful logging included
- [ ] Documentation is updated
- [ ] Follow-up work is documented in TODOs
- [ ] TODOs have a ticket associated with them
- [x] No commented-out code included


<!--
Github-flavored markdown reference:
https://docs.github.com/en/get-started/writing-on-github
-->
  • Loading branch information
isaac-heist-slalom committed Feb 8, 2024
1 parent 37c6942 commit 9500ae9
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 11 deletions.
10 changes: 5 additions & 5 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"configurations": [
{
"name": "Python: secureli --version",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/secureli/main.py",
"cwd": "${workspaceFolder}",
Expand All @@ -14,7 +14,7 @@
},
{
"name": "Python: secureli --help",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/secureli/main.py",
"cwd": "${workspaceFolder}",
Expand All @@ -25,7 +25,7 @@
},
{
"name": "Python: secureli init",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/secureli/main.py",
"cwd": "${workspaceFolder}",
Expand All @@ -36,7 +36,7 @@
},
{
"name": "Python: secureli scan",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/secureli/main.py",
"cwd": "${workspaceFolder}",
Expand All @@ -47,7 +47,7 @@
},
{
"name": "Python: secureli update",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/secureli/main.py",
"cwd": "${workspaceFolder}",
Expand Down
13 changes: 12 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ cfgv = "^3.3.1"
pre-commit = ">=2.20,<4.0"
requests = "^2.31.0"
pyyaml = ">=6.0.1"
chardet = "^5.2.0"

[tool.pytest.ini_options]
addopts = "-p no:cacheprovider"
Expand Down
30 changes: 26 additions & 4 deletions secureli/repositories/repo_files.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
from pathlib import Path
import re
import chardet

from secureli.utilities.patterns import combine_patterns


class BinaryFileError(ValueError):
    """
    The loaded file was a binary and cannot be scanned.

    Subclasses ValueError, so handlers that catch ValueError also catch this error.

    :param message: Description of the failure. It is stored on ``message`` and
        passed to ValueError, so ``str(error)`` returns the same text.
    """

    def __init__(self, message: str) -> None:
        self.message = message
        super().__init__(self.message)


class RepoFilesRepository:
"""
Loads files in a given repository, or raises ValueError if the provided path is not a git repo
Expand Down Expand Up @@ -82,13 +93,24 @@ def load_file(self, file_path: Path) -> str:
if not file_path.exists() or not file_path.is_file():
raise ValueError(f"File at path {file_path} did not exist")

if file_path.stat().st_size > self.max_file_size:
file_size = file_path.stat().st_size

if file_size > self.max_file_size:
raise ValueError(f"File at path {file_path} was too big to scan")

try:
with open(file_path, "r", encoding="utf8") as file_handle:
text = file_handle.read()
return text
with open(file_path, "rb") as file_handle:
data = file_handle.read()
encoding = chardet.detect(data)["encoding"]

# chardet reports an encoding of None when it cannot identify the data as text; treat that as binary.
# An empty file also yields None, so only reject files that actually contain data.
if encoding is None and file_size > 0:
raise BinaryFileError(f"File at path {file_path} is a binary file")

return data.decode("utf-8")
except BinaryFileError as e:
raise e
except IOError:
pass
except ValueError:
Expand Down
25 changes: 24 additions & 1 deletion tests/repositories/test_repo_files_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,14 @@ def good_folder_path() -> MagicMock:

@pytest.fixture()
def mock_open_resource(mocker: MockerFixture) -> MagicMock:
return mocker.patch("builtins.open", mocker.mock_open(read_data="sample_data"))
return mocker.patch("builtins.open", mocker.mock_open(read_data=b"sample_data"))


@pytest.fixture()
def mock_open_resource_with_binary_file(mocker: MockerFixture) -> MagicMock:
    """
    Patch ``builtins.open`` so that reading a file yields bytes that are not valid UTF-8.

    :return: The mock installed in place of ``builtins.open``, matching the
        declared return type and the ``mock_open_resource`` fixture.
    """
    mock_open = mocker.patch(
        "builtins.open", mocker.mock_open(read_data=b"\x80\x81\x82\x83")
    )
    # NOTE(review): the load_file code visible in this change reads the size via
    # Path.stat().st_size, not os.path.getsize, so this patch may have no effect.
    # Confirm and remove it if nothing depends on it.
    mocker.patch("os.path.getsize", return_value=128)
    return mock_open


@pytest.fixture()
Expand Down Expand Up @@ -115,6 +122,12 @@ def value_error_occurs_file_path(mock_open_resource_with_value_error) -> MagicMo
return mock_file_path


@pytest.fixture()
def binary_file_with_size_file_path(mock_open_resource_with_binary_file) -> MagicMock:
    """
    Provide the path built by ``always_exists_path`` for "folder/file.txt".

    Requesting ``mock_open_resource_with_binary_file`` ensures ``open`` is
    patched to return binary data before the path is used.
    """
    return always_exists_path("folder/file.txt")


@pytest.fixture()
def repo_files_repository() -> RepoFilesRepository:
all_mov_files = "^(?:.+/)?[^/]*\\.mov(?:(?P<ps_d>/).*)?$"
Expand Down Expand Up @@ -183,3 +196,13 @@ def test_that_load_file_raises_value_error_for_file_if_value_error_occurs(
):
with pytest.raises(ValueError):
repo_files_repository.load_file(value_error_occurs_file_path)


def test_that_load_file_raises_value_error_for_binary_file_with_size(
    repo_files_repository: RepoFilesRepository,
    binary_file_with_size_file_path: MagicMock,
):
    """
    load_file should raise a ValueError whose message names the binary file.
    """
    expected_message = "File at path folder/file.txt is a binary file"
    with pytest.raises(ValueError, match=expected_message):
        repo_files_repository.load_file(binary_file_with_size_file_path)

0 comments on commit 9500ae9

Please sign in to comment.