Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Calculate and store hashes for rom files #1005

Merged
merged 24 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions .trunk/trunk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ cli:
plugins:
sources:
- id: trunk
ref: v1.6.0
ref: v1.6.1
uri: https://github.com/trunk-io/plugins
# Many linters and tools depend on runtimes - configure them here. (https://docs.trunk.io/runtimes)
runtimes:
Expand All @@ -19,24 +19,24 @@ runtimes:
lint:
enabled:
- markdownlint@0.41.0
- eslint@9.6.0
- eslint@9.8.0
- actionlint@1.7.1
- bandit@1.7.9
- black@24.4.2
- checkov@3.2.178
- black@24.8.0
- checkov@3.2.219
- git-diff-check
- isort@5.13.2
- mypy@1.10.1
- osv-scanner@1.8.1
- oxipng@9.1.1
- prettier@3.3.2
- ruff@0.5.1
- mypy@1.11.1
- osv-scanner@1.8.3
- oxipng@9.1.2
- prettier@3.3.3
- ruff@0.5.7
- shellcheck@0.10.0
- shfmt@3.6.0
- svgo@3.3.2
- taplo@0.8.1
- trivy@0.52.2
- trufflehog@3.79.0
- taplo@0.9.3
- trivy@0.54.1
- trufflehog@3.81.7
- yamllint@1.35.1
ignore:
- linters: [ALL]
Expand Down
42 changes: 42 additions & 0 deletions backend/alembic/versions/0025_roms_hashes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""empty message

Revision ID: 0025_roms_hashes
Revises: 0024_sibling_roms_db_view
Create Date: 2024-08-11 21:50:53.301352

"""

import sqlalchemy as sa
from alembic import op
from config import IS_PYTEST_RUN, SCAN_TIMEOUT
from endpoints.sockets.scan import scan_platforms
from handler.redis_handler import high_prio_queue
from handler.scan_handler import ScanType

# revision identifiers, used by Alembic.
revision = "0025_roms_hashes"
down_revision = "0024_sibling_roms_db_view"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add the nullable hash columns to ``roms`` and queue a hash scan.

    Three ``String(100)`` columns are added in a single batch operation:
    ``crc_hash``, ``md5_hash`` and ``sha1_hash``. Unless running under
    pytest, a ``scan_platforms`` job with ``ScanType.HASH_SCAN`` is then
    enqueued on the high-priority queue.
    """
    hash_columns = ("crc_hash", "md5_hash", "sha1_hash")

    with op.batch_alter_table("roms", schema=None) as batch_op:
        for column_name in hash_columns:
            batch_op.add_column(
                sa.Column(column_name, sa.String(length=100), nullable=True)
            )

    # Nothing is enqueued when running under pytest.
    if IS_PYTEST_RUN:
        return

    # Kick off a background hash scan once the columns exist
    # (presumably to fill them in for roms already in the database --
    # confirm against scan_platforms).
    high_prio_queue.enqueue(
        scan_platforms, [], ScanType.HASH_SCAN, [], [], job_timeout=SCAN_TIMEOUT
    )


def downgrade() -> None:
    """Drop the ``sha1_hash``, ``md5_hash`` and ``crc_hash`` columns from ``roms``.

    Columns are removed in the reverse of the order ``upgrade`` adds them.
    """
    with op.batch_alter_table("roms", schema=None) as batch_op:
        for column_name in ("sha1_hash", "md5_hash", "crc_hash"):
            batch_op.drop_column(column_name)
7 changes: 5 additions & 2 deletions backend/endpoints/responses/rom.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from handler.metadata.igdb_handler import IGDBMetadata
from handler.metadata.moby_handler import MobyMetadata
from handler.socket_handler import socket_handler
from models.rom import Rom
from models.rom import Rom, RomFile
from pydantic import BaseModel, Field, computed_field
from typing_extensions import TypedDict

Expand Down Expand Up @@ -108,7 +108,10 @@ class RomSchema(BaseModel):
tags: list[str]

multi: bool
files: list[str]
files: list[RomFile]
crc_hash: str | None
md5_hash: str | None
sha1_hash: str | None
full_path: str
created_at: datetime
updated_at: datetime
Expand Down
6 changes: 3 additions & 3 deletions backend/endpoints/rom.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ async def add_roms(

async with await open_file(file_location, "wb+") as f:
while True:
chunk = rom.file.read(1024)
chunk = rom.file.read(8192)
if not chunk:
break
await f.write(chunk)
Expand Down Expand Up @@ -174,7 +174,7 @@ def head_rom_content(request: Request, id: int, file_name: str):
rom_path = f"{LIBRARY_BASE_PATH}/{rom.full_path}"

return FileResponse(
path=rom_path if not rom.multi else f"{rom_path}/{rom.files[0]}",
path=rom_path if not rom.multi else f'{rom_path}/{rom.files[0]["filename"]}',
filename=file_name,
headers={
"Content-Disposition": f'attachment; filename="{quote(rom.name)}.zip"',
Expand Down Expand Up @@ -211,7 +211,7 @@ async def get_rom_content(
raise RomNotFoundInDatabaseException(id)

rom_path = f"{LIBRARY_BASE_PATH}/{rom.full_path}"
files_to_download = files or rom.files or []
files_to_download = files or [r["filename"] for r in rom.files]

if not rom.multi:
return FileResponse(path=rom_path, filename=rom.file_name)
Expand Down
16 changes: 12 additions & 4 deletions backend/endpoints/sockets/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
fs_resource_handler,
fs_rom_handler,
)
from handler.filesystem.roms_handler import FSRom
from handler.metadata.igdb_handler import IGDB_API_ENABLED
from handler.metadata.moby_handler import MOBY_API_ENABLED
from handler.redis_handler import high_prio_queue, redis_client, redis_url
Expand Down Expand Up @@ -81,6 +82,7 @@ def _should_scan_rom(scan_type: ScanType, rom: Rom, roms_ids: list):
return (
(scan_type in {ScanType.NEW_PLATFORMS, ScanType.QUICK} and not rom)
or (scan_type == ScanType.COMPLETE)
or (scan_type == ScanType.HASH_SCAN)
or (
rom
and (
Expand Down Expand Up @@ -224,7 +226,7 @@ async def _identify_platform(

# Scanning firmware
try:
fs_firmware = fs_firmware_handler.get_firmware(platform)
fs_firmware = fs_firmware_handler.get_firmware(platform.fs_slug)
except FirmwareNotFoundException:
fs_firmware = []

Expand All @@ -241,7 +243,7 @@ async def _identify_platform(

# Scanning roms
try:
fs_roms = fs_rom_handler.get_roms(platform)
fs_roms = fs_rom_handler.get_roms(platform.fs_slug)
except RomsNotFoundException as e:
log.error(e)
return scan_stats
Expand Down Expand Up @@ -302,7 +304,7 @@ async def _identify_firmware(

async def _identify_rom(
platform: Platform,
fs_rom: dict,
fs_rom: FSRom,
scan_type: ScanType,
roms_ids: list[str],
metadata_sources: list[str],
Expand All @@ -317,11 +319,17 @@ async def _identify_rom(
rom = db_rom_handler.get_rom_by_filename(platform.id, fs_rom["file_name"])

if not _should_scan_rom(scan_type=scan_type, rom=rom, roms_ids=roms_ids):
# Just to update the filesystem data
rom.file_name = fs_rom["file_name"]
rom.multi = fs_rom["multi"]
rom.files = fs_rom["files"]
db_rom_handler.add_rom(rom)

return scan_stats

scanned_rom = await scan_rom(
platform=platform,
rom_attrs=fs_rom,
fs_rom=fs_rom,
scan_type=scan_type,
rom=rom,
metadata_sources=metadata_sources,
Expand Down
26 changes: 16 additions & 10 deletions backend/handler/filesystem/firmware_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
)
from fastapi import UploadFile
from logger.logger import log
from models.platform import Platform
from utils.filesystem import iter_files

from .base_handler import FSHandler
Expand All @@ -27,21 +26,21 @@ def remove_file(self, file_name: str, file_path: str):
except IsADirectoryError:
shutil.rmtree(f"{LIBRARY_BASE_PATH}/{file_path}/{file_name}")

def get_firmware(self, platform: Platform):
def get_firmware(self, platform_fs_slug: str):
"""Gets all filesystem firmware for a platform

Args:
platform: platform where firmware belong
Returns:
list with all the filesystem firmware for a platform found in the LIBRARY_BASE_PATH
"""
firmware_path = self.get_firmware_fs_structure(platform.fs_slug)
firmware_path = self.get_firmware_fs_structure(platform_fs_slug)
firmware_file_path = f"{LIBRARY_BASE_PATH}/{firmware_path}"

try:
fs_firmware_files = [f for _, f in iter_files(firmware_file_path)]
except IndexError as exc:
raise FirmwareNotFoundException(platform.fs_slug) from exc
raise FirmwareNotFoundException(platform_fs_slug) from exc

return [f for f in self._exclude_files(fs_firmware_files, "single")]

Expand All @@ -51,13 +50,20 @@ def get_firmware_file_size(self, firmware_path: str, file_name: str):

def calculate_file_hashes(self, firmware_path: str, file_name: str):
with open(f"{LIBRARY_BASE_PATH}/{firmware_path}/{file_name}", "rb") as f:
data = f.read()
crc_c = 0
md5_h = hashlib.md5(usedforsecurity=False)
sha1_h = hashlib.sha1(usedforsecurity=False)

# Read in chunks to avoid memory issues
while chunk := f.read(8192):
md5_h.update(chunk)
sha1_h.update(chunk)
crc_c = binascii.crc32(chunk, crc_c)

return {
"crc_hash": (binascii.crc32(data) & 0xFFFFFFFF)
.to_bytes(4, byteorder="big")
.hex(),
"md5_hash": hashlib.md5(data, usedforsecurity=False).hexdigest(),
"sha1_hash": hashlib.sha1(data, usedforsecurity=False).hexdigest(),
"crc_hash": (crc_c & 0xFFFFFFFF).to_bytes(4, byteorder="big").hex(),
"md5_hash": md5_h.hexdigest(),
"sha1_hash": sha1_h.hexdigest(),
}

def file_exists(self, path: str, file_name: str):
Expand Down
Loading
Loading