diff --git a/README.md b/README.md index 3729d6ae..12ad159d 100644 --- a/README.md +++ b/README.md @@ -107,9 +107,8 @@ llama_stack: url: http://localhost:8321 user_data_collection: feedback_enabled: true - feedback_storage: "/tmp/data/feedback" transcripts_enabled: true - transcripts_storage: "/tmp/data/transcripts" + user_data_dir: "/tmp/data" ``` ### Llama Stack project and configuration @@ -186,9 +185,8 @@ llama_stack: library_client_config_path: user_data_collection: feedback_enabled: true - feedback_storage: "/tmp/data/feedback" transcripts_enabled: true - transcripts_storage: "/tmp/data/transcripts" + user_data_dir: "/tmp/data" ``` ## System prompt @@ -438,9 +436,8 @@ The data collector service is configured through the `user_data_collection.data_ ```yaml user_data_collection: feedback_enabled: true - feedback_storage: "/tmp/data/feedback" transcripts_enabled: true - transcripts_storage: "/tmp/data/transcripts" + user_data_dir: "/tmp/data" data_collector: enabled: true ingress_server_url: "https://your-ingress-server.com" diff --git a/lightspeed-stack.yaml b/lightspeed-stack.yaml index 01a36205..c8e91b5e 100644 --- a/lightspeed-stack.yaml +++ b/lightspeed-stack.yaml @@ -17,9 +17,8 @@ llama_stack: api_key: xyzzy user_data_collection: feedback_enabled: true - feedback_storage: "/tmp/data/feedback" transcripts_enabled: true - transcripts_storage: "/tmp/data/transcripts" + user_data_dir: "/tmp/data" data_collector: enabled: false ingress_server_url: null diff --git a/src/app/endpoints/feedback.py b/src/app/endpoints/feedback.py index 1fcbe656..ae053be9 100644 --- a/src/app/endpoints/feedback.py +++ b/src/app/endpoints/feedback.py @@ -2,7 +2,6 @@ import logging from typing import Any -from pathlib import Path import json from datetime import datetime, UTC @@ -109,19 +108,16 @@ def store_feedback(user_id: str, feedback: dict) -> None: feedback: The feedback to store. """ logger.debug("Storing feedback for user %s", user_id) - # Creates storage path only if it doesn't exist. The `exist_ok=True` prevents - # race conditions in case of multiple server instances trying to set up storage - # at the same location. - storage_path = Path( - configuration.user_data_collection_configuration.feedback_storage or "" - ) - storage_path.mkdir(parents=True, exist_ok=True) + + # Create feedback directory if it doesn't exist + feedback_dir = configuration.user_data_collection_configuration.feedback_storage + feedback_dir.mkdir(parents=True, exist_ok=True) current_time = str(datetime.now(UTC)) data_to_store = {"user_id": user_id, "timestamp": current_time, **feedback} # stores feedback in a file under unique uuid - feedback_file_path = storage_path / f"{get_suid()}.json" + feedback_file_path = feedback_dir / f"{get_suid()}.json" with open(feedback_file_path, "w", encoding="utf-8") as feedback_file: json.dump(data_to_store, feedback_file) diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py index d7629a02..c9fb7b4e 100644 --- a/src/app/endpoints/query.py +++ b/src/app/endpoints/query.py @@ -350,9 +350,7 @@ def construct_transcripts_path(user_id: str, conversation_id: str) -> Path: # this Path sanitization pattern uid = os.path.normpath("/" + user_id).lstrip("/") cid = os.path.normpath("/" + conversation_id).lstrip("/") - file_path = ( - configuration.user_data_collection_configuration.transcripts_storage or "" - ) + file_path = configuration.user_data_collection_configuration.transcripts_storage return Path(file_path, uid, cid) diff --git a/src/lightspeed_stack.py b/src/lightspeed_stack.py index 4479e308..2a348df7 100644 --- a/src/lightspeed_stack.py +++ b/src/lightspeed_stack.py @@ -79,9 +79,7 @@ def main() -> None: if args.dump_configuration: configuration.configuration.dump() elif args.start_data_collector: - start_data_collector( - configuration.user_data_collection_configuration.data_collector - ) + start_data_collector(configuration.user_data_collection_configuration) else: start_uvicorn(configuration.service_configuration) logger.info("Lightspeed stack finished") diff --git a/src/models/config.py b/src/models/config.py index 90a53275..4d9ff7b7 100644 --- a/src/models/config.py +++ b/src/models/config.py @@ -101,48 +101,49 @@ def check_llama_stack_model(self) -> Self: return self -class DataCollectorConfiguration(BaseModel): - """Data collector configuration for sending data to ingress server.""" +class UserDataCollection(BaseModel): + """User data collection configuration.""" - enabled: bool = False - ingress_server_url: Optional[str] = None - ingress_server_auth_token: Optional[str] = None - ingress_content_service_name: Optional[str] = None - collection_interval: PositiveInt = constants.DATA_COLLECTOR_COLLECTION_INTERVAL - cleanup_after_send: bool = True # Remove local files after successful send - connection_timeout: PositiveInt = constants.DATA_COLLECTOR_CONNECTION_TIMEOUT + # data collection + feedback_enabled: bool = False + transcripts_enabled: bool = False + user_data_dir: Path = Path("user_data") + + # data export + export_enabled: bool = False + ingress_server_url: str = "" + ingress_server_auth_token: str = "" + ingress_content_service_name: str = "" + export_collection_interval: PositiveInt = ( + constants.DATA_COLLECTOR_COLLECTION_INTERVAL + ) + export_connection_timeout: PositiveInt = constants.DATA_COLLECTOR_CONNECTION_TIMEOUT + cleanup_after_send: bool = True # Remove local files after successful export + + @property + def feedback_storage(self) -> Path: + """Feedback storage directory path.""" + return self.user_data_dir / "feedback" + + @property + def transcripts_storage(self) -> Path: + """Transcripts storage directory path.""" + return self.user_data_dir / "transcripts" @model_validator(mode="after") def check_data_collector_configuration(self) -> Self: """Check data collector configuration.""" - if self.enabled and self.ingress_server_url is None: + if self.export_enabled and self.ingress_server_url == "": raise ValueError( - "ingress_server_url is required when data collector is enabled" + "ingress_server_url is required when data export is enabled" ) - if self.enabled and self.ingress_content_service_name is None: + if self.export_enabled and self.ingress_server_auth_token == "": raise ValueError( - "ingress_content_service_name is required when data collector is enabled" + "ingress_server_auth_token is required when data export is enabled" ) - return self - - -class UserDataCollection(BaseModel): - """User data collection configuration.""" - - feedback_enabled: bool = False - feedback_storage: Optional[str] = None - transcripts_enabled: bool = False - transcripts_storage: Optional[str] = None - data_collector: DataCollectorConfiguration = DataCollectorConfiguration() - - @model_validator(mode="after") - def check_storage_location_is_set_when_needed(self) -> Self: - """Check that storage_location is set when enabled.""" - if self.feedback_enabled and self.feedback_storage is None: - raise ValueError("feedback_storage is required when feedback is enabled") - if self.transcripts_enabled and self.transcripts_storage is None: + if self.export_enabled and self.ingress_content_service_name == "": raise ValueError( - "transcripts_storage is required when transcripts is enabled" + "ingress_content_service_name is required when data export is enabled" ) return self diff --git a/src/runners/data_collector.py b/src/runners/data_collector.py index 7bf05e8f..346eaeca 100644 --- a/src/runners/data_collector.py +++ b/src/runners/data_collector.py @@ -2,22 +2,31 @@ import logging -from models.config import DataCollectorConfiguration +from models.config import UserDataCollection from services.data_collector import DataCollectorService logger: logging.Logger = logging.getLogger(__name__) -def start_data_collector(configuration: DataCollectorConfiguration) -> None: +def start_data_collector(configuration: UserDataCollection) -> None: """Start the data collector service as a standalone process.""" logger.info("Starting data collector runner") - if not configuration.enabled: + if not configuration.export_enabled: logger.info("Data collection is disabled") return try: - service = DataCollectorService() + service = DataCollectorService( + feedback_dir=configuration.feedback_storage, + transcripts_dir=configuration.transcripts_storage, + collection_interval=configuration.export_collection_interval, + cleanup_after_send=configuration.cleanup_after_send, + ingress_server_url=configuration.ingress_server_url, + ingress_server_auth_token=configuration.ingress_server_auth_token, + ingress_content_service_name=configuration.ingress_content_service_name, + connection_timeout=configuration.export_connection_timeout, + ) service.run() except Exception as e: logger.error( diff --git a/src/services/data_collector.py b/src/services/data_collector.py index f87d2503..ed6bb3d1 100644 --- a/src/services/data_collector.py +++ b/src/services/data_collector.py @@ -9,12 +9,12 @@ import requests import constants -from configuration import configuration from log import get_logger logger = get_logger(__name__) +# pylint: disable=too-many-instance-attributes class DataCollectorService: # pylint: disable=too-few-public-methods """Service for collecting and sending user data to ingress server. @@ -22,12 +22,30 @@ class DataCollectorService: # pylint: disable=too-few-public-methods including feedback and transcripts to the configured ingress server. """ + # pylint: disable=too-many-arguments,too-many-positional-arguments + def __init__( + self, + feedback_dir: Path, + transcripts_dir: Path, + collection_interval: int, + ingress_server_url: str, + ingress_server_auth_token: str, + ingress_content_service_name: str, + connection_timeout: int, + cleanup_after_send: bool, + ) -> None: + """Initialize the data collector service.""" + self.feedback_dir = feedback_dir + self.transcripts_dir = transcripts_dir + self.collection_interval = collection_interval + self.ingress_server_url = ingress_server_url + self.ingress_server_auth_token = ingress_server_auth_token + self.ingress_content_service_name = ingress_content_service_name + self.connection_timeout = connection_timeout + self.cleanup_after_send = cleanup_after_send + def run(self) -> None: """Run the periodic data collection loop.""" - collector_config = ( - configuration.user_data_collection_configuration.data_collector - ) - logger.info("Starting data collection service") while True: @@ -35,10 +53,10 @@ def run(self) -> None: self._perform_collection() logger.info( "Next collection scheduled in %s seconds", - collector_config.collection_interval, + self.collection_interval, ) - if collector_config.collection_interval is not None: - time.sleep(collector_config.collection_interval) + if self.collection_interval is not None: + time.sleep(self.collection_interval) except KeyboardInterrupt: logger.info("Data collection service stopped by user") break @@ -70,19 +88,15 @@ def _perform_collection(self) -> None: collections_sent = 0 try: if feedback_files: - udc_config = configuration.user_data_collection_configuration - if udc_config.feedback_storage: - feedback_base = Path(udc_config.feedback_storage) - collections_sent += self._create_and_send_tarball( - feedback_files, "feedback", feedback_base - ) + feedback_base = self.feedback_dir + collections_sent += self._create_and_send_tarball( + feedback_files, "feedback", feedback_base + ) if transcript_files: - udc_config = configuration.user_data_collection_configuration - if udc_config.transcripts_storage: - transcript_base = Path(udc_config.transcripts_storage) - collections_sent += self._create_and_send_tarball( - transcript_files, "transcripts", transcript_base - ) + transcript_base = self.transcripts_dir + collections_sent += self._create_and_send_tarball( + transcript_files, "transcripts", transcript_base + ) logger.info( "Successfully sent %s collections to ingress server", collections_sent @@ -93,30 +107,18 @@ def _perform_collection(self) -> None: def _collect_feedback_files(self) -> List[Path]: """Collect all feedback files that need to be collected.""" - udc_config = configuration.user_data_collection_configuration - - if not udc_config.feedback_enabled or not udc_config.feedback_storage: - return [] - - feedback_dir = Path(udc_config.feedback_storage) - if not feedback_dir.exists(): + if not self.feedback_dir.exists(): return [] - return list(feedback_dir.glob("*.json")) + return list(self.feedback_dir.glob("*.json")) def _collect_transcript_files(self) -> List[Path]: """Collect all transcript files that need to be collected.""" - udc_config = configuration.user_data_collection_configuration - - if not udc_config.transcripts_enabled or not udc_config.transcripts_storage: - return [] - - transcripts_dir = Path(udc_config.transcripts_storage) - if not transcripts_dir.exists(): + if not self.transcripts_dir.exists(): return [] # Recursively find all JSON files in the transcript directory structure - return list(transcripts_dir.rglob("*.json")) + return list(self.transcripts_dir.rglob("*.json")) def _create_and_send_tarball( self, files: List[Path], data_type: str, base_directory: Path @@ -125,15 +127,11 @@ def _create_and_send_tarball( if not files: return 0 - collector_config = ( - configuration.user_data_collection_configuration.data_collector - ) - # Create one tarball with all files tarball_path = self._create_tarball(files, data_type, base_directory) try: self._send_tarball(tarball_path) - if collector_config.cleanup_after_send: + if self.cleanup_after_send: self._cleanup_files(files) self._cleanup_empty_directories() return 1 @@ -169,22 +167,13 @@ def _create_tarball( def _send_tarball(self, tarball_path: Path) -> None: """Send the tarball to the ingress server.""" - collector_config = ( - configuration.user_data_collection_configuration.data_collector - ) - - if collector_config.ingress_server_url is None: - raise ValueError("Ingress server URL is not configured") - # pylint: disable=line-too-long headers = { - "Content-Type": f"application/vnd.redhat.{collector_config.ingress_content_service_name}.periodic+tar", + "Content-Type": f"application/vnd.redhat.{self.ingress_content_service_name}.periodic+tar", } - if collector_config.ingress_server_auth_token: - headers["Authorization"] = ( - f"Bearer {collector_config.ingress_server_auth_token}" - ) + if self.ingress_server_auth_token: + headers["Authorization"] = f"Bearer {self.ingress_server_auth_token}" with open(tarball_path, "rb") as f: data = f.read() @@ -192,14 +181,14 @@ def _send_tarball(self, tarball_path: Path) -> None: logger.info( "Sending tarball %s to %s", tarball_path.name, - collector_config.ingress_server_url, + self.ingress_server_url, ) response = requests.post( - collector_config.ingress_server_url, + self.ingress_server_url, data=data, headers=headers, - timeout=collector_config.connection_timeout, + timeout=self.connection_timeout, ) if response.status_code >= 400: @@ -221,17 +210,11 @@ def _cleanup_files(self, files: List[Path]) -> None: def _cleanup_empty_directories(self) -> None: """Remove empty directories from transcript storage.""" - udc_config = configuration.user_data_collection_configuration - - if not udc_config.transcripts_enabled or not udc_config.transcripts_storage: - return - - transcripts_dir = Path(udc_config.transcripts_storage) - if not transcripts_dir.exists(): + if not self.transcripts_dir.exists(): return # Remove empty directories (conversation and user directories) - for user_dir in transcripts_dir.iterdir(): + for user_dir in self.transcripts_dir.iterdir(): if user_dir.is_dir(): for conv_dir in user_dir.iterdir(): if conv_dir.is_dir() and not any(conv_dir.iterdir()): diff --git a/tests/configuration/lightspeed-stack.yaml b/tests/configuration/lightspeed-stack.yaml index 1cf9565c..8f21cf5e 100644 --- a/tests/configuration/lightspeed-stack.yaml +++ b/tests/configuration/lightspeed-stack.yaml @@ -17,7 +17,7 @@ llama_stack: api_key: xyzzy user_data_collection: feedback_enabled: true - feedback_storage: "/tmp/data/feedback" + user_data_dir: "/tmp/ls_user_data" mcp_servers: - name: "server1" provider_id: "provider1" diff --git a/tests/integration/test_configuration.py b/tests/integration/test_configuration.py index 54b1f0cd..8ebad025 100644 --- a/tests/integration/test_configuration.py +++ b/tests/integration/test_configuration.py @@ -1,5 +1,7 @@ """Integration tests for configuration loading and handling.""" +from pathlib import Path + import pytest from configuration import configuration @@ -56,7 +58,7 @@ def test_loading_proper_configuration(configuration_filename: str) -> None: # check 'user_data_collection' section udc_config = cfg.user_data_collection_configuration assert udc_config.feedback_enabled is True - assert udc_config.feedback_storage == "/tmp/data/feedback" + assert udc_config.feedback_storage == Path("/tmp/ls_user_data/feedback") # check MCP servers section mcp_servers = cfg.mcp_servers diff --git a/tests/unit/app/endpoints/test_feedback.py b/tests/unit/app/endpoints/test_feedback.py index 1e2e5df2..2696266e 100644 --- a/tests/unit/app/endpoints/test_feedback.py +++ b/tests/unit/app/endpoints/test_feedback.py @@ -1,5 +1,7 @@ """Unit tests for the /feedback REST API endpoint.""" +from pathlib import Path + from fastapi import HTTPException, status import pytest @@ -96,10 +98,11 @@ def test_feedback_endpoint_handler_error(mocker): def test_store_feedback(mocker): """Test that store_feedback calls the correct storage function.""" - configuration.user_data_collection_configuration.feedback_storage = "fake-path" + configuration.user_data_collection_configuration.user_data_dir = Path("fake") mocker.patch("builtins.open", mocker.mock_open()) - mocker.patch("app.endpoints.feedback.Path", return_value=mocker.MagicMock()) + # Mock mkdir to prevent actual directory creation + mocker.patch.object(Path, "mkdir") mocker.patch("app.endpoints.feedback.get_suid", return_value="fake-uuid") # Mock the JSON to assert the data is stored correctly diff --git a/tests/unit/app/endpoints/test_query.py b/tests/unit/app/endpoints/test_query.py index 8ace106a..db7711f6 100644 --- a/tests/unit/app/endpoints/test_query.py +++ b/tests/unit/app/endpoints/test_query.py @@ -2,6 +2,8 @@ # pylint: disable=too-many-lines +from pathlib import Path + import json from fastapi import HTTPException, status import pytest @@ -911,9 +913,7 @@ def test_retrieve_response_shield_violation(prepare_agent_mocks, mocker): def test_construct_transcripts_path(setup_configuration, mocker): """Test the construct_transcripts_path function.""" # Update configuration for this test - setup_configuration.user_data_collection_configuration.transcripts_storage = ( - "/tmp/transcripts" - ) + setup_configuration.user_data_collection_configuration.user_data_dir = Path("/tmp") mocker.patch("app.endpoints.query.configuration", setup_configuration) user_id = "user123" diff --git a/tests/unit/models/test_config.py b/tests/unit/models/test_config.py index 293f48a1..36399677 100644 --- a/tests/unit/models/test_config.py +++ b/tests/unit/models/test_config.py @@ -22,7 +22,6 @@ UserDataCollection, TLSConfiguration, ModelContextProtocolServer, - DataCollectorConfiguration, ) from utils.checks import InvalidConfigurationError @@ -134,53 +133,41 @@ def test_llama_stack_wrong_configuration_no_config_file() -> None: def test_user_data_collection_feedback_enabled() -> None: """Test the UserDataCollection constructor for feedback.""" # correct configuration - cfg = UserDataCollection(feedback_enabled=False, feedback_storage=None) + cfg = UserDataCollection(feedback_enabled=False) assert cfg is not None assert cfg.feedback_enabled is False - assert cfg.feedback_storage is None - - -def test_user_data_collection_feedback_disabled() -> None: - """Test the UserDataCollection constructor for feedback.""" - # incorrect configuration - with pytest.raises( - ValueError, - match="feedback_storage is required when feedback is enabled", - ): - UserDataCollection(feedback_enabled=True, feedback_storage=None) + assert cfg.user_data_dir == Path("user_data") + assert cfg.feedback_storage == Path("user_data/feedback") def test_user_data_collection_transcripts_enabled() -> None: """Test the UserDataCollection constructor for transcripts.""" # correct configuration - cfg = UserDataCollection(transcripts_enabled=False, transcripts_storage=None) + cfg = UserDataCollection(transcripts_enabled=False) assert cfg is not None + assert cfg.transcripts_enabled is False + assert cfg.user_data_dir == Path("user_data") + assert cfg.transcripts_storage == Path("user_data/transcripts") -def test_user_data_collection_transcripts_disabled() -> None: - """Test the UserDataCollection constructor for transcripts.""" - # incorrect configuration - with pytest.raises( - ValueError, - match="transcripts_storage is required when transcripts is enabled", - ): - UserDataCollection(transcripts_enabled=True, transcripts_storage=None) +def test_user_data_collection_custom_dir() -> None: + """Test the UserDataCollection constructor with custom directory.""" + cfg = UserDataCollection(user_data_dir=Path("/custom/path")) + assert cfg.feedback_storage == Path("/custom/path/feedback") + assert cfg.transcripts_storage == Path("/custom/path/transcripts") def test_user_data_collection_data_collector_enabled() -> None: """Test the UserDataCollection constructor for data collector.""" # correct configuration cfg = UserDataCollection( - data_collector=DataCollectorConfiguration( - enabled=True, - ingress_server_url="http://localhost:8080", - ingress_server_auth_token="xyzzy", - ingress_content_service_name="lightspeed-core", - collection_interval=60, - ) + export_enabled=True, + ingress_server_url="http://localhost:8080", + ingress_server_auth_token="xyzzy", + ingress_content_service_name="lightspeed-core", ) assert cfg is not None - assert cfg.data_collector.enabled is True + assert cfg.export_enabled is True def test_user_data_collection_data_collector_wrong_configuration() -> None: @@ -188,29 +175,33 @@ def test_user_data_collection_data_collector_wrong_configuration() -> None: # incorrect configuration with pytest.raises( ValueError, - match="ingress_server_url is required when data collector is enabled", + match="ingress_server_url is required when data export is enabled", + ): + UserDataCollection( + export_enabled=True, + ingress_server_url="", + ingress_server_auth_token="xyzzy", + ingress_content_service_name="lightspeed-core", + ) + with pytest.raises( + ValueError, + match="ingress_content_service_name is required when data export is enabled", ): UserDataCollection( - data_collector=DataCollectorConfiguration( - enabled=True, - ingress_server_url=None, - ingress_server_auth_token="xyzzy", - ingress_content_service_name="lightspeed-core", - collection_interval=60, - ) + export_enabled=True, + ingress_server_url="http://localhost:8080", + ingress_server_auth_token="xyzzy", + ingress_content_service_name="", ) with pytest.raises( ValueError, - match="ingress_content_service_name is required when data collector is enabled", + match="ingress_server_auth_token is required when data export is enabled", ): UserDataCollection( - data_collector=DataCollectorConfiguration( - enabled=True, - ingress_server_url="http://localhost:8080", - ingress_server_auth_token="xyzzy", - ingress_content_service_name=None, - collection_interval=60, - ) + export_enabled=True, + ingress_server_url="http://localhost:8080", + ingress_server_auth_token="", + ingress_content_service_name="lightspeed-core", ) @@ -337,9 +328,7 @@ def test_configuration_empty_mcp_servers() -> None: use_as_library_client=True, library_client_config_path="tests/configuration/run.yaml", ), - user_data_collection=UserDataCollection( - feedback_enabled=False, feedback_storage=None - ), + user_data_collection=UserDataCollection(feedback_enabled=False), mcp_servers=[], customization=None, ) @@ -362,9 +351,7 @@ def test_configuration_single_mcp_server() -> None: use_as_library_client=True, library_client_config_path="tests/configuration/run.yaml", ), - user_data_collection=UserDataCollection( - feedback_enabled=False, feedback_storage=None - ), + user_data_collection=UserDataCollection(feedback_enabled=False), mcp_servers=[mcp_server], customization=None, ) @@ -394,9 +381,7 @@ def test_configuration_multiple_mcp_servers() -> None: use_as_library_client=True, library_client_config_path="tests/configuration/run.yaml", ), - user_data_collection=UserDataCollection( - feedback_enabled=False, feedback_storage=None - ), + user_data_collection=UserDataCollection(feedback_enabled=False), mcp_servers=mcp_servers, customization=None, ) @@ -421,9 +406,7 @@ def test_dump_configuration(tmp_path) -> None: use_as_library_client=True, library_client_config_path="tests/configuration/run.yaml", ), - user_data_collection=UserDataCollection( - feedback_enabled=False, feedback_storage=None - ), + user_data_collection=UserDataCollection(feedback_enabled=False), mcp_servers=[], customization=None, ) @@ -468,18 +451,15 @@ def test_dump_configuration(tmp_path) -> None: }, "user_data_collection": { "feedback_enabled": False, - "feedback_storage": None, "transcripts_enabled": False, - "transcripts_storage": None, - "data_collector": { - "enabled": False, - "ingress_server_url": None, - "ingress_server_auth_token": None, - "ingress_content_service_name": None, - "collection_interval": DATA_COLLECTOR_COLLECTION_INTERVAL, - "cleanup_after_send": True, - "connection_timeout": DATA_COLLECTOR_CONNECTION_TIMEOUT, - }, + "user_data_dir": "user_data", + "export_enabled": False, + "ingress_server_url": "", + "ingress_server_auth_token": "", + "ingress_content_service_name": "", + "export_collection_interval": DATA_COLLECTOR_COLLECTION_INTERVAL, + "export_connection_timeout": DATA_COLLECTOR_CONNECTION_TIMEOUT, + "cleanup_after_send": True, }, "mcp_servers": [], "authentication": { @@ -511,9 +491,7 @@ def test_dump_configuration_with_one_mcp_server(tmp_path) -> None: use_as_library_client=True, library_client_config_path="tests/configuration/run.yaml", ), - user_data_collection=UserDataCollection( - feedback_enabled=False, feedback_storage=None - ), + user_data_collection=UserDataCollection(feedback_enabled=False), mcp_servers=mcp_servers, customization=None, ) @@ -553,18 +531,15 @@ def test_dump_configuration_with_one_mcp_server(tmp_path) -> None: }, "user_data_collection": { "feedback_enabled": False, - "feedback_storage": None, "transcripts_enabled": False, - "transcripts_storage": None, - "data_collector": { - "enabled": False, - "ingress_server_url": None, - "ingress_server_auth_token": None, - "ingress_content_service_name": None, - "collection_interval": DATA_COLLECTOR_COLLECTION_INTERVAL, - "cleanup_after_send": True, - "connection_timeout": DATA_COLLECTOR_CONNECTION_TIMEOUT, - }, + "user_data_dir": "user_data", + "export_enabled": False, + "ingress_server_url": "", + "ingress_server_auth_token": "", + "ingress_content_service_name": "", + "export_collection_interval": DATA_COLLECTOR_COLLECTION_INTERVAL, + "export_connection_timeout": DATA_COLLECTOR_CONNECTION_TIMEOUT, + "cleanup_after_send": True, }, "mcp_servers": [ { @@ -605,9 +580,7 @@ def test_dump_configuration_with_more_mcp_servers(tmp_path) -> None: use_as_library_client=True, library_client_config_path="tests/configuration/run.yaml", ), - user_data_collection=UserDataCollection( - feedback_enabled=False, feedback_storage=None - ), + user_data_collection=UserDataCollection(feedback_enabled=False), mcp_servers=mcp_servers, customization=None, ) @@ -653,18 +626,15 @@ def test_dump_configuration_with_more_mcp_servers(tmp_path) -> None: }, "user_data_collection": { "feedback_enabled": False, - "feedback_storage": None, "transcripts_enabled": False, - "transcripts_storage": None, - "data_collector": { - "enabled": False, - "ingress_server_url": None, - "ingress_server_auth_token": None, - "ingress_content_service_name": None, - "collection_interval": DATA_COLLECTOR_COLLECTION_INTERVAL, - "cleanup_after_send": True, - "connection_timeout": DATA_COLLECTOR_CONNECTION_TIMEOUT, - }, + "user_data_dir": "user_data", + "export_enabled": False, + "ingress_server_url": "", + "ingress_server_auth_token": "", + "ingress_content_service_name": "", + "export_collection_interval": DATA_COLLECTOR_COLLECTION_INTERVAL, + "export_connection_timeout": DATA_COLLECTOR_CONNECTION_TIMEOUT, + "cleanup_after_send": True, }, "mcp_servers": [ { diff --git a/tests/unit/runners/test_data_collector_runner.py b/tests/unit/runners/test_data_collector_runner.py index 4384b8ab..14903304 100644 --- a/tests/unit/runners/test_data_collector_runner.py +++ b/tests/unit/runners/test_data_collector_runner.py @@ -2,18 +2,17 @@ from unittest.mock import patch -from models.config import DataCollectorConfiguration +from models.config import UserDataCollection from runners.data_collector import start_data_collector def test_start_data_collector() -> None: """Test the function to start data collector service.""" - configuration = DataCollectorConfiguration( - enabled=True, + configuration = UserDataCollection( + export_enabled=True, ingress_server_url="http://localhost:8080", ingress_server_auth_token="xyzzy", ingress_content_service_name="lightspeed-core", - collection_interval=60, ) # don't start real data collector service @@ -24,12 +23,11 @@ def test_start_data_collector() -> None: def test_start_data_collector_disabled() -> None: """Test the function to start data collector service.""" - configuration = DataCollectorConfiguration( - enabled=False, + configuration = UserDataCollection( + export_enabled=False, ingress_server_url="http://localhost:8080", ingress_server_auth_token="xyzzy", ingress_content_service_name="lightspeed-core", - collection_interval=60, ) # don't start real data collector service @@ -40,12 +38,11 @@ def test_start_data_collector_disabled() -> None: def test_start_data_collector_exception() -> None: """Test the function to start data collector service when an exception occurs.""" - configuration = DataCollectorConfiguration( - enabled=True, + configuration = UserDataCollection( + export_enabled=True, ingress_server_url="http://localhost:8080", ingress_server_auth_token="xyzzy", ingress_content_service_name="lightspeed-core", - collection_interval=60, ) # Mock the DataCollectorService to raise an exception diff --git a/tests/unit/services/test_data_collector.py b/tests/unit/services/test_data_collector.py index b2103ad5..15c157b2 100644 --- a/tests/unit/services/test_data_collector.py +++ b/tests/unit/services/test_data_collector.py @@ -4,24 +4,38 @@ from unittest.mock import patch, MagicMock import requests import tarfile - from services.data_collector import DataCollectorService +def _create_test_service(**kwargs) -> DataCollectorService: + """Create a DataCollectorService instance with default test parameters.""" + defaults = { + "feedback_dir": Path("/tmp/feedback"), + "transcripts_dir": Path("/tmp/transcripts"), + "collection_interval": 60, + "ingress_server_url": "http://test-server.com", + "ingress_server_auth_token": "test-token", + "ingress_content_service_name": "test-service", + "connection_timeout": 30, + "cleanup_after_send": True, + } + defaults.update(kwargs) + return DataCollectorService(**defaults) + + def test_data_collector_service_creation() -> None: """Test that DataCollectorService can be created.""" - service = DataCollectorService() + service = _create_test_service() assert service is not None + assert service.feedback_dir == Path("/tmp/feedback") + assert service.transcripts_dir == Path("/tmp/transcripts") + assert service.collection_interval == 60 @patch("services.data_collector.time.sleep") -@patch("services.data_collector.configuration") -def test_run_normal_operation(mock_config, mock_sleep) -> None: +def test_run_normal_operation(mock_sleep) -> None: """Test normal operation of the run method.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.data_collector.collection_interval = ( - 60 - ) + service = _create_test_service() with patch.object(service, "_perform_collection") as mock_perform: mock_perform.side_effect = [None, KeyboardInterrupt()] @@ -33,10 +47,9 @@ def test_run_normal_operation(mock_config, mock_sleep) -> None: @patch("services.data_collector.time.sleep") -@patch("services.data_collector.configuration") -def test_run_with_exception(mock_config, mock_sleep) -> None: +def test_run_with_exception(mock_sleep) -> None: """Test run method with exception handling.""" - service = DataCollectorService() + service = _create_test_service() with patch.object(service, "_perform_collection") as mock_perform: mock_perform.side_effect = [OSError("Test error"), KeyboardInterrupt()] @@ -44,544 +57,470 @@ def test_run_with_exception(mock_config, mock_sleep) -> None: service.run() assert mock_perform.call_count == 2 - mock_sleep.assert_called_once_with(300) - - -@patch("services.data_collector.configuration") -def test_collect_feedback_files_disabled(mock_config) -> None: - """Test collecting feedback files when disabled.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.feedback_enabled = False - - result = service._collect_feedback_files() - assert result == [] + mock_sleep.assert_called_once_with( + 300 + ) # constants.DATA_COLLECTOR_RETRY_INTERVAL -@patch("services.data_collector.configuration") -def test_collect_feedback_files_no_storage(mock_config) -> None: - """Test collecting feedback files when no storage configured.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.feedback_enabled = True - mock_config.user_data_collection_configuration.feedback_storage = None +def test_collect_feedback_files_directory_not_exists() -> None: + """Test collecting feedback files when directory doesn't exist.""" + service = _create_test_service(feedback_dir=Path("/nonexistent/feedback")) result = service._collect_feedback_files() assert result == [] -@patch("services.data_collector.configuration") -def test_collect_feedback_files_directory_not_exists(mock_config) -> None: - """Test collecting feedback files when directory doesn't exist.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.feedback_enabled = True - mock_config.user_data_collection_configuration.feedback_storage = "/tmp/feedback" - - with patch("services.data_collector.Path") as mock_path: - mock_path.return_value.exists.return_value = False - - result = service._collect_feedback_files() - assert result == [] - - -@patch("services.data_collector.configuration") -def test_collect_feedback_files_success(mock_config) -> None: +def test_collect_feedback_files_success() -> None: """Test collecting feedback files successfully.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.feedback_enabled = True - mock_config.user_data_collection_configuration.feedback_storage = "/tmp/feedback" - - mock_files = [Path("/tmp/feedback/file1.json")] + service = _create_test_service() + mock_files = [Path("/tmp/feedback/file1.json"), Path("/tmp/feedback/file2.json")] - with patch("services.data_collector.Path") as mock_path: - mock_path.return_value.exists.return_value = True - mock_path.return_value.glob.return_value = mock_files + with ( + patch("pathlib.Path.exists", return_value=True), + patch("pathlib.Path.glob", return_value=mock_files) as mock_glob, # noqa: F841 + ): result = service._collect_feedback_files() assert result == mock_files -@patch("services.data_collector.configuration") -def test_collect_transcript_files_disabled(mock_config) -> None: - """Test collecting transcript files when disabled.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.transcripts_enabled = False +def test_collect_transcript_files_directory_not_exists() -> None: + """Test collecting transcript files when directory doesn't exist.""" + service = _create_test_service(transcripts_dir=Path("/nonexistent/transcripts")) result = service._collect_transcript_files() assert result == [] -@patch("services.data_collector.configuration") -def test_collect_transcript_files_directory_not_exists(mock_config) -> None: - """Test collecting transcript files when directory doesn't exist.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.transcripts_enabled = True - mock_config.user_data_collection_configuration.transcripts_storage = ( - "/tmp/transcripts" - ) - - with patch("services.data_collector.Path") as mock_path: - mock_path.return_value.exists.return_value = False - - result = service._collect_transcript_files() - assert result == [] - - -@patch("services.data_collector.configuration") -def test_collect_transcript_files_success(mock_config) -> None: +def test_collect_transcript_files_success() -> None: """Test collecting transcript files successfully.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.transcripts_enabled = True - mock_config.user_data_collection_configuration.transcripts_storage = ( - "/tmp/transcripts" - ) - + service = _create_test_service() mock_files = [Path("/tmp/transcripts/user1/conv1/file1.json")] - with patch("services.data_collector.Path") as mock_path: - mock_path.return_value.exists.return_value = True - mock_path.return_value.rglob.return_value = mock_files + with ( + patch("pathlib.Path.exists", return_value=True), + patch( + "pathlib.Path.rglob", return_value=mock_files + ) as mock_rglob, # noqa: F841 + ): result = service._collect_transcript_files() assert result == mock_files -@patch("services.data_collector.configuration") -def test_perform_collection_no_files(mock_config) -> None: - """Test _perform_collection when no files are found.""" - service = DataCollectorService() +def test_perform_collection_no_files() -> None: + """Test perform collection when no files are found.""" + service = _create_test_service() with ( patch.object(service, "_collect_feedback_files", return_value=[]), patch.object(service, "_collect_transcript_files", return_value=[]), ): - service._perform_collection() + # Should not raise any exceptions and should return early + service._perform_collection() -@patch("services.data_collector.configuration") -def test_perform_collection_with_files(mock_config) -> None: - """Test _perform_collection when files are found.""" - service = DataCollectorService() +def test_perform_collection_with_files() -> None: + """Test perform collection with files.""" + service = _create_test_service() feedback_files = [Path("/tmp/feedback/file1.json")] + transcript_files = [Path("/tmp/transcripts/user1/conv1/file1.json")] with ( patch.object(service, "_collect_feedback_files", return_value=feedback_files), - patch.object(service, "_collect_transcript_files", return_value=[]), - patch.object(service, "_create_and_send_tarball", return_value=1), + patch.object( + service, "_collect_transcript_files", return_value=transcript_files + ), + patch.object( + service, "_create_and_send_tarball", return_value=1 + ) as mock_create_send, ): + service._perform_collection() + # Should be called once for feedback and once for transcripts + assert mock_create_send.call_count == 2 + mock_create_send.assert_any_call( + feedback_files, "feedback", service.feedback_dir + ) + mock_create_send.assert_any_call( + transcript_files, "transcripts", service.transcripts_dir + ) -@patch("services.data_collector.configuration") -def test_perform_collection_with_exception(mock_config) -> None: - """Test _perform_collection when an exception occurs.""" - service = DataCollectorService() + +def test_perform_collection_with_exception() -> None: + """Test perform collection with exception.""" + service = _create_test_service() + feedback_files = [Path("/tmp/feedback/file1.json")] with ( - patch.object( - service, "_collect_feedback_files", return_value=[Path("/tmp/test.json")] - ), + patch.object(service, "_collect_feedback_files", return_value=feedback_files), patch.object(service, "_collect_transcript_files", return_value=[]), patch.object( - service, "_create_and_send_tarball", side_effect=Exception("Test error") + service, "_create_and_send_tarball", side_effect=OSError("Test error") ), ): + + # Should re-raise the exception try: service._perform_collection() - assert False, "Expected exception" - except Exception as e: + assert False, "Expected OSError to be raised" + except OSError as e: assert str(e) == "Test error" -@patch("services.data_collector.configuration") -def test_create_and_send_tarball_no_files(mock_config) -> None: - """Test creating tarball with no files.""" - service = DataCollectorService() +def test_create_and_send_tarball_no_files() -> None: + """Test create and send tarball with no files.""" + service = _create_test_service() - result = service._create_and_send_tarball([], "test", Path("/tmp")) + result = service._create_and_send_tarball([], "feedback", service.feedback_dir) assert result == 0 -@patch("services.data_collector.configuration") -def test_create_and_send_tarball_success(mock_config) -> None: - """Test creating and sending tarball successfully.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.data_collector.cleanup_after_send = ( - True - ) - - files = [Path("/tmp/test/file1.json")] - tarball_path = Path("/tmp/test_tarball.tar.gz") +def test_create_and_send_tarball_success() -> None: + """Test create and send tarball successfully.""" + service = _create_test_service() + files = [Path("/tmp/feedback/file1.json")] with ( - patch.object(service, "_create_tarball", return_value=tarball_path), - patch.object(service, "_send_tarball"), - patch.object(service, "_cleanup_files"), - patch.object(service, "_cleanup_empty_directories"), - patch.object(service, "_cleanup_tarball"), + patch.object( + service, "_create_tarball", return_value=Path("/tmp/test.tar.gz") + ) as mock_create, + patch.object(service, "_send_tarball") as mock_send, + patch.object(service, "_cleanup_files") as mock_cleanup_files, + patch.object(service, "_cleanup_empty_directories") as mock_cleanup_dirs, + patch.object(service, "_cleanup_tarball") as mock_cleanup_tarball, ): - result = service._create_and_send_tarball(files, "test", Path("/tmp")) - assert result == 1 + result = service._create_and_send_tarball( + files, "feedback", service.feedback_dir + ) + + assert result == 1 + mock_create.assert_called_once_with(files, "feedback", service.feedback_dir) + mock_send.assert_called_once_with(Path("/tmp/test.tar.gz")) + mock_cleanup_files.assert_called_once_with(files) + mock_cleanup_dirs.assert_called_once() + mock_cleanup_tarball.assert_called_once_with(Path("/tmp/test.tar.gz")) -@patch("services.data_collector.configuration") -def test_create_and_send_tarball_no_cleanup(mock_config) -> None: - """Test creating and sending tarball without cleanup.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.data_collector.cleanup_after_send = ( - False - ) - files = [Path("/tmp/test/file1.json")] +def test_create_and_send_tarball_no_cleanup() -> None: + """Test create and send tarball without cleanup.""" + service = _create_test_service(cleanup_after_send=False) + files = [Path("/tmp/feedback/file1.json")] with ( - patch.object(service, "_create_tarball", return_value=Path("/tmp/test.tar.gz")), - patch.object(service, "_send_tarball"), - patch.object(service, "_cleanup_tarball"), + patch.object( + service, "_create_tarball", return_value=Path("/tmp/test.tar.gz") + ) as mock_create, + patch.object(service, "_send_tarball") as mock_send, + patch.object(service, "_cleanup_files") as mock_cleanup_files, + patch.object(service, "_cleanup_empty_directories") as mock_cleanup_dirs, + patch.object(service, "_cleanup_tarball") as mock_cleanup_tarball, ): - result = service._create_and_send_tarball(files, "test", Path("/tmp")) + + result = service._create_and_send_tarball( + files, "feedback", service.feedback_dir + ) + assert result == 1 + mock_create.assert_called_once_with(files, "feedback", service.feedback_dir) + mock_send.assert_called_once_with(Path("/tmp/test.tar.gz")) + # Cleanup should not be called when cleanup_after_send is False + mock_cleanup_files.assert_not_called() + mock_cleanup_dirs.assert_not_called() + # But tarball cleanup should still happen + mock_cleanup_tarball.assert_called_once_with(Path("/tmp/test.tar.gz")) -@patch("services.data_collector.datetime") -@patch("services.data_collector.tempfile.gettempdir") @patch("services.data_collector.tarfile.open") -def test_create_tarball_success(mock_tarfile, mock_gettempdir, mock_datetime) -> None: +@patch("services.data_collector.tempfile.gettempdir", return_value="/tmp") +@patch("services.data_collector.datetime") +def test_create_tarball_success( + mock_datetime, mock_gettempdir, mock_tarfile_open +) -> None: """Test creating tarball successfully.""" - service = DataCollectorService() - mock_datetime.now.return_value.strftime.return_value = "20230101_120000" - mock_gettempdir.return_value = "/tmp" + service = _create_test_service() + files = [Path("/tmp/feedback/file1.json")] - mock_tar = MagicMock() - mock_tarfile.return_value.__enter__.return_value = mock_tar + # Mock datetime to return predictable timestamp + mock_datetime.now.return_value.strftime.return_value = "20231201_120000" - files = [Path("/data/test/file1.json")] + # Mock tarfile + mock_tar = MagicMock() + mock_tarfile_open.return_value.__enter__.return_value = mock_tar + # Mock Path.stat() for the created tarball with patch.object(Path, "stat") as mock_stat: mock_stat.return_value.st_size = 1024 - result = service._create_tarball(files, "test", Path("/data")) + result = service._create_tarball(files, "feedback", service.feedback_dir) - expected_path = Path("/tmp/test_20230101_120000.tar.gz") + expected_path = Path("/tmp/feedback_20231201_120000.tar.gz") assert result == expected_path + mock_tarfile_open.assert_called_once_with(expected_path, "w:gz") mock_tar.add.assert_called_once() -@patch("services.data_collector.datetime") -@patch("services.data_collector.tempfile.gettempdir") @patch("services.data_collector.tarfile.open") -def test_create_tarball_file_add_error( - mock_tarfile, mock_gettempdir, mock_datetime -) -> None: +def test_create_tarball_file_add_error(mock_tarfile_open) -> None: """Test creating tarball with file add error.""" - service = DataCollectorService() - mock_datetime.now.return_value.strftime.return_value = "20230101_120000" - mock_gettempdir.return_value = "/tmp" + service = _create_test_service() + files = [Path("/tmp/feedback/file1.json")] + # Mock tarfile to raise error on add mock_tar = MagicMock() - mock_tar.add.side_effect = OSError("File error") - mock_tarfile.return_value.__enter__.return_value = mock_tar - - files = [Path("/data/test/file1.json")] + mock_tar.add.side_effect = OSError("Permission denied") + mock_tarfile_open.return_value.__enter__.return_value = mock_tar with patch.object(Path, "stat") as mock_stat: mock_stat.return_value.st_size = 1024 - result = service._create_tarball(files, "test", Path("/data")) - - expected_path = Path("/tmp/test_20230101_120000.tar.gz") - assert result == expected_path + # Should not raise exception, just log warning + result = service._create_tarball(files, "feedback", service.feedback_dir) + assert isinstance(result, Path) -@patch("services.data_collector.configuration") @patch("services.data_collector.requests.post") -def test_send_tarball_success(mock_post, mock_config) -> None: - """Test successful tarball sending.""" - service = DataCollectorService() - - mock_config.user_data_collection_configuration.data_collector.ingress_server_url = ( - "http://test.com" - ) - mock_config.user_data_collection_configuration.data_collector.ingress_server_auth_token = ( - "token" - ) - mock_config.user_data_collection_configuration.data_collector.connection_timeout = ( - 30 - ) +def test_send_tarball_success(mock_post) -> None: + """Test sending tarball successfully.""" + service = _create_test_service() + tarball_path = Path("/tmp/test.tar.gz") + # Mock successful response mock_response = MagicMock() mock_response.status_code = 200 mock_post.return_value = mock_response - with patch("builtins.open", create=True) as mock_open: + with patch("builtins.open", mock_data=b"test data") as mock_open: mock_open.return_value.__enter__.return_value.read.return_value = b"test data" - service._send_tarball(Path("/tmp/test.tar.gz")) + + service._send_tarball(tarball_path) mock_post.assert_called_once() + call_args = mock_post.call_args + assert call_args[1]["data"] == b"test data" + assert "Authorization" in call_args[1]["headers"] + assert call_args[1]["headers"]["Authorization"] == "Bearer test-token" -@patch("services.data_collector.configuration") @patch("services.data_collector.requests.post") -def test_send_tarball_no_auth_token(mock_post, mock_config) -> None: +def test_send_tarball_no_auth_token(mock_post) -> None: """Test sending tarball without auth token.""" - service = DataCollectorService() - - mock_config.user_data_collection_configuration.data_collector.ingress_server_url = ( - "http://test.com" - ) - mock_config.user_data_collection_configuration.data_collector.ingress_server_auth_token = ( - None - ) - mock_config.user_data_collection_configuration.data_collector.connection_timeout = ( - 30 - ) + service = _create_test_service(ingress_server_auth_token="") + tarball_path = Path("/tmp/test.tar.gz") + # Mock successful response mock_response = MagicMock() mock_response.status_code = 200 mock_post.return_value = mock_response - with patch("builtins.open", create=True): - service._send_tarball(Path("/tmp/test.tar.gz")) + with patch("builtins.open", mock_data=b"test data") as mock_open: + mock_open.return_value.__enter__.return_value.read.return_value = b"test data" + + service._send_tarball(tarball_path) + mock_post.assert_called_once() + call_args = mock_post.call_args + assert "Authorization" not in call_args[1]["headers"] -@patch("services.data_collector.configuration") @patch("services.data_collector.requests.post") -def test_send_tarball_http_error(mock_post, mock_config) -> None: - """Test tarball sending with HTTP error.""" - service = DataCollectorService() - - mock_config.user_data_collection_configuration.data_collector.ingress_server_url = ( - "http://test.com" - ) - mock_config.user_data_collection_configuration.data_collector.connection_timeout = ( - 30 - ) +def test_send_tarball_http_error(mock_post) -> None: + """Test sending tarball with HTTP error.""" + service = _create_test_service() + tarball_path = Path("/tmp/test.tar.gz") + # Mock error response mock_response = MagicMock() mock_response.status_code = 500 - mock_response.text = "Server Error" + mock_response.text = "Internal Server Error" mock_post.return_value = mock_response - with patch("builtins.open", create=True): - try: - service._send_tarball(Path("/tmp/test.tar.gz")) - assert False, "Expected exception" - except Exception as e: - assert "Failed to send tarball" in str(e) + with patch("builtins.open", mock_data=b"test data") as mock_open: + mock_open.return_value.__enter__.return_value.read.return_value = b"test data" + try: + service._send_tarball(tarball_path) + assert False, "Expected HTTPError to be raised" + except requests.HTTPError as e: + assert "500" in str(e) -@patch("services.data_collector.configuration") -def test_send_tarball_missing_url(mock_config) -> None: - """Test tarball sending when ingress server URL is None.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.data_collector.ingress_server_url = ( - None - ) +def test_send_tarball_missing_url() -> None: + """Test sending tarball with missing URL.""" + service = _create_test_service(ingress_server_url="") + tarball_path = Path("/tmp/test.tar.gz") - try: - service._send_tarball(Path("/tmp/test.tar.gz")) - assert False, "Expected ValueError" - except ValueError as e: - assert "Ingress server URL is not configured" in str(e) + with patch("builtins.open", mock_data=b"test data") as mock_open: + mock_open.return_value.__enter__.return_value.read.return_value = b"test data" + # This should raise an exception when requests.post is called with empty URL + try: + service._send_tarball(tarball_path) + except Exception: + pass # Expected to fail with empty URL -@patch("services.data_collector.configuration") -def test_perform_collection_with_specific_exceptions(mock_config) -> None: - """Test _perform_collection with specific exception types that should be caught.""" - service = DataCollectorService() - # Test with OSError - with ( - patch.object( - service, "_collect_feedback_files", return_value=[Path("/tmp/test.json")] - ), - patch.object(service, "_collect_transcript_files", return_value=[]), - patch.object( - service, "_create_and_send_tarball", side_effect=OSError("OS Error") - ), - ): - try: - service._perform_collection() - assert False, "Expected OSError" - except OSError as e: - assert str(e) == "OS Error" +def test_perform_collection_with_specific_exceptions() -> None: + """Test perform collection with specific exception types.""" + service = _create_test_service() + feedback_files = [Path("/tmp/feedback/file1.json")] # Test with requests.RequestException with ( - patch.object( - service, "_collect_feedback_files", return_value=[Path("/tmp/test.json")] - ), + patch.object(service, "_collect_feedback_files", return_value=feedback_files), patch.object(service, "_collect_transcript_files", return_value=[]), patch.object( service, "_create_and_send_tarball", - side_effect=requests.RequestException("Request Error"), + side_effect=requests.RequestException("Network error"), ), ): + try: service._perform_collection() - assert False, "Expected RequestException" - except requests.RequestException as e: - assert str(e) == "Request Error" + assert False, "Expected RequestException to be raised" + except requests.RequestException: + pass # Test with tarfile.TarError with ( - patch.object( - service, "_collect_feedback_files", return_value=[Path("/tmp/test.json")] - ), + patch.object(service, "_collect_feedback_files", return_value=feedback_files), patch.object(service, "_collect_transcript_files", return_value=[]), patch.object( service, "_create_and_send_tarball", - side_effect=tarfile.TarError("Tar Error"), + side_effect=tarfile.TarError("Tar error"), ), ): + try: service._perform_collection() - assert False, "Expected TarError" - except tarfile.TarError as e: - assert str(e) == "Tar Error" + assert False, "Expected TarError to be raised" + except tarfile.TarError: + pass def test_cleanup_files_success() -> None: - """Test successful file cleanup.""" - service = DataCollectorService() - files = [Path("/tmp/test1.json"), Path("/tmp/test2.json")] + """Test cleaning up files successfully.""" + service = _create_test_service() + files = [Path("/tmp/file1.json"), Path("/tmp/file2.json")] with patch.object(Path, "unlink") as mock_unlink: service._cleanup_files(files) + assert mock_unlink.call_count == 2 def test_cleanup_files_with_error() -> None: - """Test file cleanup with error.""" - service = DataCollectorService() - files = [Path("/tmp/test1.json")] + """Test cleaning up files with error.""" + service = _create_test_service() + files = [Path("/tmp/file1.json")] - with patch.object(Path, "unlink") as mock_unlink: - mock_unlink.side_effect = OSError("Permission denied") + with patch.object(Path, "unlink", side_effect=OSError("Permission denied")): + # Should not raise exception, just log warning service._cleanup_files(files) - mock_unlink.assert_called_once() def test_cleanup_tarball_success() -> None: - """Test successful tarball cleanup.""" - service = DataCollectorService() + """Test cleaning up tarball successfully.""" + service = _create_test_service() + tarball_path = Path("/tmp/test.tar.gz") with patch.object(Path, "unlink") as mock_unlink: - service._cleanup_tarball(Path("/tmp/test.tar.gz")) + service._cleanup_tarball(tarball_path) mock_unlink.assert_called_once() def test_cleanup_tarball_with_error() -> None: - """Test tarball cleanup with error.""" - service = DataCollectorService() + """Test cleaning up tarball with error.""" + service = _create_test_service() + tarball_path = Path("/tmp/test.tar.gz") - with patch.object(Path, "unlink") as mock_unlink: - mock_unlink.side_effect = OSError("Permission denied") - service._cleanup_tarball(Path("/tmp/test.tar.gz")) - mock_unlink.assert_called_once() + with patch.object(Path, "unlink", side_effect=OSError("Permission denied")): + # Should not raise exception, just log warning + service._cleanup_tarball(tarball_path) -@patch("services.data_collector.configuration") -def test_cleanup_empty_directories_disabled(mock_config) -> None: - """Test directory cleanup when transcripts disabled.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.transcripts_enabled = False +def test_cleanup_empty_directories_directory_not_exists() -> None: + """Test cleanup empty directories when directory doesn't exist.""" + service = _create_test_service(transcripts_dir=Path("/nonexistent")) + # Should not raise exception service._cleanup_empty_directories() -@patch("services.data_collector.configuration") -def test_cleanup_empty_directories_success(mock_config) -> None: - """Test successful directory cleanup.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.transcripts_enabled = True - mock_config.user_data_collection_configuration.transcripts_storage = ( - "/tmp/transcripts" - ) - - transcripts_dir = MagicMock() - user_dir = MagicMock() - conv_dir = MagicMock() - - transcripts_dir.exists.return_value = True - transcripts_dir.iterdir.return_value = [user_dir] - user_dir.is_dir.return_value = True - user_dir.iterdir.side_effect = [ - [conv_dir], - [], - ] # First call returns conv_dir, second call empty - conv_dir.is_dir.return_value = True - conv_dir.iterdir.return_value = [] # Empty directory - - with patch("services.data_collector.Path", return_value=transcripts_dir): - service._cleanup_empty_directories() - - conv_dir.rmdir.assert_called_once() - user_dir.rmdir.assert_called_once() - - -@patch("services.data_collector.configuration") -def test_cleanup_empty_directories_with_errors(mock_config) -> None: - """Test directory cleanup when rmdir operations fail.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.transcripts_enabled = True - mock_config.user_data_collection_configuration.transcripts_storage = ( - "/tmp/transcripts" - ) +def test_cleanup_empty_directories_success() -> None: + """Test cleaning up empty directories successfully.""" + service = _create_test_service() - transcripts_dir = MagicMock() - user_dir = MagicMock() - conv_dir = MagicMock() + # Mock directory structure + mock_user_dir = MagicMock() + mock_conv_dir = MagicMock() + mock_conv_dir.is_dir.return_value = True + mock_conv_dir.iterdir.return_value = [] # Empty directory + mock_user_dir.is_dir.return_value = True + mock_user_dir.iterdir.return_value = [mock_conv_dir] - transcripts_dir.exists.return_value = True - transcripts_dir.iterdir.return_value = [user_dir] - user_dir.is_dir.return_value = True - user_dir.iterdir.side_effect = [[conv_dir], []] - conv_dir.is_dir.return_value = True - conv_dir.iterdir.return_value = [] + with ( + patch("pathlib.Path.exists", return_value=True), + patch("pathlib.Path.iterdir", return_value=[mock_user_dir]), + ): - # Both rmdir operations fail - conv_dir.rmdir.side_effect = OSError("Permission denied") - user_dir.rmdir.side_effect = OSError("Permission denied") + # After removing conv_dir, user_dir should be empty + mock_user_dir.iterdir.side_effect = [ + [mock_conv_dir], + [], + ] # First call returns conv_dir, second call empty - with patch("services.data_collector.Path", return_value=transcripts_dir): - # Should not raise exception service._cleanup_empty_directories() - conv_dir.rmdir.assert_called_once() - user_dir.rmdir.assert_called_once() + mock_conv_dir.rmdir.assert_called_once() + mock_user_dir.rmdir.assert_called_once() -@patch("services.data_collector.configuration") -def test_cleanup_empty_directories_directory_not_exists(mock_config) -> None: - """Test directory cleanup when transcripts directory doesn't exist.""" - service = DataCollectorService() - mock_config.user_data_collection_configuration.transcripts_enabled = True - mock_config.user_data_collection_configuration.transcripts_storage = ( - "/tmp/transcripts" - ) +def test_cleanup_empty_directories_with_errors() -> None: + """Test cleaning up empty directories with errors.""" + service = _create_test_service() - with patch("services.data_collector.Path") as mock_path: - mock_path.return_value.exists.return_value = False + # Mock directory structure + mock_user_dir = MagicMock() + mock_conv_dir = MagicMock() + mock_conv_dir.is_dir.return_value = True + mock_conv_dir.iterdir.return_value = [] # Empty directory + mock_conv_dir.rmdir.side_effect = OSError("Permission denied") + mock_user_dir.is_dir.return_value = True + mock_user_dir.iterdir.return_value = [mock_conv_dir] - service._cleanup_empty_directories() + with ( + patch("pathlib.Path.exists", return_value=True), + patch("pathlib.Path.iterdir", return_value=[mock_user_dir]), + ): + # Should not raise exception even with rmdir errors + service._cleanup_empty_directories() -@patch("services.data_collector.configuration") -def test_perform_collection_with_transcript_files(mock_config) -> None: - """Test _perform_collection with transcript files only.""" - service = DataCollectorService() - transcript_files = [Path("/tmp/transcripts/file1.json")] +def test_perform_collection_with_transcript_files() -> None: + """Test perform collection with only transcript files.""" + service = _create_test_service() + transcript_files = [Path("/tmp/transcripts/user1/conv1/file1.json")] with ( patch.object(service, "_collect_feedback_files", return_value=[]), patch.object( service, "_collect_transcript_files", return_value=transcript_files ), - patch.object(service, "_create_and_send_tarball", return_value=1), + patch.object( + service, "_create_and_send_tarball", return_value=1 + ) as mock_create_send, ): + service._perform_collection() + + # Should be called once for transcripts only + mock_create_send.assert_called_once_with( + transcript_files, "transcripts", service.transcripts_dir + )