diff --git a/src/llama_stack_configuration.py b/src/llama_stack_configuration.py index e28c07ee..a4ce3726 100644 --- a/src/llama_stack_configuration.py +++ b/src/llama_stack_configuration.py @@ -1,10 +1,12 @@ """Llama Stack configuration handling.""" +from typing import Any + import yaml from log import get_logger -from models.config import Configuration +from models.config import Configuration, ByokRag logger = get_logger(__name__) @@ -28,10 +30,81 @@ def generate_configuration( with open(input_file, "r", encoding="utf-8") as file: ls_config = yaml.safe_load(file) - logger.info("Processing Llama Stack configuration") - _ = config + if len(config.byok_rag) == 0: + logger.info("BYOK RAG is not configured: finishing") + else: + logger.info("Processing Llama Stack configuration") + # create or update configuration section vector_dbs + ls_config["vector_dbs"] = construct_vector_dbs_section( + ls_config, config.byok_rag + ) + # create or update configuration section providers/vector_io + ls_config["providers"]["vector_io"] = construct_vector_io_providers_section( + ls_config, config.byok_rag + ) logger.info("Writing Llama Stack configuration into file %s", output_file) with open(output_file, "w", encoding="utf-8") as file: yaml.dump(ls_config, file, Dumper=YamlDumper, default_flow_style=False) + + +def construct_vector_dbs_section( + ls_config: dict[str, Any], byok_rag: list[ByokRag] +) -> list[dict[str, Any]]: + """Construct vector_dbs section in Llama Stack configuration file.""" + output = [] + + # fill-in existing vector_dbs entries + if "vector_dbs" in ls_config: + output = ls_config["vector_dbs"] + + # append new vector_dbs entries + for brag in byok_rag: + output.append( + { + "vector_db_id": brag.vector_db_id, + "provider_id": "byok_" + brag.vector_db_id, + "embedding_model": brag.embedding_model, + "embedding_dimension": brag.embedding_dimension, + } + ) + logger.info( + "Added %s items into vector_dbs section, total items %s", + len(byok_rag), + len(output), + ) + return output + + +def construct_vector_io_providers_section( + ls_config: dict[str, Any], byok_rag: list[ByokRag] +) -> list[dict[str, Any]]: + """Construct providers/vector_io section in Llama Stack configuration file.""" + output = [] + + # fill-in existing vector_io entries + if "vector_io" in ls_config["providers"]: + output = ls_config["providers"]["vector_io"] + + # append new vector_io entries + for brag in byok_rag: + output.append( + { + "provider_id": "byok_" + brag.vector_db_id, + "provider_type": brag.rag_type, + "config": { + "kvstore": { + "db_path": ".llama/" + brag.vector_db_id + ".db", + "namespace": None, + "type": "sqlite", + } + }, + } + ) + logger.info( + "Added %s items into providers/vector_io section, total items %s", + len(byok_rag), + len(output), + ) + return output diff --git a/tests/unit/test_llama_stack_configuration.py b/tests/unit/test_llama_stack_configuration.py new file mode 100644 index 00000000..52271afd --- /dev/null +++ b/tests/unit/test_llama_stack_configuration.py @@ -0,0 +1,379 @@ +"""Unit tests for functions defined in src/llama_stack_configuration.py.""" + +from pathlib import Path + +import pytest +import yaml + +from models.config import ( + ByokRag, + Configuration, + ServiceConfiguration, + LlamaStackConfiguration, + UserDataCollection, +) + +from constants import ( + DEFAULT_EMBEDDING_MODEL, + DEFAULT_EMBEDDING_DIMENSION, +) + +from llama_stack_configuration import ( + generate_configuration, + construct_vector_dbs_section, + construct_vector_io_providers_section, +) + + +def test_construct_vector_dbs_section_init() -> None: + """Test the function construct_vector_dbs_section for no vector_dbs configured before.""" + ls_config = {} + byok_rag = [] + output = construct_vector_dbs_section(ls_config, byok_rag) + assert len(output) == 0 + + +def test_construct_vector_dbs_section_init_with_existing_data() -> None: + """Test the function construct_vector_dbs_section for vector_dbs configured before.""" + ls_config = { + "vector_dbs": [ + { + "vector_db_id": "vector_db_id_1", + "provider_id": "provier_id_1", + "embedding_model": "embedding_model_1", + "embedding_dimension": 1, + }, + { + "vector_db_id": "vector_db_id_2", + "provider_id": "provier_id_2", + "embedding_model": "embedding_model_2", + "embedding_dimension": 2, + }, + ] + } + byok_rag = [] + output = construct_vector_dbs_section(ls_config, byok_rag) + assert len(output) == 2 + assert output[0] == { + "vector_db_id": "vector_db_id_1", + "provider_id": "provier_id_1", + "embedding_model": "embedding_model_1", + "embedding_dimension": 1, + } + assert output[1] == { + "vector_db_id": "vector_db_id_2", + "provider_id": "provier_id_2", + "embedding_model": "embedding_model_2", + "embedding_dimension": 2, + } + + +def test_construct_vector_dbs_section_append() -> None: + """Test the function construct_vector_dbs_section for no vector_dbs configured before.""" + ls_config = {} + byok_rag = [ + ByokRag( + rag_id="rag_id_1", + vector_db_id="vector_db_id_1", + db_path="tests/configuration/rag.txt", + ), + ByokRag( + rag_id="rag_id_2", + vector_db_id="vector_db_id_2", + db_path="tests/configuration/rag.txt", + ), + ] + output = construct_vector_dbs_section(ls_config, byok_rag) + assert len(output) == 2 + assert output[0] == { + "vector_db_id": "vector_db_id_1", + "provider_id": "byok_vector_db_id_1", + "embedding_model": DEFAULT_EMBEDDING_MODEL, + "embedding_dimension": DEFAULT_EMBEDDING_DIMENSION, + } + assert output[1] == { + "vector_db_id": "vector_db_id_2", + "provider_id": "byok_vector_db_id_2", + "embedding_model": DEFAULT_EMBEDDING_MODEL, + "embedding_dimension": DEFAULT_EMBEDDING_DIMENSION, + } + + +def test_construct_vector_dbs_section_full_merge() -> None: + """Test the function construct_vector_dbs_section for vector_dbs configured before.""" + ls_config = { + "vector_dbs": [ + { + "vector_db_id": "vector_db_id_1", + "provider_id": "provier_id_1", + "embedding_model": "embedding_model_1", + "embedding_dimension": 1, + }, + { + "vector_db_id": "vector_db_id_2", + "provider_id": "provier_id_2", + "embedding_model": "embedding_model_2", + "embedding_dimension": 2, + }, + ] + } + byok_rag = [ + ByokRag( + rag_id="rag_id_1", + vector_db_id="vector_db_id_1", + db_path="tests/configuration/rag.txt", + ), + ByokRag( + rag_id="rag_id_2", + vector_db_id="vector_db_id_2", + db_path="tests/configuration/rag.txt", + ), + ] + output = construct_vector_dbs_section(ls_config, byok_rag) + assert len(output) == 4 + assert output[0] == { + "vector_db_id": "vector_db_id_1", + "provider_id": "provier_id_1", + "embedding_model": "embedding_model_1", + "embedding_dimension": 1, + } + assert output[1] == { + "vector_db_id": "vector_db_id_2", + "provider_id": "provier_id_2", + "embedding_model": "embedding_model_2", + "embedding_dimension": 2, + } + assert output[2] == { + "vector_db_id": "vector_db_id_1", + "provider_id": "byok_vector_db_id_1", + "embedding_model": DEFAULT_EMBEDDING_MODEL, + "embedding_dimension": DEFAULT_EMBEDDING_DIMENSION, + } + assert output[3] == { + "vector_db_id": "vector_db_id_2", + "provider_id": "byok_vector_db_id_2", + "embedding_model": DEFAULT_EMBEDDING_MODEL, + "embedding_dimension": DEFAULT_EMBEDDING_DIMENSION, + } + + +def test_construct_vector_io_providers_section_init() -> None: + """Test construct_vector_io_providers_section for no vector_io_providers configured before.""" + ls_config = {"providers": {}} + byok_rag = [] + output = construct_vector_io_providers_section(ls_config, byok_rag) + assert len(output) == 0 + + +def test_construct_vector_io_providers_section_init_with_existing_data() -> None: + """Test construct_vector_io_providers_section for vector_io_providers configured before.""" + ls_config = { + "providers": { + "vector_io": [ + { + "provider_id": "faiss_1", + "provider_type": "inline::faiss", + }, + { + "provider_id": "faiss_2", + "provider_type": "inline::faiss", + }, + ] + } + } + byok_rag = [] + output = construct_vector_io_providers_section(ls_config, byok_rag) + assert len(output) == 2 + assert output[0] == { + "provider_id": "faiss_1", + "provider_type": "inline::faiss", + } + assert output[1] == { + "provider_id": "faiss_2", + "provider_type": "inline::faiss", + } + + +def test_construct_vector_io_providers_section_append() -> None: + """Test construct_vector_io_providers_section for no vector_io_providers configured before.""" + ls_config = {"providers": {}} + byok_rag = [ + ByokRag( + rag_id="rag_id_1", + vector_db_id="vector_db_id_1", + db_path="tests/configuration/rag.txt", + ), + ByokRag( + rag_id="rag_id_2", + vector_db_id="vector_db_id_2", + db_path="tests/configuration/rag.txt", + ), + ] + output = construct_vector_io_providers_section(ls_config, byok_rag) + assert len(output) == 2 + assert output[0] == { + "provider_id": "byok_vector_db_id_1", + "provider_type": "inline::faiss", + "config": { + "kvstore": { + "db_path": ".llama/vector_db_id_1.db", + "namespace": None, + "type": "sqlite", + }, + }, + } + assert output[1] == { + "provider_id": "byok_vector_db_id_2", + "provider_type": "inline::faiss", + "config": { + "kvstore": { + "db_path": ".llama/vector_db_id_2.db", + "namespace": None, + "type": "sqlite", + }, + }, + } + + +def test_construct_vector_io_providers_section_full_merge() -> None: + """Test construct_vector_io_providers_section for vector_io_providers configured before.""" + ls_config = { + "providers": { + "vector_io": [ + { + "provider_id": "faiss_1", + "provider_type": "inline::faiss", + }, + { + "provider_id": "faiss_2", + "provider_type": "inline::faiss", + }, + ] + } + } + byok_rag = [ + ByokRag( + rag_id="rag_id_1", + vector_db_id="vector_db_id_1", + db_path="tests/configuration/rag.txt", + ), + ByokRag( + rag_id="rag_id_2", + vector_db_id="vector_db_id_2", + db_path="tests/configuration/rag.txt", + ), + ] + output = construct_vector_io_providers_section(ls_config, byok_rag) + assert len(output) == 4 + assert output[0] == { + "provider_id": "faiss_1", + "provider_type": "inline::faiss", + } + assert output[1] == { + "provider_id": "faiss_2", + "provider_type": "inline::faiss", + } + assert output[2] == { + "provider_id": "byok_vector_db_id_1", + "provider_type": "inline::faiss", + "config": { + "kvstore": { + "db_path": ".llama/vector_db_id_1.db", + "namespace": None, + "type": "sqlite", + }, + }, + } + assert output[3] == { + "provider_id": "byok_vector_db_id_2", + "provider_type": "inline::faiss", + "config": { + "kvstore": { + "db_path": ".llama/vector_db_id_2.db", + "namespace": None, + "type": "sqlite", + }, + }, + } + + +def test_generate_configuration_no_input_file(tmpdir: Path) -> None: + """Test the function to generate configuration when input file does not exist.""" + cfg = Configuration( + name="test_name", + service=ServiceConfiguration(), + llama_stack=LlamaStackConfiguration( + use_as_library_client=True, + library_client_config_path="tests/configuration/run.yaml", + api_key="whatever", + ), + user_data_collection=UserDataCollection( + feedback_enabled=False, feedback_storage=None + ), + ) + outfile = tmpdir / "run.xml" + # try to generate new configuration file + with pytest.raises(FileNotFoundError, match="No such file"): + generate_configuration("/does/not/exist", outfile, cfg) + + +def test_generate_configuration_proper_input_file_no_byok(tmpdir: Path) -> None: + """Test the function to generate configuration when input file exists.""" + cfg = Configuration( + name="test_name", + service=ServiceConfiguration(), + llama_stack=LlamaStackConfiguration( + use_as_library_client=True, + library_client_config_path="tests/configuration/run.yaml", + api_key="whatever", + ), + user_data_collection=UserDataCollection( + feedback_enabled=False, feedback_storage=None + ), + ) + outfile = tmpdir / "run.xml" + # try to generate new configuration file + generate_configuration("tests/configuration/run.yaml", outfile, cfg) + + with open(outfile, "r", encoding="utf-8") as fin: + generated = yaml.safe_load(fin) + assert "vector_dbs" in generated + assert "providers" in generated + assert "vector_io" in generated["providers"] + + +def test_generate_configuration_proper_input_file_configured_byok(tmpdir: Path) -> None: + """Test the function to generate configuration when BYOK RAG should be added.""" + cfg = Configuration( + name="test_name", + service=ServiceConfiguration(), + llama_stack=LlamaStackConfiguration( + use_as_library_client=True, + library_client_config_path="tests/configuration/run.yaml", + api_key="whatever", + ), + user_data_collection=UserDataCollection( + feedback_enabled=False, feedback_storage=None + ), + byok_rag=[ + ByokRag( + rag_id="rag_id_1", + vector_db_id="vector_db_id_1", + db_path="tests/configuration/rag.txt", + ), + ByokRag( + rag_id="rag_id_2", + vector_db_id="vector_db_id_2", + db_path="tests/configuration/rag.txt", + ), + ], + ) + outfile = tmpdir / "run.xml" + # try to generate new configuration file + generate_configuration("tests/configuration/run.yaml", outfile, cfg) + + with open(outfile, "r", encoding="utf-8") as fin: + generated = yaml.safe_load(fin) + assert "vector_dbs" in generated + assert "providers" in generated + assert "vector_io" in generated["providers"]