From c888b96b5ce1132180c275cc77edb252facd2f28 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Thu, 4 Sep 2025 08:51:41 +0200 Subject: [PATCH 01/11] Update langfuse dependency to version 3.3.4 across multiple services and libraries; enhance tracing in TracedRunnable to include input and output in span updates. --- infrastructure/rag/Chart.lock | 6 +- infrastructure/rag/Chart.yaml | 2 +- infrastructure/rag/values.yaml | 155 ++++++++++++------ libs/admin-api-lib/poetry.lock | 115 +------------ libs/admin-api-lib/pyproject.toml | 2 +- libs/rag-core-api/poetry.lock | 51 +----- libs/rag-core-api/pyproject.toml | 2 +- .../evaluator/langfuse_ragas_evaluator.py | 116 ++++++------- libs/rag-core-lib/poetry.lock | 113 +------------ libs/rag-core-lib/pyproject.toml | 2 +- .../rag_core_lib/tracers/traced_runnable.py | 8 +- services/admin-backend/poetry.lock | 115 +------------ services/rag-backend/poetry.lock | 51 +----- 13 files changed, 208 insertions(+), 530 deletions(-) diff --git a/infrastructure/rag/Chart.lock b/infrastructure/rag/Chart.lock index 8cf43546..60ca10cc 100644 --- a/infrastructure/rag/Chart.lock +++ b/infrastructure/rag/Chart.lock @@ -1,7 +1,7 @@ dependencies: - name: langfuse repository: https://langfuse.github.io/langfuse-k8s - version: 0.12.1 + version: 1.5.1 - name: qdrant repository: https://qdrant.github.io/qdrant-helm version: 1.12.6 @@ -14,5 +14,5 @@ dependencies: - name: ollama repository: https://otwld.github.io/ollama-helm/ version: 1.1.0 -digest: sha256:9c99676d554fe68802c434b3daac2d50778e38d2793d10b84d1f4d6d78f99726 -generated: "2025-07-02T12:36:38.510315+02:00" +digest: sha256:02781b4c97462f6c600d3fadf7127790482a20e537596fed4722ef912d15ec00 +generated: "2025-09-03T15:42:41.704412+02:00" diff --git a/infrastructure/rag/Chart.yaml b/infrastructure/rag/Chart.yaml index 138164d1..d3a0ff4c 100644 --- a/infrastructure/rag/Chart.yaml +++ b/infrastructure/rag/Chart.yaml @@ -10,7 +10,7 @@ appVersion: "1.0.0" dependencies: - name: langfuse repository: https://langfuse.github.io/langfuse-k8s - version: "0.12.1" + version: "1.5.1" condition: features.langfuse.enabled - name: qdrant version: 1.12.6 diff --git a/infrastructure/rag/values.yaml b/infrastructure/rag/values.yaml index ac0e0160..31fd0d6a 100644 --- a/infrastructure/rag/values.yaml +++ b/infrastructure/rag/values.yaml @@ -445,25 +445,39 @@ shared: langfuse: - minio: - deploy: false - valkey: - deploy: false #<--- keydb is used instead of valkey - image: - repository: ghcr.io/langfuse/langfuse - pullPolicy: Always - tag: "3.27.2" - postgresql: - deploy: true - auth: - username: postgres - password: postgres - database: langfuse + # Core Langfuse Configuration langfuse: + # Used to hash API keys + salt: + value: "changeme" + + # Authentication settings + features: + telemetryEnabled: true + signUpDisabled: false + + # Web deployment configuration + web: + image: + repository: langfuse/langfuse + tag: "3.106.3" + pullPolicy: Always + + # Worker deployment configuration + worker: + image: + repository: langfuse/langfuse-worker + tag: "3.106.3" + pullPolicy: Always + port: 3030 + + # NextAuth configuration nextauth: url: http://localhost:3000 - secret: changeme - salt: changeme + secret: + value: "changeme" + + # Additional environment variables (only for init values) additionalEnv: - name: LANGFUSE_INIT_ORG_ID value: "" @@ -479,33 +493,8 @@ langfuse: value: "" - name: LANGFUSE_INIT_USER_PASSWORD value: "" - # REDIS - - name: "REDIS_CONNECTION_STRING" - value: "redis://rag-keydb:6379" - # CLICKHOUSE - - name: 
"CLICKHOUSE_MIGRATION_URL" - value: "clickhouse://rag-clickhouse:9000" - - name: "CLICKHOUSE_URL" - value: "http://rag-clickhouse:8123" - - name: "CLICKHOUSE_USER" - value: "default" - - name: "CLICKHOUSE_PASSWORD" - value: "changeme" - # S3 / MinIO - - name: "LANGFUSE_S3_EVENT_UPLOAD_ENABLED" - value: "true" - - name: "LANGFUSE_S3_EVENT_UPLOAD_BUCKET" - value: "langfuse" - - name: "LANGFUSE_S3_EVENT_UPLOAD_REGION" - value: "auto" - - name: "LANGFUSE_S3_EVENT_UPLOAD_ACCESS_KEY_ID" - value: "admin" - - name: "LANGFUSE_S3_EVENT_UPLOAD_SECRET_ACCESS_KEY" - value: "adminpassword" - - name: "LANGFUSE_S3_EVENT_UPLOAD_ENDPOINT" - value: "http://rag-minio:9000" - - name: "LANGFUSE_S3_EVENT_UPLOAD_FORCE_PATH_STYLE" - value: "true" + + # Additional init containers extraInitContainers: - name: wait-for-postgres image: busybox @@ -520,6 +509,77 @@ langfuse: # Define a reasonable timeout in case PostgreSQL fails to come up timeoutSeconds: 300 + # PostgreSQL Configuration (use external PostgreSQL) + postgresql: + deploy: true + host: "rag-postgresql" + port: 5432 + auth: + username: postgres + password: postgres + database: langfuse + + # Redis Configuration (external KeyDB) + redis: + deploy: false + host: "rag-keydb" + port: 6379 + auth: + username: "default" + password: "" + + # ClickHouse Configuration (external ClickHouse) + clickhouse: + deploy: true + host: "rag-clickhouse" + httpPort: 8123 + nativePort: 9000 + auth: + username: "default" + password: "changeme" + migration: + url: "clickhouse://rag-clickhouse:9000" + ssl: false + autoMigrate: true + resources: + limits: + cpu: "2" + memory: "8Gi" + requests: + cpu: "2" + memory: "4Gi" + + zookeeper: + resources: + limits: + cpu: "2" + memory: "2Gi" + requests: + cpu: "1" + memory: "1Gi" + + # S3/MinIO Configuration (external MinIO) + s3: + deploy: false + bucket: "langfuse" + region: "auto" + endpoint: "http://rag-minio:9000" + forcePathStyle: true + accessKeyId: + value: "admin" + secretAccessKey: + value: "adminpassword" + eventUpload: + enabled: true + bucket: "langfuse" + region: "auto" + endpoint: "http://rag-minio:9000" + forcePathStyle: true + accessKeyId: + value: "admin" + secretAccessKey: + value: "adminpassword" + minio: auth: ## @param auth.rootUser MinIO® root username @@ -537,10 +597,6 @@ minio: enabled: false mode: standalone - - - - ollama: image: tag: 0.5.1 @@ -556,3 +612,8 @@ ollama: qdrant: image: tag: v1.14.1 + +keydb: + multiMaster: "no" + activeReplicas: "no" + nodes: 1 diff --git a/libs/admin-api-lib/poetry.lock b/libs/admin-api-lib/poetry.lock index a98ba260..a2bcc1d5 100644 --- a/libs/admin-api-lib/poetry.lock +++ b/libs/admin-api-lib/poetry.lock @@ -1413,70 +1413,6 @@ files = [ docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] -[[package]] -name = "grpcio" -version = "1.73.1" -description = "HTTP/2-based RPC framework" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "grpcio-1.73.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:2d70f4ddd0a823436c2624640570ed6097e40935c9194482475fe8e3d9754d55"}, - {file = "grpcio-1.73.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:3841a8a5a66830261ab6a3c2a3dc539ed84e4ab019165f77b3eeb9f0ba621f26"}, - {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:628c30f8e77e0258ab788750ec92059fc3d6628590fb4b7cea8c102503623ed7"}, - {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67a0468256c9db6d5ecb1fde4bf409d016f42cef649323f0a08a72f352d1358b"}, - {file = 
"grpcio-1.73.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68b84d65bbdebd5926eb5c53b0b9ec3b3f83408a30e4c20c373c5337b4219ec5"}, - {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c54796ca22b8349cc594d18b01099e39f2b7ffb586ad83217655781a350ce4da"}, - {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:75fc8e543962ece2f7ecd32ada2d44c0c8570ae73ec92869f9af8b944863116d"}, - {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6a6037891cd2b1dd1406b388660522e1565ed340b1fea2955b0234bdd941a862"}, - {file = "grpcio-1.73.1-cp310-cp310-win32.whl", hash = "sha256:cce7265b9617168c2d08ae570fcc2af4eaf72e84f8c710ca657cc546115263af"}, - {file = "grpcio-1.73.1-cp310-cp310-win_amd64.whl", hash = "sha256:6a2b372e65fad38842050943f42ce8fee00c6f2e8ea4f7754ba7478d26a356ee"}, - {file = "grpcio-1.73.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:ba2cea9f7ae4bc21f42015f0ec98f69ae4179848ad744b210e7685112fa507a1"}, - {file = "grpcio-1.73.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:d74c3f4f37b79e746271aa6cdb3a1d7e4432aea38735542b23adcabaaee0c097"}, - {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:5b9b1805a7d61c9e90541cbe8dfe0a593dfc8c5c3a43fe623701b6a01b01d710"}, - {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3215f69a0670a8cfa2ab53236d9e8026bfb7ead5d4baabe7d7dc11d30fda967"}, - {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc5eccfd9577a5dc7d5612b2ba90cca4ad14c6d949216c68585fdec9848befb1"}, - {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dc7d7fd520614fce2e6455ba89791458020a39716951c7c07694f9dbae28e9c0"}, - {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:105492124828911f85127e4825d1c1234b032cb9d238567876b5515d01151379"}, - {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:610e19b04f452ba6f402ac9aa94eb3d21fbc94553368008af634812c4a85a99e"}, - {file = "grpcio-1.73.1-cp311-cp311-win32.whl", hash = "sha256:d60588ab6ba0ac753761ee0e5b30a29398306401bfbceffe7d68ebb21193f9d4"}, - {file = "grpcio-1.73.1-cp311-cp311-win_amd64.whl", hash = "sha256:6957025a4608bb0a5ff42abd75bfbb2ed99eda29d5992ef31d691ab54b753643"}, - {file = "grpcio-1.73.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:921b25618b084e75d424a9f8e6403bfeb7abef074bb6c3174701e0f2542debcf"}, - {file = "grpcio-1.73.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:277b426a0ed341e8447fbf6c1d6b68c952adddf585ea4685aa563de0f03df887"}, - {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:96c112333309493c10e118d92f04594f9055774757f5d101b39f8150f8c25582"}, - {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f48e862aed925ae987eb7084409a80985de75243389dc9d9c271dd711e589918"}, - {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83a6c2cce218e28f5040429835fa34a29319071079e3169f9543c3fbeff166d2"}, - {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:65b0458a10b100d815a8426b1442bd17001fdb77ea13665b2f7dc9e8587fdc6b"}, - {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0a9f3ea8dce9eae9d7cb36827200133a72b37a63896e0e61a9d5ec7d61a59ab1"}, - {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:de18769aea47f18e782bf6819a37c1c528914bfd5683b8782b9da356506190c8"}, - {file = "grpcio-1.73.1-cp312-cp312-win32.whl", hash = "sha256:24e06a5319e33041e322d32c62b1e728f18ab8c9dbc91729a3d9f9e3ed336642"}, - {file = "grpcio-1.73.1-cp312-cp312-win_amd64.whl", hash = "sha256:303c8135d8ab176f8038c14cc10d698ae1db9c480f2b2823f7a987aa2a4c5646"}, - {file = "grpcio-1.73.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:b310824ab5092cf74750ebd8a8a8981c1810cb2b363210e70d06ef37ad80d4f9"}, - {file = "grpcio-1.73.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:8f5a6df3fba31a3485096ac85b2e34b9666ffb0590df0cd044f58694e6a1f6b5"}, - {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:052e28fe9c41357da42250a91926a3e2f74c046575c070b69659467ca5aa976b"}, - {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c0bf15f629b1497436596b1cbddddfa3234273490229ca29561209778ebe182"}, - {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab860d5bfa788c5a021fba264802e2593688cd965d1374d31d2b1a34cacd854"}, - {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:ad1d958c31cc91ab050bd8a91355480b8e0683e21176522bacea225ce51163f2"}, - {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f43ffb3bd415c57224c7427bfb9e6c46a0b6e998754bfa0d00f408e1873dcbb5"}, - {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:686231cdd03a8a8055f798b2b54b19428cdf18fa1549bee92249b43607c42668"}, - {file = "grpcio-1.73.1-cp313-cp313-win32.whl", hash = "sha256:89018866a096e2ce21e05eabed1567479713ebe57b1db7cbb0f1e3b896793ba4"}, - {file = "grpcio-1.73.1-cp313-cp313-win_amd64.whl", hash = "sha256:4a68f8c9966b94dff693670a5cf2b54888a48a5011c5d9ce2295a1a1465ee84f"}, - {file = "grpcio-1.73.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:b4adc97d2d7f5c660a5498bda978ebb866066ad10097265a5da0511323ae9f50"}, - {file = "grpcio-1.73.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:c45a28a0cfb6ddcc7dc50a29de44ecac53d115c3388b2782404218db51cb2df3"}, - {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:10af9f2ab98a39f5b6c1896c6fc2036744b5b41d12739d48bed4c3e15b6cf900"}, - {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:45cf17dcce5ebdb7b4fe9e86cb338fa99d7d1bb71defc78228e1ddf8d0de8cbb"}, - {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c502c2e950fc7e8bf05c047e8a14522ef7babac59abbfde6dbf46b7a0d9c71e"}, - {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6abfc0f9153dc4924536f40336f88bd4fe7bd7494f028675e2e04291b8c2c62a"}, - {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ed451a0e39c8e51eb1612b78686839efd1a920666d1666c1adfdb4fd51680c0f"}, - {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:07f08705a5505c9b5b0cbcbabafb96462b5a15b7236bbf6bbcc6b0b91e1cbd7e"}, - {file = "grpcio-1.73.1-cp39-cp39-win32.whl", hash = "sha256:ad5c958cc3d98bb9d71714dc69f1c13aaf2f4b53e29d4cc3f1501ef2e4d129b2"}, - {file = "grpcio-1.73.1-cp39-cp39-win_amd64.whl", hash = "sha256:42f0660bce31b745eb9d23f094a332d31f210dcadd0fc8e5be7e4c62a87ce86b"}, - {file = "grpcio-1.73.1.tar.gz", hash = "sha256:7fce2cd1c0c1116cf3850564ebfc3264fba75d3c74a7414373f1238ea365ef87"}, -] - -[package.extras] -protobuf = ["grpcio-tools (>=1.73.1)"] - [[package]] name = "h11" version = "0.16.0" @@ -1922,23 +1858,23 @@ langchain-core = 
">=0.3.51,<1.0.0" [[package]] name = "langfuse" -version = "3.0.0" +version = "3.3.4" description = "A client library for accessing langfuse" optional = false python-versions = "<4.0,>=3.9" groups = ["main"] files = [ - {file = "langfuse-3.0.0-py3-none-any.whl", hash = "sha256:5f493bd15760b15195cc71ec7aaa4f93008058a1f5dfe4b32f72f31cbdc6f605"}, - {file = "langfuse-3.0.0.tar.gz", hash = "sha256:c47449ae93a3007efee6b861484f529efb187ddbfb5093e0cb94b84636e0a605"}, + {file = "langfuse-3.3.4-py3-none-any.whl", hash = "sha256:15b9d20878cf39a48ca9cfa7e52acdfeb043603d3a9cef8cf451687a4d838c6b"}, + {file = "langfuse-3.3.4.tar.gz", hash = "sha256:e5df4e7284298990b522e02a1dc6c3c72ebc4a7a411dc7d39255fb8c2e5a7c3a"}, ] [package.dependencies] backoff = ">=1.10.0" httpx = ">=0.15.4,<1.0" opentelemetry-api = ">=1.33.1,<2.0.0" -opentelemetry-exporter-otlp = ">=1.33.1,<2.0.0" +opentelemetry-exporter-otlp-proto-http = ">=1.33.1,<2.0.0" opentelemetry-sdk = ">=1.33.1,<2.0.0" -packaging = ">=23.2,<25.0" +packaging = ">=23.2,<26.0" pydantic = ">=1.10.7,<3.0" requests = ">=2,<3" wrapt = ">=1.14,<2.0" @@ -2351,22 +2287,6 @@ files = [ importlib-metadata = ">=6.0,<8.8.0" typing-extensions = ">=4.5.0" -[[package]] -name = "opentelemetry-exporter-otlp" -version = "1.34.1" -description = "OpenTelemetry Collector Exporters" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "opentelemetry_exporter_otlp-1.34.1-py3-none-any.whl", hash = "sha256:f4a453e9cde7f6362fd4a090d8acf7881d1dc585540c7b65cbd63e36644238d4"}, - {file = "opentelemetry_exporter_otlp-1.34.1.tar.gz", hash = "sha256:71c9ad342d665d9e4235898d205db17c5764cd7a69acb8a5dcd6d5e04c4c9988"}, -] - -[package.dependencies] -opentelemetry-exporter-otlp-proto-grpc = "1.34.1" -opentelemetry-exporter-otlp-proto-http = "1.34.1" - [[package]] name = "opentelemetry-exporter-otlp-proto-common" version = "1.34.1" @@ -2382,27 +2302,6 @@ files = [ [package.dependencies] opentelemetry-proto = "1.34.1" -[[package]] -name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.34.1" -description = "OpenTelemetry Collector Protobuf over gRPC Exporter" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "opentelemetry_exporter_otlp_proto_grpc-1.34.1-py3-none-any.whl", hash = "sha256:04bb8b732b02295be79f8a86a4ad28fae3d4ddb07307a98c7aa6f331de18cca6"}, - {file = "opentelemetry_exporter_otlp_proto_grpc-1.34.1.tar.gz", hash = "sha256:7c841b90caa3aafcfc4fee58487a6c71743c34c6dc1787089d8b0578bbd794dd"}, -] - -[package.dependencies] -googleapis-common-protos = ">=1.52,<2.0" -grpcio = {version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""} -opentelemetry-api = ">=1.15,<2.0" -opentelemetry-exporter-otlp-proto-common = "1.34.1" -opentelemetry-proto = "1.34.1" -opentelemetry-sdk = ">=1.34.1,<1.35.0" -typing-extensions = ">=4.5.0" - [[package]] name = "opentelemetry-exporter-otlp-proto-http" version = "1.34.1" @@ -3160,7 +3059,7 @@ langchain = "^0.3.25" langchain-community = "0.3.23" langchain-core = "0.3.68" langchain-openai = "^0.3.27" -langfuse = "3.0.0" +langfuse = "3.3.4" oauthlib = "^3.2.2" openai = "^1.77.0" pydantic = "^2.11.4" @@ -4120,4 +4019,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "^3.13" -content-hash = "28827653a07bb505f4da3384ec9896368e8a8aa078b0d6fb6e690754766411c9" +content-hash = "e2650edcd0a50add03d5948363d4baf8ee8f8a785e47c27d9bf48c97de5bfe8e" diff --git a/libs/admin-api-lib/pyproject.toml b/libs/admin-api-lib/pyproject.toml index a811da10..2cc4896b 100644 --- 
a/libs/admin-api-lib/pyproject.toml +++ b/libs/admin-api-lib/pyproject.toml @@ -104,7 +104,7 @@ python-dateutil = "^2.9.0.post0" tenacity = "9.1.2" boto3 = "^1.38.10" tqdm = "^4.67.1" -langfuse = "3.0.0" +langfuse = "3.3.4" redis = "^6.0.0" pyyaml = "^6.0.2" python-multipart = "^0.0.20" diff --git a/libs/rag-core-api/poetry.lock b/libs/rag-core-api/poetry.lock index fbe46b75..b04315c9 100644 --- a/libs/rag-core-api/poetry.lock +++ b/libs/rag-core-api/poetry.lock @@ -2072,23 +2072,23 @@ six = "*" [[package]] name = "langfuse" -version = "3.0.0" +version = "3.3.4" description = "A client library for accessing langfuse" optional = false python-versions = "<4.0,>=3.9" groups = ["main"] files = [ - {file = "langfuse-3.0.0-py3-none-any.whl", hash = "sha256:5f493bd15760b15195cc71ec7aaa4f93008058a1f5dfe4b32f72f31cbdc6f605"}, - {file = "langfuse-3.0.0.tar.gz", hash = "sha256:c47449ae93a3007efee6b861484f529efb187ddbfb5093e0cb94b84636e0a605"}, + {file = "langfuse-3.3.4-py3-none-any.whl", hash = "sha256:15b9d20878cf39a48ca9cfa7e52acdfeb043603d3a9cef8cf451687a4d838c6b"}, + {file = "langfuse-3.3.4.tar.gz", hash = "sha256:e5df4e7284298990b522e02a1dc6c3c72ebc4a7a411dc7d39255fb8c2e5a7c3a"}, ] [package.dependencies] backoff = ">=1.10.0" httpx = ">=0.15.4,<1.0" opentelemetry-api = ">=1.33.1,<2.0.0" -opentelemetry-exporter-otlp = ">=1.33.1,<2.0.0" +opentelemetry-exporter-otlp-proto-http = ">=1.33.1,<2.0.0" opentelemetry-sdk = ">=1.33.1,<2.0.0" -packaging = ">=23.2,<25.0" +packaging = ">=23.2,<26.0" pydantic = ">=1.10.7,<3.0" requests = ">=2,<3" wrapt = ">=1.14,<2.0" @@ -2740,22 +2740,6 @@ files = [ importlib-metadata = ">=6.0,<8.8.0" typing-extensions = ">=4.5.0" -[[package]] -name = "opentelemetry-exporter-otlp" -version = "1.34.1" -description = "OpenTelemetry Collector Exporters" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "opentelemetry_exporter_otlp-1.34.1-py3-none-any.whl", hash = "sha256:f4a453e9cde7f6362fd4a090d8acf7881d1dc585540c7b65cbd63e36644238d4"}, - {file = "opentelemetry_exporter_otlp-1.34.1.tar.gz", hash = "sha256:71c9ad342d665d9e4235898d205db17c5764cd7a69acb8a5dcd6d5e04c4c9988"}, -] - -[package.dependencies] -opentelemetry-exporter-otlp-proto-grpc = "1.34.1" -opentelemetry-exporter-otlp-proto-http = "1.34.1" - [[package]] name = "opentelemetry-exporter-otlp-proto-common" version = "1.34.1" @@ -2771,27 +2755,6 @@ files = [ [package.dependencies] opentelemetry-proto = "1.34.1" -[[package]] -name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.34.1" -description = "OpenTelemetry Collector Protobuf over gRPC Exporter" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "opentelemetry_exporter_otlp_proto_grpc-1.34.1-py3-none-any.whl", hash = "sha256:04bb8b732b02295be79f8a86a4ad28fae3d4ddb07307a98c7aa6f331de18cca6"}, - {file = "opentelemetry_exporter_otlp_proto_grpc-1.34.1.tar.gz", hash = "sha256:7c841b90caa3aafcfc4fee58487a6c71743c34c6dc1787089d8b0578bbd794dd"}, -] - -[package.dependencies] -googleapis-common-protos = ">=1.52,<2.0" -grpcio = {version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""} -opentelemetry-api = ">=1.15,<2.0" -opentelemetry-exporter-otlp-proto-common = "1.34.1" -opentelemetry-proto = "1.34.1" -opentelemetry-sdk = ">=1.34.1,<1.35.0" -typing-extensions = ">=4.5.0" - [[package]] name = "opentelemetry-exporter-otlp-proto-http" version = "1.34.1" @@ -4022,7 +3985,7 @@ langchain = "^0.3.25" langchain-community = "0.3.23" langchain-core = "0.3.68" langchain-openai = "^0.3.27" -langfuse = 
"3.0.0" +langfuse = "3.3.4" oauthlib = "^3.2.2" openai = "^1.77.0" pydantic = "^2.11.4" @@ -5140,4 +5103,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "^3.13" -content-hash = "88f3259a6b320cbf2d0a21654e6025b953f62adf59dbce780ff8f52f1b1dffce" +content-hash = "473248a6215ddb8edf64461d6f699a40c772731f093a9b478a8fac4b6298e4b3" diff --git a/libs/rag-core-api/pyproject.toml b/libs/rag-core-api/pyproject.toml index d61057a4..301029df 100644 --- a/libs/rag-core-api/pyproject.toml +++ b/libs/rag-core-api/pyproject.toml @@ -31,7 +31,7 @@ pytest-asyncio = "^0.26.0" langchain-community = "0.3.23" fastembed = "^0.6.1" langdetect = "^1.0.9" -langfuse = "3.0.0" +langfuse = "3.3.4" [tool.poetry.group.dev.dependencies] diff --git a/libs/rag-core-api/src/rag_core_api/impl/evaluator/langfuse_ragas_evaluator.py b/libs/rag-core-api/src/rag_core_api/impl/evaluator/langfuse_ragas_evaluator.py index d933fdd5..0f674108 100644 --- a/libs/rag-core-api/src/rag_core_api/impl/evaluator/langfuse_ragas_evaluator.py +++ b/libs/rag-core-api/src/rag_core_api/impl/evaluator/langfuse_ragas_evaluator.py @@ -7,14 +7,12 @@ from asyncio import gather from datetime import datetime from json import JSONDecodeError -from time import sleep from uuid import uuid4 import ragas from datasets import Dataset from langchain_core.runnables import RunnableConfig from langfuse import Langfuse -from langfuse.api.core.api_error import ApiError from langfuse.api.resources.commons.errors.not_found_error import NotFoundError from langfuse._client.datasets import DatasetClient from ragas.llms import LangchainLLMWrapper @@ -164,67 +162,61 @@ async def _aevaluate_question(self, item, experiment_name: str, generation_time: async with self._semaphore: chat_request = ChatRequest(message=item.input) - try: - response = await self._chat_endpoint.achat(config["metadata"]["session_id"], chat_request) - except Exception as e: - logger.info("Error while answering question %s: %s", item.input, e) - response = None - - if response and response.citations: - output = {"answer": response.answer, "documents": [x.page_content for x in response.citations]} - else: - output = {"answer": None, "documents": None} - - langfuse_generation = self._langfuse.generation( - name=self._settings.evaluation_dataset_name, - input=item.input, - output=output, - start_time=generation_time, - end_time=datetime.now(), - ) - self._link_item2generation(item, langfuse_generation, experiment_name) - - if not (response and response.citations): - for metric in self.METRICS: - langfuse_generation.score( - name=metric.name, - value=self.DEFAULT_SCORE_VALUE, - ) - return - - eval_data = Dataset.from_dict( - { - "question": [item.input], - "answer": [output["answer"]], - "contexts": [output["documents"]], - "ground_truth": [item.expected_output], + # Use item.run context manager for trace + with item.run( + run_name=experiment_name, + run_metadata={"model": self._settings.model}, + run_description=f"Evaluation run for {experiment_name}", + ) as root_span: + # Use langfuse.start_as_current_generation for generation + try: + response = await self._chat_endpoint.achat(config["metadata"]["session_id"], chat_request) + except Exception as e: + logger.info("Error while answering question %s: %s", item.input, e) + response = None + output = { + "answer": response.answer if response else None, + "documents": ( + [x.page_content for x in response.citations] if response and response.citations else None + ), } - ) - - result = ragas.evaluate( - eval_data, - 
metrics=self.METRICS, - llm=self._llm_wrapped, - embeddings=self._embedder, - ) - for metric, score in result.scores[0].items(): - if math.isnan(score): - score = self.DEFAULT_SCORE_VALUE - langfuse_generation.score( - name=metric, - value=score, - ) + with self._langfuse.start_as_current_generation( + name="rag-eval-llm-call", + input={"question": item.input, "context": output["documents"]}, + metadata={"item_id": item.id, "run": experiment_name}, + model=self._settings.model, + ) as generation: + + generation.update(output=output["answer"]) + generation.update_trace( + input={"question": item.input, "context": output["documents"]}, + metadata={"item_id": item.id, "run": experiment_name}, + output=output["answer"], + ) - def _link_item2generation(self, item, generation, experiment_name, retries: int = 0): - try: - item.link(generation, experiment_name) - except ApiError as e: - logger.warning("Failed to link item to generation: %s", e) - retries += 1 - if retries > self.MAX_RETRIES: - raise e - sleep(1) - self._link_item2generation(item, generation, experiment_name, retries) + # Ragas metrics + if response and response.citations: + eval_data = Dataset.from_dict( + { + "question": [item.input], + "answer": [output["answer"]], + "contexts": [output["documents"]], + "ground_truth": [item.expected_output], + } + ) + result = ragas.evaluate( + eval_data, + metrics=self.METRICS, + llm=self._llm_wrapped, + embeddings=self._embedder, + ) + for metric, score in result.scores[0].items(): + if math.isnan(score): + score = self.DEFAULT_SCORE_VALUE + root_span.score_trace(name=metric, value=score) + else: + for metric in self.METRICS: + root_span.score_trace(name=metric.name, value=self.DEFAULT_SCORE_VALUE) def _get_dataset(self, dataset_name: str) -> DatasetClient: dataset = None @@ -240,7 +232,7 @@ def _get_dataset(self, dataset_name: str) -> DatasetClient: return dataset def _create_dataset(self, dataset_name: str = None): - self._langfuse.create_dataset(dataset_name) + self._langfuse.create_dataset(name=dataset_name) data = self._load_dataset_items() self._store_items_in_dataset(data, dataset_name) diff --git a/libs/rag-core-lib/poetry.lock b/libs/rag-core-lib/poetry.lock index ce06b496..c35db25f 100644 --- a/libs/rag-core-lib/poetry.lock +++ b/libs/rag-core-lib/poetry.lock @@ -1319,70 +1319,6 @@ files = [ docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] -[[package]] -name = "grpcio" -version = "1.73.1" -description = "HTTP/2-based RPC framework" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "grpcio-1.73.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:2d70f4ddd0a823436c2624640570ed6097e40935c9194482475fe8e3d9754d55"}, - {file = "grpcio-1.73.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:3841a8a5a66830261ab6a3c2a3dc539ed84e4ab019165f77b3eeb9f0ba621f26"}, - {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:628c30f8e77e0258ab788750ec92059fc3d6628590fb4b7cea8c102503623ed7"}, - {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67a0468256c9db6d5ecb1fde4bf409d016f42cef649323f0a08a72f352d1358b"}, - {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68b84d65bbdebd5926eb5c53b0b9ec3b3f83408a30e4c20c373c5337b4219ec5"}, - {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c54796ca22b8349cc594d18b01099e39f2b7ffb586ad83217655781a350ce4da"}, - {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:75fc8e543962ece2f7ecd32ada2d44c0c8570ae73ec92869f9af8b944863116d"}, - {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6a6037891cd2b1dd1406b388660522e1565ed340b1fea2955b0234bdd941a862"}, - {file = "grpcio-1.73.1-cp310-cp310-win32.whl", hash = "sha256:cce7265b9617168c2d08ae570fcc2af4eaf72e84f8c710ca657cc546115263af"}, - {file = "grpcio-1.73.1-cp310-cp310-win_amd64.whl", hash = "sha256:6a2b372e65fad38842050943f42ce8fee00c6f2e8ea4f7754ba7478d26a356ee"}, - {file = "grpcio-1.73.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:ba2cea9f7ae4bc21f42015f0ec98f69ae4179848ad744b210e7685112fa507a1"}, - {file = "grpcio-1.73.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:d74c3f4f37b79e746271aa6cdb3a1d7e4432aea38735542b23adcabaaee0c097"}, - {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:5b9b1805a7d61c9e90541cbe8dfe0a593dfc8c5c3a43fe623701b6a01b01d710"}, - {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3215f69a0670a8cfa2ab53236d9e8026bfb7ead5d4baabe7d7dc11d30fda967"}, - {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc5eccfd9577a5dc7d5612b2ba90cca4ad14c6d949216c68585fdec9848befb1"}, - {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dc7d7fd520614fce2e6455ba89791458020a39716951c7c07694f9dbae28e9c0"}, - {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:105492124828911f85127e4825d1c1234b032cb9d238567876b5515d01151379"}, - {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:610e19b04f452ba6f402ac9aa94eb3d21fbc94553368008af634812c4a85a99e"}, - {file = "grpcio-1.73.1-cp311-cp311-win32.whl", hash = "sha256:d60588ab6ba0ac753761ee0e5b30a29398306401bfbceffe7d68ebb21193f9d4"}, - {file = "grpcio-1.73.1-cp311-cp311-win_amd64.whl", hash = "sha256:6957025a4608bb0a5ff42abd75bfbb2ed99eda29d5992ef31d691ab54b753643"}, - {file = "grpcio-1.73.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:921b25618b084e75d424a9f8e6403bfeb7abef074bb6c3174701e0f2542debcf"}, - {file = "grpcio-1.73.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:277b426a0ed341e8447fbf6c1d6b68c952adddf585ea4685aa563de0f03df887"}, - {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:96c112333309493c10e118d92f04594f9055774757f5d101b39f8150f8c25582"}, - {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f48e862aed925ae987eb7084409a80985de75243389dc9d9c271dd711e589918"}, - {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83a6c2cce218e28f5040429835fa34a29319071079e3169f9543c3fbeff166d2"}, - {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:65b0458a10b100d815a8426b1442bd17001fdb77ea13665b2f7dc9e8587fdc6b"}, - {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0a9f3ea8dce9eae9d7cb36827200133a72b37a63896e0e61a9d5ec7d61a59ab1"}, - {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:de18769aea47f18e782bf6819a37c1c528914bfd5683b8782b9da356506190c8"}, - {file = "grpcio-1.73.1-cp312-cp312-win32.whl", hash = "sha256:24e06a5319e33041e322d32c62b1e728f18ab8c9dbc91729a3d9f9e3ed336642"}, - {file = "grpcio-1.73.1-cp312-cp312-win_amd64.whl", hash = "sha256:303c8135d8ab176f8038c14cc10d698ae1db9c480f2b2823f7a987aa2a4c5646"}, - {file = "grpcio-1.73.1-cp313-cp313-linux_armv7l.whl", hash = 
"sha256:b310824ab5092cf74750ebd8a8a8981c1810cb2b363210e70d06ef37ad80d4f9"}, - {file = "grpcio-1.73.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:8f5a6df3fba31a3485096ac85b2e34b9666ffb0590df0cd044f58694e6a1f6b5"}, - {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:052e28fe9c41357da42250a91926a3e2f74c046575c070b69659467ca5aa976b"}, - {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c0bf15f629b1497436596b1cbddddfa3234273490229ca29561209778ebe182"}, - {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab860d5bfa788c5a021fba264802e2593688cd965d1374d31d2b1a34cacd854"}, - {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:ad1d958c31cc91ab050bd8a91355480b8e0683e21176522bacea225ce51163f2"}, - {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f43ffb3bd415c57224c7427bfb9e6c46a0b6e998754bfa0d00f408e1873dcbb5"}, - {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:686231cdd03a8a8055f798b2b54b19428cdf18fa1549bee92249b43607c42668"}, - {file = "grpcio-1.73.1-cp313-cp313-win32.whl", hash = "sha256:89018866a096e2ce21e05eabed1567479713ebe57b1db7cbb0f1e3b896793ba4"}, - {file = "grpcio-1.73.1-cp313-cp313-win_amd64.whl", hash = "sha256:4a68f8c9966b94dff693670a5cf2b54888a48a5011c5d9ce2295a1a1465ee84f"}, - {file = "grpcio-1.73.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:b4adc97d2d7f5c660a5498bda978ebb866066ad10097265a5da0511323ae9f50"}, - {file = "grpcio-1.73.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:c45a28a0cfb6ddcc7dc50a29de44ecac53d115c3388b2782404218db51cb2df3"}, - {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:10af9f2ab98a39f5b6c1896c6fc2036744b5b41d12739d48bed4c3e15b6cf900"}, - {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:45cf17dcce5ebdb7b4fe9e86cb338fa99d7d1bb71defc78228e1ddf8d0de8cbb"}, - {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c502c2e950fc7e8bf05c047e8a14522ef7babac59abbfde6dbf46b7a0d9c71e"}, - {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6abfc0f9153dc4924536f40336f88bd4fe7bd7494f028675e2e04291b8c2c62a"}, - {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ed451a0e39c8e51eb1612b78686839efd1a920666d1666c1adfdb4fd51680c0f"}, - {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:07f08705a5505c9b5b0cbcbabafb96462b5a15b7236bbf6bbcc6b0b91e1cbd7e"}, - {file = "grpcio-1.73.1-cp39-cp39-win32.whl", hash = "sha256:ad5c958cc3d98bb9d71714dc69f1c13aaf2f4b53e29d4cc3f1501ef2e4d129b2"}, - {file = "grpcio-1.73.1-cp39-cp39-win_amd64.whl", hash = "sha256:42f0660bce31b745eb9d23f094a332d31f210dcadd0fc8e5be7e4c62a87ce86b"}, - {file = "grpcio-1.73.1.tar.gz", hash = "sha256:7fce2cd1c0c1116cf3850564ebfc3264fba75d3c74a7414373f1238ea365ef87"}, -] - -[package.extras] -protobuf = ["grpcio-tools (>=1.73.1)"] - [[package]] name = "h11" version = "0.16.0" @@ -1816,23 +1752,23 @@ langchain-core = ">=0.3.51,<1.0.0" [[package]] name = "langfuse" -version = "3.0.0" +version = "3.3.4" description = "A client library for accessing langfuse" optional = false python-versions = "<4.0,>=3.9" groups = ["main"] files = [ - {file = "langfuse-3.0.0-py3-none-any.whl", hash = "sha256:5f493bd15760b15195cc71ec7aaa4f93008058a1f5dfe4b32f72f31cbdc6f605"}, - {file = "langfuse-3.0.0.tar.gz", hash = 
"sha256:c47449ae93a3007efee6b861484f529efb187ddbfb5093e0cb94b84636e0a605"}, + {file = "langfuse-3.3.4-py3-none-any.whl", hash = "sha256:15b9d20878cf39a48ca9cfa7e52acdfeb043603d3a9cef8cf451687a4d838c6b"}, + {file = "langfuse-3.3.4.tar.gz", hash = "sha256:e5df4e7284298990b522e02a1dc6c3c72ebc4a7a411dc7d39255fb8c2e5a7c3a"}, ] [package.dependencies] backoff = ">=1.10.0" httpx = ">=0.15.4,<1.0" opentelemetry-api = ">=1.33.1,<2.0.0" -opentelemetry-exporter-otlp = ">=1.33.1,<2.0.0" +opentelemetry-exporter-otlp-proto-http = ">=1.33.1,<2.0.0" opentelemetry-sdk = ">=1.33.1,<2.0.0" -packaging = ">=23.2,<25.0" +packaging = ">=23.2,<26.0" pydantic = ">=1.10.7,<3.0" requests = ">=2,<3" wrapt = ">=1.14,<2.0" @@ -2245,22 +2181,6 @@ files = [ importlib-metadata = ">=6.0,<8.8.0" typing-extensions = ">=4.5.0" -[[package]] -name = "opentelemetry-exporter-otlp" -version = "1.34.1" -description = "OpenTelemetry Collector Exporters" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "opentelemetry_exporter_otlp-1.34.1-py3-none-any.whl", hash = "sha256:f4a453e9cde7f6362fd4a090d8acf7881d1dc585540c7b65cbd63e36644238d4"}, - {file = "opentelemetry_exporter_otlp-1.34.1.tar.gz", hash = "sha256:71c9ad342d665d9e4235898d205db17c5764cd7a69acb8a5dcd6d5e04c4c9988"}, -] - -[package.dependencies] -opentelemetry-exporter-otlp-proto-grpc = "1.34.1" -opentelemetry-exporter-otlp-proto-http = "1.34.1" - [[package]] name = "opentelemetry-exporter-otlp-proto-common" version = "1.34.1" @@ -2276,27 +2196,6 @@ files = [ [package.dependencies] opentelemetry-proto = "1.34.1" -[[package]] -name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.34.1" -description = "OpenTelemetry Collector Protobuf over gRPC Exporter" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "opentelemetry_exporter_otlp_proto_grpc-1.34.1-py3-none-any.whl", hash = "sha256:04bb8b732b02295be79f8a86a4ad28fae3d4ddb07307a98c7aa6f331de18cca6"}, - {file = "opentelemetry_exporter_otlp_proto_grpc-1.34.1.tar.gz", hash = "sha256:7c841b90caa3aafcfc4fee58487a6c71743c34c6dc1787089d8b0578bbd794dd"}, -] - -[package.dependencies] -googleapis-common-protos = ">=1.52,<2.0" -grpcio = {version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""} -opentelemetry-api = ">=1.15,<2.0" -opentelemetry-exporter-otlp-proto-common = "1.34.1" -opentelemetry-proto = "1.34.1" -opentelemetry-sdk = ">=1.34.1,<1.35.0" -typing-extensions = ">=4.5.0" - [[package]] name = "opentelemetry-exporter-otlp-proto-http" version = "1.34.1" @@ -3856,4 +3755,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "^3.13" -content-hash = "154d1d942e5fc55e3e24dbc03a58ca56dbe78f2c361024b72abb46af1353f1ec" +content-hash = "dd5c909bbf9fb89e9a2cc3d12a796ef92439b37ae62646431146b3f8dae278b0" diff --git a/libs/rag-core-lib/pyproject.toml b/libs/rag-core-lib/pyproject.toml index 086849f9..0f357328 100644 --- a/libs/rag-core-lib/pyproject.toml +++ b/libs/rag-core-lib/pyproject.toml @@ -18,7 +18,7 @@ pydantic-settings = "^2.2.1" pydantic = "^2.11.4" oauthlib = "^3.2.2" requests-oauthlib = "^2.0.0" -langfuse = "3.0.0" +langfuse = "3.3.4" deprecated = "^1.2.18" openai = "^1.77.0" langchain-core = "0.3.68" diff --git a/libs/rag-core-lib/src/rag_core_lib/tracers/traced_runnable.py b/libs/rag-core-lib/src/rag_core_lib/tracers/traced_runnable.py index c43978fd..4676523f 100644 --- a/libs/rag-core-lib/src/rag_core_lib/tracers/traced_runnable.py +++ b/libs/rag-core-lib/src/rag_core_lib/tracers/traced_runnable.py @@ -66,9 +66,11 @@ 
async def ainvoke( config = ensure_config(config) session_id = self._get_session_id(config) config_with_tracing = self._add_tracing_callback(config) - with self.langfuse_client.start_as_current_span(name="traced_runnable") as span: - span.update_trace(session_id=session_id) - return await self._inner_chain.ainvoke(chain_input, config=config_with_tracing) + with self.langfuse_client.start_as_current_span(name=self._inner_chain.__class__.__name__) as span: + span.update_trace(session_id=session_id, input=chain_input) + output = await self._inner_chain.ainvoke(chain_input, config=config_with_tracing) + span.update_trace(output=output) + return output @abstractmethod def _add_tracing_callback(self, config: Optional[RunnableConfig]) -> RunnableConfig: ... diff --git a/services/admin-backend/poetry.lock b/services/admin-backend/poetry.lock index b780b7a3..827bb0a4 100644 --- a/services/admin-backend/poetry.lock +++ b/services/admin-backend/poetry.lock @@ -14,7 +14,7 @@ develop = true boto3 = "^1.38.10" dependency-injector = "^4.46.0" fastapi = "^0.115.12" -langfuse = "3.0.0" +langfuse = "3.3.4" python-dateutil = "^2.9.0.post0" python-multipart = "^0.0.20" pyyaml = "^6.0.2" @@ -1441,70 +1441,6 @@ files = [ docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] -[[package]] -name = "grpcio" -version = "1.73.1" -description = "HTTP/2-based RPC framework" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "grpcio-1.73.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:2d70f4ddd0a823436c2624640570ed6097e40935c9194482475fe8e3d9754d55"}, - {file = "grpcio-1.73.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:3841a8a5a66830261ab6a3c2a3dc539ed84e4ab019165f77b3eeb9f0ba621f26"}, - {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:628c30f8e77e0258ab788750ec92059fc3d6628590fb4b7cea8c102503623ed7"}, - {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67a0468256c9db6d5ecb1fde4bf409d016f42cef649323f0a08a72f352d1358b"}, - {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68b84d65bbdebd5926eb5c53b0b9ec3b3f83408a30e4c20c373c5337b4219ec5"}, - {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c54796ca22b8349cc594d18b01099e39f2b7ffb586ad83217655781a350ce4da"}, - {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:75fc8e543962ece2f7ecd32ada2d44c0c8570ae73ec92869f9af8b944863116d"}, - {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6a6037891cd2b1dd1406b388660522e1565ed340b1fea2955b0234bdd941a862"}, - {file = "grpcio-1.73.1-cp310-cp310-win32.whl", hash = "sha256:cce7265b9617168c2d08ae570fcc2af4eaf72e84f8c710ca657cc546115263af"}, - {file = "grpcio-1.73.1-cp310-cp310-win_amd64.whl", hash = "sha256:6a2b372e65fad38842050943f42ce8fee00c6f2e8ea4f7754ba7478d26a356ee"}, - {file = "grpcio-1.73.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:ba2cea9f7ae4bc21f42015f0ec98f69ae4179848ad744b210e7685112fa507a1"}, - {file = "grpcio-1.73.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:d74c3f4f37b79e746271aa6cdb3a1d7e4432aea38735542b23adcabaaee0c097"}, - {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:5b9b1805a7d61c9e90541cbe8dfe0a593dfc8c5c3a43fe623701b6a01b01d710"}, - {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3215f69a0670a8cfa2ab53236d9e8026bfb7ead5d4baabe7d7dc11d30fda967"}, - {file = 
"grpcio-1.73.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc5eccfd9577a5dc7d5612b2ba90cca4ad14c6d949216c68585fdec9848befb1"}, - {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dc7d7fd520614fce2e6455ba89791458020a39716951c7c07694f9dbae28e9c0"}, - {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:105492124828911f85127e4825d1c1234b032cb9d238567876b5515d01151379"}, - {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:610e19b04f452ba6f402ac9aa94eb3d21fbc94553368008af634812c4a85a99e"}, - {file = "grpcio-1.73.1-cp311-cp311-win32.whl", hash = "sha256:d60588ab6ba0ac753761ee0e5b30a29398306401bfbceffe7d68ebb21193f9d4"}, - {file = "grpcio-1.73.1-cp311-cp311-win_amd64.whl", hash = "sha256:6957025a4608bb0a5ff42abd75bfbb2ed99eda29d5992ef31d691ab54b753643"}, - {file = "grpcio-1.73.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:921b25618b084e75d424a9f8e6403bfeb7abef074bb6c3174701e0f2542debcf"}, - {file = "grpcio-1.73.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:277b426a0ed341e8447fbf6c1d6b68c952adddf585ea4685aa563de0f03df887"}, - {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:96c112333309493c10e118d92f04594f9055774757f5d101b39f8150f8c25582"}, - {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f48e862aed925ae987eb7084409a80985de75243389dc9d9c271dd711e589918"}, - {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83a6c2cce218e28f5040429835fa34a29319071079e3169f9543c3fbeff166d2"}, - {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:65b0458a10b100d815a8426b1442bd17001fdb77ea13665b2f7dc9e8587fdc6b"}, - {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0a9f3ea8dce9eae9d7cb36827200133a72b37a63896e0e61a9d5ec7d61a59ab1"}, - {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:de18769aea47f18e782bf6819a37c1c528914bfd5683b8782b9da356506190c8"}, - {file = "grpcio-1.73.1-cp312-cp312-win32.whl", hash = "sha256:24e06a5319e33041e322d32c62b1e728f18ab8c9dbc91729a3d9f9e3ed336642"}, - {file = "grpcio-1.73.1-cp312-cp312-win_amd64.whl", hash = "sha256:303c8135d8ab176f8038c14cc10d698ae1db9c480f2b2823f7a987aa2a4c5646"}, - {file = "grpcio-1.73.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:b310824ab5092cf74750ebd8a8a8981c1810cb2b363210e70d06ef37ad80d4f9"}, - {file = "grpcio-1.73.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:8f5a6df3fba31a3485096ac85b2e34b9666ffb0590df0cd044f58694e6a1f6b5"}, - {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:052e28fe9c41357da42250a91926a3e2f74c046575c070b69659467ca5aa976b"}, - {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c0bf15f629b1497436596b1cbddddfa3234273490229ca29561209778ebe182"}, - {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab860d5bfa788c5a021fba264802e2593688cd965d1374d31d2b1a34cacd854"}, - {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:ad1d958c31cc91ab050bd8a91355480b8e0683e21176522bacea225ce51163f2"}, - {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f43ffb3bd415c57224c7427bfb9e6c46a0b6e998754bfa0d00f408e1873dcbb5"}, - {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:686231cdd03a8a8055f798b2b54b19428cdf18fa1549bee92249b43607c42668"}, - {file = "grpcio-1.73.1-cp313-cp313-win32.whl", hash = "sha256:89018866a096e2ce21e05eabed1567479713ebe57b1db7cbb0f1e3b896793ba4"}, - {file = "grpcio-1.73.1-cp313-cp313-win_amd64.whl", hash = "sha256:4a68f8c9966b94dff693670a5cf2b54888a48a5011c5d9ce2295a1a1465ee84f"}, - {file = "grpcio-1.73.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:b4adc97d2d7f5c660a5498bda978ebb866066ad10097265a5da0511323ae9f50"}, - {file = "grpcio-1.73.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:c45a28a0cfb6ddcc7dc50a29de44ecac53d115c3388b2782404218db51cb2df3"}, - {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:10af9f2ab98a39f5b6c1896c6fc2036744b5b41d12739d48bed4c3e15b6cf900"}, - {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:45cf17dcce5ebdb7b4fe9e86cb338fa99d7d1bb71defc78228e1ddf8d0de8cbb"}, - {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c502c2e950fc7e8bf05c047e8a14522ef7babac59abbfde6dbf46b7a0d9c71e"}, - {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6abfc0f9153dc4924536f40336f88bd4fe7bd7494f028675e2e04291b8c2c62a"}, - {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ed451a0e39c8e51eb1612b78686839efd1a920666d1666c1adfdb4fd51680c0f"}, - {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:07f08705a5505c9b5b0cbcbabafb96462b5a15b7236bbf6bbcc6b0b91e1cbd7e"}, - {file = "grpcio-1.73.1-cp39-cp39-win32.whl", hash = "sha256:ad5c958cc3d98bb9d71714dc69f1c13aaf2f4b53e29d4cc3f1501ef2e4d129b2"}, - {file = "grpcio-1.73.1-cp39-cp39-win_amd64.whl", hash = "sha256:42f0660bce31b745eb9d23f094a332d31f210dcadd0fc8e5be7e4c62a87ce86b"}, - {file = "grpcio-1.73.1.tar.gz", hash = "sha256:7fce2cd1c0c1116cf3850564ebfc3264fba75d3c74a7414373f1238ea365ef87"}, -] - -[package.extras] -protobuf = ["grpcio-tools (>=1.73.1)"] - [[package]] name = "h11" version = "0.16.0" @@ -1950,23 +1886,23 @@ langchain-core = ">=0.3.51,<1.0.0" [[package]] name = "langfuse" -version = "3.0.0" +version = "3.3.4" description = "A client library for accessing langfuse" optional = false python-versions = "<4.0,>=3.9" groups = ["main"] files = [ - {file = "langfuse-3.0.0-py3-none-any.whl", hash = "sha256:5f493bd15760b15195cc71ec7aaa4f93008058a1f5dfe4b32f72f31cbdc6f605"}, - {file = "langfuse-3.0.0.tar.gz", hash = "sha256:c47449ae93a3007efee6b861484f529efb187ddbfb5093e0cb94b84636e0a605"}, + {file = "langfuse-3.3.4-py3-none-any.whl", hash = "sha256:15b9d20878cf39a48ca9cfa7e52acdfeb043603d3a9cef8cf451687a4d838c6b"}, + {file = "langfuse-3.3.4.tar.gz", hash = "sha256:e5df4e7284298990b522e02a1dc6c3c72ebc4a7a411dc7d39255fb8c2e5a7c3a"}, ] [package.dependencies] backoff = ">=1.10.0" httpx = ">=0.15.4,<1.0" opentelemetry-api = ">=1.33.1,<2.0.0" -opentelemetry-exporter-otlp = ">=1.33.1,<2.0.0" +opentelemetry-exporter-otlp-proto-http = ">=1.33.1,<2.0.0" opentelemetry-sdk = ">=1.33.1,<2.0.0" -packaging = ">=23.2,<25.0" +packaging = ">=23.2,<26.0" pydantic = ">=1.10.7,<3.0" requests = ">=2,<3" wrapt = ">=1.14,<2.0" @@ -2379,22 +2315,6 @@ files = [ importlib-metadata = ">=6.0,<8.8.0" typing-extensions = ">=4.5.0" -[[package]] -name = "opentelemetry-exporter-otlp" -version = "1.34.1" -description = "OpenTelemetry Collector Exporters" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "opentelemetry_exporter_otlp-1.34.1-py3-none-any.whl", hash = 
"sha256:f4a453e9cde7f6362fd4a090d8acf7881d1dc585540c7b65cbd63e36644238d4"}, - {file = "opentelemetry_exporter_otlp-1.34.1.tar.gz", hash = "sha256:71c9ad342d665d9e4235898d205db17c5764cd7a69acb8a5dcd6d5e04c4c9988"}, -] - -[package.dependencies] -opentelemetry-exporter-otlp-proto-grpc = "1.34.1" -opentelemetry-exporter-otlp-proto-http = "1.34.1" - [[package]] name = "opentelemetry-exporter-otlp-proto-common" version = "1.34.1" @@ -2410,27 +2330,6 @@ files = [ [package.dependencies] opentelemetry-proto = "1.34.1" -[[package]] -name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.34.1" -description = "OpenTelemetry Collector Protobuf over gRPC Exporter" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "opentelemetry_exporter_otlp_proto_grpc-1.34.1-py3-none-any.whl", hash = "sha256:04bb8b732b02295be79f8a86a4ad28fae3d4ddb07307a98c7aa6f331de18cca6"}, - {file = "opentelemetry_exporter_otlp_proto_grpc-1.34.1.tar.gz", hash = "sha256:7c841b90caa3aafcfc4fee58487a6c71743c34c6dc1787089d8b0578bbd794dd"}, -] - -[package.dependencies] -googleapis-common-protos = ">=1.52,<2.0" -grpcio = {version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""} -opentelemetry-api = ">=1.15,<2.0" -opentelemetry-exporter-otlp-proto-common = "1.34.1" -opentelemetry-proto = "1.34.1" -opentelemetry-sdk = ">=1.34.1,<1.35.0" -typing-extensions = ">=4.5.0" - [[package]] name = "opentelemetry-exporter-otlp-proto-http" version = "1.34.1" @@ -3169,7 +3068,7 @@ langchain = "^0.3.25" langchain-community = "0.3.23" langchain-core = "0.3.68" langchain-openai = "^0.3.27" -langfuse = "3.0.0" +langfuse = "3.3.4" oauthlib = "^3.2.2" openai = "^1.77.0" pydantic = "^2.11.4" diff --git a/services/rag-backend/poetry.lock b/services/rag-backend/poetry.lock index 84f298e8..13a4090a 100644 --- a/services/rag-backend/poetry.lock +++ b/services/rag-backend/poetry.lock @@ -2072,23 +2072,23 @@ six = "*" [[package]] name = "langfuse" -version = "3.0.0" +version = "3.3.4" description = "A client library for accessing langfuse" optional = false python-versions = "<4.0,>=3.9" groups = ["main"] files = [ - {file = "langfuse-3.0.0-py3-none-any.whl", hash = "sha256:5f493bd15760b15195cc71ec7aaa4f93008058a1f5dfe4b32f72f31cbdc6f605"}, - {file = "langfuse-3.0.0.tar.gz", hash = "sha256:c47449ae93a3007efee6b861484f529efb187ddbfb5093e0cb94b84636e0a605"}, + {file = "langfuse-3.3.4-py3-none-any.whl", hash = "sha256:15b9d20878cf39a48ca9cfa7e52acdfeb043603d3a9cef8cf451687a4d838c6b"}, + {file = "langfuse-3.3.4.tar.gz", hash = "sha256:e5df4e7284298990b522e02a1dc6c3c72ebc4a7a411dc7d39255fb8c2e5a7c3a"}, ] [package.dependencies] backoff = ">=1.10.0" httpx = ">=0.15.4,<1.0" opentelemetry-api = ">=1.33.1,<2.0.0" -opentelemetry-exporter-otlp = ">=1.33.1,<2.0.0" +opentelemetry-exporter-otlp-proto-http = ">=1.33.1,<2.0.0" opentelemetry-sdk = ">=1.33.1,<2.0.0" -packaging = ">=23.2,<25.0" +packaging = ">=23.2,<26.0" pydantic = ">=1.10.7,<3.0" requests = ">=2,<3" wrapt = ">=1.14,<2.0" @@ -2740,22 +2740,6 @@ files = [ importlib-metadata = ">=6.0,<8.8.0" typing-extensions = ">=4.5.0" -[[package]] -name = "opentelemetry-exporter-otlp" -version = "1.34.1" -description = "OpenTelemetry Collector Exporters" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "opentelemetry_exporter_otlp-1.34.1-py3-none-any.whl", hash = "sha256:f4a453e9cde7f6362fd4a090d8acf7881d1dc585540c7b65cbd63e36644238d4"}, - {file = "opentelemetry_exporter_otlp-1.34.1.tar.gz", hash = 
"sha256:71c9ad342d665d9e4235898d205db17c5764cd7a69acb8a5dcd6d5e04c4c9988"}, -] - -[package.dependencies] -opentelemetry-exporter-otlp-proto-grpc = "1.34.1" -opentelemetry-exporter-otlp-proto-http = "1.34.1" - [[package]] name = "opentelemetry-exporter-otlp-proto-common" version = "1.34.1" @@ -2771,27 +2755,6 @@ files = [ [package.dependencies] opentelemetry-proto = "1.34.1" -[[package]] -name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.34.1" -description = "OpenTelemetry Collector Protobuf over gRPC Exporter" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "opentelemetry_exporter_otlp_proto_grpc-1.34.1-py3-none-any.whl", hash = "sha256:04bb8b732b02295be79f8a86a4ad28fae3d4ddb07307a98c7aa6f331de18cca6"}, - {file = "opentelemetry_exporter_otlp_proto_grpc-1.34.1.tar.gz", hash = "sha256:7c841b90caa3aafcfc4fee58487a6c71743c34c6dc1787089d8b0578bbd794dd"}, -] - -[package.dependencies] -googleapis-common-protos = ">=1.52,<2.0" -grpcio = {version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""} -opentelemetry-api = ">=1.15,<2.0" -opentelemetry-exporter-otlp-proto-common = "1.34.1" -opentelemetry-proto = "1.34.1" -opentelemetry-sdk = ">=1.34.1,<1.35.0" -typing-extensions = ">=4.5.0" - [[package]] name = "opentelemetry-exporter-otlp-proto-http" version = "1.34.1" @@ -4026,7 +3989,7 @@ langchain-community = "0.3.23" langchain-ollama = "^0.3.2" langchain-qdrant = "^0.2.0" langdetect = "^1.0.9" -langfuse = "3.0.0" +langfuse = "3.3.4" langgraph = "^0.4.2" openai = "^1.77.0" pillow = "^11.2.1" @@ -4059,7 +4022,7 @@ langchain = "^0.3.25" langchain-community = "0.3.23" langchain-core = "0.3.68" langchain-openai = "^0.3.27" -langfuse = "3.0.0" +langfuse = "3.3.4" oauthlib = "^3.2.2" openai = "^1.77.0" pydantic = "^2.11.4" From c7676fda0fccaee116cc27337034040436592600 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Tue, 7 Oct 2025 12:25:24 +0200 Subject: [PATCH 02/11] chore: add esbuild as a dependency in package.json and package-lock.json --- services/frontend/package-lock.json | 1 + services/frontend/package.json | 1 + 2 files changed, 2 insertions(+) diff --git a/services/frontend/package-lock.json b/services/frontend/package-lock.json index 865d3681..75936e95 100644 --- a/services/frontend/package-lock.json +++ b/services/frontend/package-lock.json @@ -43,6 +43,7 @@ "axios": "^1.9.0", "cypress": "^13.0.0", "daisyui": "^4.7.2", + "esbuild": "^0.25.10", "eslint": "^9.36.0", "eslint-plugin-cypress": "2.15.2", "eslint-plugin-import": "2.32.0", diff --git a/services/frontend/package.json b/services/frontend/package.json index 6367ff85..c98067ac 100644 --- a/services/frontend/package.json +++ b/services/frontend/package.json @@ -28,6 +28,7 @@ "vue-router": "^4.2.5" }, "devDependencies": { + "esbuild": "^0.25.10", "@eslint/js": "^9.36.0", "@intlify/unplugin-vue-i18n": "^6.0.0", "@nrwl/eslint-plugin-nx": "^19.8.14", From a1683ec17889b3a233b108ee6fb1cd31873a3648 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Tue, 7 Oct 2025 12:50:53 +0200 Subject: [PATCH 03/11] chore: update Tiltfile to simplify ignore patterns and enhance k3d cluster setup script with ingress-nginx installation --- Tiltfile | 14 ++-- .../local-cluster-setup/setup-k3d-cluster.sh | 67 ++++++++++++++++--- .../server-setup/base-setup/Chart.lock | 10 +-- .../server-setup/base-setup/Chart.yaml | 9 +-- .../server-setup/base-setup/values.yaml | 8 +++ 5 files changed, 81 insertions(+), 27 deletions(-) diff --git a/Tiltfile b/Tiltfile index a387a99f..3ebfd4ac 100644 --- a/Tiltfile +++ 
b/Tiltfile @@ -165,8 +165,7 @@ docker_build( dockerfile=backend_context + "/Dockerfile", ignore=[ "infrastructure/", - "services/frontend/.nx/", - "services/frontend/tmp/", + "services/frontend/", ], ) @@ -209,8 +208,7 @@ docker_build( dockerfile=mcp_context + "/Dockerfile", ignore=[ "infrastructure/", - "services/frontend/.nx/", - "services/frontend/tmp/", + "services/frontend/", ], ) @@ -246,7 +244,7 @@ docker_build( sync(core_library_context + "/admin-api-lib", "/app/libs/admin-api-lib"), ], dockerfile=admin_backend_context + "/Dockerfile", - ignore=["infrastructure/"], + ignore=["infrastructure/","services/frontend/"], ) # Add linter trigger @@ -290,7 +288,7 @@ docker_build( sync(core_library_context +"/extractor-api-lib", "/app/libs/extractor-api-lib"), ], dockerfile=extractor_context + "/Dockerfile", - ignore=["infrastructure/"], + ignore=["infrastructure/","services/frontend/"], ) # Add linter trigger @@ -331,7 +329,7 @@ docker_build( sync("./services/frontend/dist/apps/chat-app", "/usr/share/nginx/html"), sync("./services/frontend/dist/libs", "/usr/share/nginx/html/libs"), ], - ignore=["infrastructure/"], + ignore=["infrastructure/", "services/frontend/.nx/", "services/frontend/tmp/", "services/frontend/node_modules/"], ) ######################################################################################################################## @@ -349,7 +347,7 @@ docker_build( sync("./services/frontend/dist/apps/admin-app", "/usr/share/nginx/html"), sync("./services/frontend/dist/libs", "/usr/share/nginx/html/libs"), ], - ignore=["infrastructure/"], + ignore=["infrastructure/", "services/frontend/.nx/", "services/frontend/tmp/", "services/frontend/node_modules/"], ) diff --git a/infrastructure/local-cluster-setup/setup-k3d-cluster.sh b/infrastructure/local-cluster-setup/setup-k3d-cluster.sh index 86915614..5814f9fd 100755 --- a/infrastructure/local-cluster-setup/setup-k3d-cluster.sh +++ b/infrastructure/local-cluster-setup/setup-k3d-cluster.sh @@ -1,17 +1,64 @@ #!/bin/bash -k3d cluster create rag --config k3d-cluster-config.yaml --k3s-arg "--disable=traefik@server:*" -kubectl wait --for=condition=ready node --all --timeout=120s +# Fail fast on errors, unset vars, and propagate pipe failures +set -euo pipefail -REPO_NAME="bitnami" -# Add the Helm repository if it doesn't exist -if ! helm repo list | grep -q "$REPO_NAME"; then - echo "Adding Helm repository $REPO_NAME..." - helm repo add $REPO_NAME https://charts.bitnami.com/bitnami - helm repo update +CLUSTER_NAME="rag" +K3D_CONFIG_FILE="k3d-cluster-config.yaml" + +echo "Creating k3d cluster '${CLUSTER_NAME}' (if it does not already exist)..." +if k3d cluster list | awk '{print $1}' | grep -qx "${CLUSTER_NAME}"; then + echo "Cluster '${CLUSTER_NAME}' already exists. Skipping create." +else + k3d cluster create "${CLUSTER_NAME}" --config "${K3D_CONFIG_FILE}" --k3s-arg "--disable=traefik@server:*" +fi + +echo "Waiting for all nodes to become Ready..." +kubectl wait --for=condition=ready node --all --timeout=120s || { + echo "WARNING: Some nodes did not reach Ready state within timeout." >&2 +} + +# Install / upgrade official ingress-nginx Helm chart (not Bitnami). 
User requested: +# helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx \ +# --repo https://kubernetes.github.io/ingress-nginx \ +# --namespace ingress-nginx --create-namespace \ +# --version 4.13.3 \ +# --set controller.image.tag=v1.13.3 + +INGRESS_REPO_NAME="ingress-nginx" +INGRESS_REPO_URL="https://kubernetes.github.io/ingress-nginx" +INGRESS_NAMESPACE="ingress-nginx" +INGRESS_RELEASE="ingress-nginx" +INGRESS_CHART="ingress-nginx/ingress-nginx" +INGRESS_CHART_VERSION="${INGRESS_CHART_VERSION:-4.13.3}" +INGRESS_CONTROLLER_IMAGE_TAG="${INGRESS_CONTROLLER_IMAGE_TAG:-v1.13.3}" + +if ! helm repo list | awk '{print $1}' | grep -qx "$INGRESS_REPO_NAME"; then + echo "Adding Helm repository $INGRESS_REPO_NAME ($INGRESS_REPO_URL)..." + helm repo add "$INGRESS_REPO_NAME" "$INGRESS_REPO_URL" else - echo "Helm repository $REPO_NAME already exists." + echo "Helm repository $INGRESS_REPO_NAME already exists." fi +echo "Updating Helm repository $INGRESS_REPO_NAME..." +helm repo update "$INGRESS_REPO_NAME" + +echo "Installing / upgrading '$INGRESS_RELEASE' chart version ${INGRESS_CHART_VERSION} with controller.image.tag=${INGRESS_CONTROLLER_IMAGE_TAG}" +helm upgrade --install "$INGRESS_RELEASE" "$INGRESS_CHART" \ + --namespace "$INGRESS_NAMESPACE" \ + --create-namespace \ + --version "$INGRESS_CHART_VERSION" \ + --set controller.image.tag="$INGRESS_CONTROLLER_IMAGE_TAG" + +echo "Waiting for ingress controller deployment rollout..." +if kubectl rollout status deployment/${INGRESS_RELEASE}-controller -n "$INGRESS_NAMESPACE" --timeout=180s; then + echo "Ingress controller successfully rolled out." +else + echo "Rollout not complete. Recent events:" >&2 + kubectl -n "$INGRESS_NAMESPACE" get events --sort-by=.lastTimestamp | tail -n 30 || true +fi + +echo "Current ingress-nginx pods:" +kubectl get pods -n "$INGRESS_NAMESPACE" -l app.kubernetes.io/name=ingress-nginx || true + -helm install nginx-ingress-controller bitnami/nginx-ingress-controller --namespace nginx-ingress --version "10.3.0" --create-namespace diff --git a/infrastructure/server-setup/base-setup/Chart.lock b/infrastructure/server-setup/base-setup/Chart.lock index 367ddc52..c110f774 100644 --- a/infrastructure/server-setup/base-setup/Chart.lock +++ b/infrastructure/server-setup/base-setup/Chart.lock @@ -2,8 +2,8 @@ dependencies: - name: cert-manager repository: https://charts.jetstack.io version: v1.18.2 -- name: nginx-ingress-controller - repository: https://charts.bitnami.com/bitnami - version: 11.6.27 -digest: sha256:3197e71b2e91041da3bca61187bd413ea86ed7778a8cecf363182be1aaead1f0 -generated: "2025-07-31T08:32:45.824521293Z" +- name: ingress-nginx + repository: https://kubernetes.github.io/ingress-nginx + version: 4.13.3 +digest: sha256:2da733e46f388692106e8487aa48b1593366cc611ffac246891dbbbe0b9a7943 +generated: "2025-10-07T12:41:19.535888+02:00" diff --git a/infrastructure/server-setup/base-setup/Chart.yaml b/infrastructure/server-setup/base-setup/Chart.yaml index 938ea330..311220e3 100644 --- a/infrastructure/server-setup/base-setup/Chart.yaml +++ b/infrastructure/server-setup/base-setup/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: basic description: | - This helm chart deploys the basic nginx-ingress controller and cert-manager. + This helm chart deploys the official Kubernetes ingress-nginx controller and cert-manager. 
type: application version: 0.0.1 appVersion: "0.0.1" @@ -9,6 +9,7 @@ dependencies: - name: cert-manager version: "v1.18.2" repository: https://charts.jetstack.io -- name: nginx-ingress-controller - version: "11.6.27" - repository: https://charts.bitnami.com/bitnami +- name: ingress-nginx + version: "4.13.3" + repository: https://kubernetes.github.io/ingress-nginx + alias: ingress-nginx diff --git a/infrastructure/server-setup/base-setup/values.yaml b/infrastructure/server-setup/base-setup/values.yaml index 81e08f8f..2f6f1764 100644 --- a/infrastructure/server-setup/base-setup/values.yaml +++ b/infrastructure/server-setup/base-setup/values.yaml @@ -1,2 +1,10 @@ cert-manager: installCRDs: false + +# Configuration overrides for the official ingress-nginx chart dependency. +# Pin the controller image tag to v1.13.3 (matching the script installation). +ingress-nginx: + controller: + image: + # registry and image default to the chart's defaults; only pin tag here. + tag: v1.13.3 From bcb6edd8b871da0de3e0e37c5f581fe78a959e08 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Tue, 7 Oct 2025 12:59:30 +0200 Subject: [PATCH 04/11] chore: remove commented-out Helm chart installation for ingress-nginx --- infrastructure/local-cluster-setup/setup-k3d-cluster.sh | 7 ------- 1 file changed, 7 deletions(-) diff --git a/infrastructure/local-cluster-setup/setup-k3d-cluster.sh b/infrastructure/local-cluster-setup/setup-k3d-cluster.sh index 5814f9fd..01ecc027 100755 --- a/infrastructure/local-cluster-setup/setup-k3d-cluster.sh +++ b/infrastructure/local-cluster-setup/setup-k3d-cluster.sh @@ -18,13 +18,6 @@ kubectl wait --for=condition=ready node --all --timeout=120s || { echo "WARNING: Some nodes did not reach Ready state within timeout." >&2 } -# Install / upgrade official ingress-nginx Helm chart (not Bitnami). 
User requested: -# helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx \ -# --repo https://kubernetes.github.io/ingress-nginx \ -# --namespace ingress-nginx --create-namespace \ -# --version 4.13.3 \ -# --set controller.image.tag=v1.13.3 - INGRESS_REPO_NAME="ingress-nginx" INGRESS_REPO_URL="https://kubernetes.github.io/ingress-nginx" INGRESS_NAMESPACE="ingress-nginx" From 35cf552da1f80f84b626a5c3aee9af851c31ae87 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Tue, 7 Oct 2025 15:23:34 +0200 Subject: [PATCH 05/11] Update infrastructure/server-setup/base-setup/Chart.yaml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- infrastructure/server-setup/base-setup/Chart.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/infrastructure/server-setup/base-setup/Chart.yaml b/infrastructure/server-setup/base-setup/Chart.yaml index 311220e3..5ea66590 100644 --- a/infrastructure/server-setup/base-setup/Chart.yaml +++ b/infrastructure/server-setup/base-setup/Chart.yaml @@ -12,4 +12,3 @@ dependencies: - name: ingress-nginx version: "4.13.3" repository: https://kubernetes.github.io/ingress-nginx - alias: ingress-nginx From bfcebefea9b7ab68364fae8a912d41154c5507e8 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Wed, 8 Oct 2025 08:44:12 +0200 Subject: [PATCH 06/11] refactor: streamline k3d cluster creation logic and improve Helm chart installation process --- Tiltfile | 33 ++++++++++++------- .../local-cluster-setup/setup-k3d-cluster.sh | 26 ++++++++++----- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/Tiltfile b/Tiltfile index 3ebfd4ac..4d85161a 100644 --- a/Tiltfile +++ b/Tiltfile @@ -145,6 +145,11 @@ local_resource( ################################## build backend_rag image and do live update ########################################## ######################################################################################################################## +IGNORE_BASE = [ + "infrastructure/", + "services/frontend/", +], + # NOTE: full image names should match the one in the helm chart values.yaml! 
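+# IGNORE_BASE is shared by the backend image builds below; the frontend builds
+# keep their own lists, since they must not ignore services/frontend/ itself.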
registry = "ghcr.io/stackitcloud/rag-template" rag_api_image_name = "rag-backend" @@ -163,10 +168,7 @@ docker_build( sync(core_library_context+"/rag-core-lib", "/app/libs/rag-core-lib"), ], dockerfile=backend_context + "/Dockerfile", - ignore=[ - "infrastructure/", - "services/frontend/", - ], + ignore=IGNORE_BASE ) # Add linter trigger @@ -206,10 +208,7 @@ docker_build( sync(mcp_context, "/app/services/mcp-server"), ], dockerfile=mcp_context + "/Dockerfile", - ignore=[ - "infrastructure/", - "services/frontend/", - ], + ignore=IGNORE_BASE, ) # Add linter trigger @@ -244,7 +243,7 @@ docker_build( sync(core_library_context + "/admin-api-lib", "/app/libs/admin-api-lib"), ], dockerfile=admin_backend_context + "/Dockerfile", - ignore=["infrastructure/","services/frontend/"], + ignore=IGNORE_BASE, ) # Add linter trigger @@ -288,7 +287,7 @@ docker_build( sync(core_library_context +"/extractor-api-lib", "/app/libs/extractor-api-lib"), ], dockerfile=extractor_context + "/Dockerfile", - ignore=["infrastructure/","services/frontend/"], + ignore=IGNORE_BASE, ) # Add linter trigger @@ -329,7 +328,12 @@ docker_build( sync("./services/frontend/dist/apps/chat-app", "/usr/share/nginx/html"), sync("./services/frontend/dist/libs", "/usr/share/nginx/html/libs"), ], - ignore=["infrastructure/", "services/frontend/.nx/", "services/frontend/tmp/", "services/frontend/node_modules/"], + ignore=[ + "infrastructure/", + "services/frontend/.nx/", + "services/frontend/tmp/", + "services/frontend/node_modules/", + ], ) ######################################################################################################################## @@ -347,7 +351,12 @@ docker_build( sync("./services/frontend/dist/apps/admin-app", "/usr/share/nginx/html"), sync("./services/frontend/dist/libs", "/usr/share/nginx/html/libs"), ], - ignore=["infrastructure/", "services/frontend/.nx/", "services/frontend/tmp/", "services/frontend/node_modules/"], + ignore=[ + "infrastructure/", + "services/frontend/.nx/", + "services/frontend/tmp/", + "services/frontend/node_modules/", + ], ) diff --git a/infrastructure/local-cluster-setup/setup-k3d-cluster.sh b/infrastructure/local-cluster-setup/setup-k3d-cluster.sh index 01ecc027..95300052 100755 --- a/infrastructure/local-cluster-setup/setup-k3d-cluster.sh +++ b/infrastructure/local-cluster-setup/setup-k3d-cluster.sh @@ -7,10 +7,22 @@ CLUSTER_NAME="rag" K3D_CONFIG_FILE="k3d-cluster-config.yaml" echo "Creating k3d cluster '${CLUSTER_NAME}' (if it does not already exist)..." -if k3d cluster list | awk '{print $1}' | grep -qx "${CLUSTER_NAME}"; then - echo "Cluster '${CLUSTER_NAME}' already exists. Skipping create." +cluster_exists=false +if command -v jq >/dev/null 2>&1; then + if k3d cluster list -o json 2>/dev/null | jq -e --arg name "$CLUSTER_NAME" 'map(select(.name==$name)) | length > 0' >/dev/null; then + cluster_exists=true + fi else - k3d cluster create "${CLUSTER_NAME}" --config "${K3D_CONFIG_FILE}" --k3s-arg "--disable=traefik@server:*" + # Fallback without jq (less robust) + if k3d cluster list --no-headers 2>/dev/null | awk '{print $1}' | grep -qx "${CLUSTER_NAME}"; then + cluster_exists=true + fi +fi + +if [ "${cluster_exists}" = true ]; then + echo "Cluster '${CLUSTER_NAME}' already exists. Skipping create." +else + k3d cluster create "${CLUSTER_NAME}" --config "${K3D_CONFIG_FILE}" --k3s-arg "--disable=traefik@server:*" fi echo "Waiting for all nodes to become Ready..." 
@@ -24,8 +36,6 @@ INGRESS_NAMESPACE="ingress-nginx" INGRESS_RELEASE="ingress-nginx" INGRESS_CHART="ingress-nginx/ingress-nginx" INGRESS_CHART_VERSION="${INGRESS_CHART_VERSION:-4.13.3}" -INGRESS_CONTROLLER_IMAGE_TAG="${INGRESS_CONTROLLER_IMAGE_TAG:-v1.13.3}" - if ! helm repo list | awk '{print $1}' | grep -qx "$INGRESS_REPO_NAME"; then echo "Adding Helm repository $INGRESS_REPO_NAME ($INGRESS_REPO_URL)..." helm repo add "$INGRESS_REPO_NAME" "$INGRESS_REPO_URL" @@ -35,13 +45,11 @@ fi echo "Updating Helm repository $INGRESS_REPO_NAME..." helm repo update "$INGRESS_REPO_NAME" -echo "Installing / upgrading '$INGRESS_RELEASE' chart version ${INGRESS_CHART_VERSION} with controller.image.tag=${INGRESS_CONTROLLER_IMAGE_TAG}" +echo "Installing / upgrading '$INGRESS_RELEASE' chart version ${INGRESS_CHART_VERSION}" helm upgrade --install "$INGRESS_RELEASE" "$INGRESS_CHART" \ --namespace "$INGRESS_NAMESPACE" \ --create-namespace \ - --version "$INGRESS_CHART_VERSION" \ - --set controller.image.tag="$INGRESS_CONTROLLER_IMAGE_TAG" - + --version "$INGRESS_CHART_VERSION" echo "Waiting for ingress controller deployment rollout..." if kubectl rollout status deployment/${INGRESS_RELEASE}-controller -n "$INGRESS_NAMESPACE" --timeout=180s; then echo "Ingress controller successfully rolled out." From 23b4b88c170ce79640ebb6b521ef9059b3796b7f Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Wed, 8 Oct 2025 08:45:58 +0200 Subject: [PATCH 07/11] fix: correct formatting of IGNORE_BASE list in Tiltfile --- Tiltfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tiltfile b/Tiltfile index 4d85161a..24ac28e4 100644 --- a/Tiltfile +++ b/Tiltfile @@ -148,7 +148,7 @@ local_resource( IGNORE_BASE = [ "infrastructure/", "services/frontend/", -], +] # NOTE: full image names should match the one in the helm chart values.yaml! registry = "ghcr.io/stackitcloud/rag-template" From fcc14c9c86cae46ef81241e108f3ab0dd03f1459 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Wed, 8 Oct 2025 14:37:48 +0200 Subject: [PATCH 08/11] feat: enhance CompositeRetriever with concurrent retrieval and early pruning capabilities --- .../src/rag_core_api/dependency_container.py | 1 + .../impl/retriever/composite_retriever.py | 52 ++++++++++++++++--- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/libs/rag-core-api/src/rag_core_api/dependency_container.py b/libs/rag-core-api/src/rag_core_api/dependency_container.py index 57cff6e3..3e6da5fa 100644 --- a/libs/rag-core-api/src/rag_core_api/dependency_container.py +++ b/libs/rag-core-api/src/rag_core_api/dependency_container.py @@ -166,6 +166,7 @@ class DependencyContainer(DeclarativeContainer): CompositeRetriever, List(image_retriever, table_retriever, text_retriever, summary_retriever), reranker, + retriever_settings.total_k, ) information_piece_mapper = Singleton(InformationPieceMapper) diff --git a/libs/rag-core-api/src/rag_core_api/impl/retriever/composite_retriever.py b/libs/rag-core-api/src/rag_core_api/impl/retriever/composite_retriever.py index ed7501e7..dc07d86b 100644 --- a/libs/rag-core-api/src/rag_core_api/impl/retriever/composite_retriever.py +++ b/libs/rag-core-api/src/rag_core_api/impl/retriever/composite_retriever.py @@ -1,6 +1,17 @@ -"""Module for the CompositeRetriever class.""" +"""Module for the CompositeRetriever class. + +Performance notes / improvements (2025-10): + - Retriever invocations are now executed concurrently via ``asyncio.gather`` instead of + sequential awaits inside a for-loop. 
This reduces end-to-end latency roughly to the + slowest individual retriever call instead of the sum of all. + - Duplicate filtering now uses an O(1) set membership check instead of rebuilding a list + comprehension for every candidate (previously O(n^2)). + - Early pruning hook (``_total_k``) is prepared for future enhancement; if provided it + allows trimming the merged candidate list before an optional reranker is invoked. +""" import logging +import asyncio from copy import deepcopy from typing import Any, Optional @@ -22,6 +33,7 @@ def __init__( self, retrievers: list[RetrieverQuark], reranker: Optional[Reranker], + total_k: int | None = None, **kwargs, ): """ @@ -39,6 +51,8 @@ def __init__( super().__init__(**kwargs) self._reranker = reranker self._retrievers = retrievers + # Optional global cap (before reranking) on merged candidates. If None, no cap applied. + self._total_k = total_k def verify_readiness(self) -> None: """ @@ -83,24 +97,46 @@ async def ainvoke( - Duplicate entries are removed based on their metadata ID. - If a reranker is available, the results are further processed by the reranker. """ - results = [] if config is None: config = RunnableConfig(metadata={"filter_kwargs": {}}) - for retriever in self._retrievers: - tmp_config = deepcopy(config) - results += await retriever.ainvoke(retriever_input, config=tmp_config) + + # Run all retrievers concurrently instead of sequentially. + tasks = [r.ainvoke(retriever_input, config=deepcopy(config)) for r in self._retrievers] + retriever_outputs = await asyncio.gather(*tasks, return_exceptions=False) + # Flatten + results: list[Document] = [doc for group in retriever_outputs for doc in group] # remove summaries results = [x for x in results if x.metadata["type"] != ContentType.SUMMARY.value] # remove duplicated entries return_val = [] + seen_ids: set[str] = set() for result in results: - if result.metadata["id"] in [x.metadata["id"] for x in return_val]: + if result.metadata.get("type") == ContentType.SUMMARY.value: + continue + doc_id = result.metadata.get("id") + if doc_id is None: + # If an ID is missing, keep it (can't deduplicate deterministically) + return_val.append(result) + continue + if doc_id in seen_ids: continue + seen_ids.add(doc_id) return_val.append(result) - if self._reranker and results: - return_val = await self._reranker.ainvoke((return_val, retriever_input), config=config) + # Optional early global pruning (only if configured and more than total_k) + if self._total_k is not None and len(return_val) > self._total_k: + # If score metadata exists, use it to prune; otherwise keep ordering as-is. + if all("score" in d.metadata for d in return_val): + return_val.sort(key=lambda d: d.metadata["score"], reverse=True) + return_val = return_val[: self._total_k] + + if self._reranker and return_val: + # Only invoke reranker if there are more docs than it will output OR if score missing. 
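+            # Reranker contract: ainvoke receives a (documents, query) tuple and
+            # returns a (possibly shorter) list ordered by relevance; the except
+            # branch below falls back to the unreranked candidates.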
+ try: + return_val = await self._reranker.ainvoke((return_val, retriever_input), config=config) + except Exception: # pragma: no cover - fail soft; return unreranked if reranker errors + logger.exception("Reranker failed; returning unreranked results.") return return_val From 8bfc8bd43af8731c7037958405b1bb9a0cd0f963 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Thu, 9 Oct 2025 09:24:08 +0200 Subject: [PATCH 09/11] feat: update retriever settings to introduce canonical total_k_documents and enable reranker --- infrastructure/rag/values.yaml | 4 ++- .../impl/settings/retriever_settings.py | 25 ++++++++++++++++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/infrastructure/rag/values.yaml b/infrastructure/rag/values.yaml index f751651c..b433b4dd 100644 --- a/infrastructure/rag/values.yaml +++ b/infrastructure/rag/values.yaml @@ -166,7 +166,8 @@ backend: retriever: RETRIEVER_THRESHOLD: 0.3 RETRIEVER_K_DOCUMENTS: 10 - RETRIEVER_TOTAL_K: 7 + # Canonical global cap across all retrievers. Replaces legacy RETRIEVER_TOTAL_K / RETRIEVER_OVERALL_K_DOCUMENTS + RETRIEVER_TOTAL_K_DOCUMENTS: 20 RETRIEVER_SUMMARY_THRESHOLD: 0.3 RETRIEVER_SUMMARY_K_DOCUMENTS: 10 RETRIEVER_TABLE_THRESHOLD: 0.3 @@ -211,6 +212,7 @@ backend: reranker: RERANKER_K_DOCUMENTS: 5 RERANKER_MIN_RELEVANCE_SCORE: 0.001 + RERANKER_ENABLED: true chatHistory: CHAT_HISTORY_LIMIT: 4 CHAT_HISTORY_REVERSE: true diff --git a/libs/rag-core-api/src/rag_core_api/impl/settings/retriever_settings.py b/libs/rag-core-api/src/rag_core_api/impl/settings/retriever_settings.py index fa2a3250..a656169d 100644 --- a/libs/rag-core-api/src/rag_core_api/impl/settings/retriever_settings.py +++ b/libs/rag-core-api/src/rag_core_api/impl/settings/retriever_settings.py @@ -1,6 +1,14 @@ -"""Module that contains settings regarding the retriever.""" +"""Module that contains settings regarding the retriever. -from pydantic import Field +Notes +----- +`total_k_documents` is the canonical global cap across all retrievers. It replaces the +previous environment variable names `RETRIEVER_TOTAL_K` and `RETRIEVER_OVERALL_K_DOCUMENTS`. +For backward compatibility, those legacy names are still accepted if the canonical +`RETRIEVER_TOTAL_K_DOCUMENTS` is not set. +""" + +from pydantic import Field, AliasChoices from pydantic_settings import BaseSettings @@ -11,7 +19,7 @@ class RetrieverSettings(BaseSettings): The threshold value for the retriever (default 0.5). k_documents : int The number of documents to retrieve (default 10). - total_k : int + total_k_documents : int The total number of documents (default 10). table_threshold : float The threshold value for table retrieval (default 0.37). @@ -35,10 +43,19 @@ class Config: threshold: float = Field(default=0.5) k_documents: int = Field(default=10) - total_k: int = Field(default=10) table_threshold: float = Field(default=0.37) table_k_documents: int = Field(default=10) summary_threshold: float = Field(default=0.5) summary_k_documents: int = Field(default=10) image_threshold: float = Field(default=0.5) image_k_documents: int = Field(default=10) + # Canonical global cap (previously RETRIEVER_TOTAL_K / RETRIEVER_OVERALL_K_DOCUMENTS). + # Accept legacy env var names as fallbacks via validation alias choices. 
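+    # AliasChoices is evaluated left to right, so the canonical name takes
+    # precedence over the legacy fallbacks when several of them are set.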
+ total_k_documents: int = Field( + default=10, + validation_alias=AliasChoices( + "TOTAL_K_DOCUMENTS", # canonical -> RETRIEVER_TOTAL_K_DOCUMENTS + "TOTAL_K", # legacy -> RETRIEVER_TOTAL_K + "OVERALL_K_DOCUMENTS", # legacy -> RETRIEVER_OVERALL_K_DOCUMENTS + ), + ) From 0984786a48f8e0a41157fcc5c79750dbe5ed716b Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Thu, 9 Oct 2025 09:24:14 +0200 Subject: [PATCH 10/11] feat: enhance CompositeRetriever with new reranker settings and summary handling --- infrastructure/README.md | 7 +- .../src/rag_core_api/dependency_container.py | 6 +- .../src/rag_core_api/impl/graph/chat_graph.py | 7 + .../impl/retriever/composite_retriever.py | 170 ++++++++++++++---- .../impl/settings/reranker_settings.py | 6 + .../impl/vector_databases/qdrant_database.py | 24 +++ .../tests/composite_retriever_test.py | 128 +++++++++++++ .../tests/mocks/mock_environment_variables.py | 4 +- .../rag-core-api/tests/mocks/mock_reranker.py | 19 ++ .../tests/mocks/mock_retriever_quark.py | 25 +++ .../tests/mocks/mock_vector_db.py | 22 +++ 11 files changed, 382 insertions(+), 36 deletions(-) create mode 100644 libs/rag-core-api/tests/composite_retriever_test.py create mode 100644 libs/rag-core-api/tests/mocks/mock_reranker.py create mode 100644 libs/rag-core-api/tests/mocks/mock_retriever_quark.py create mode 100644 libs/rag-core-api/tests/mocks/mock_vector_db.py diff --git a/infrastructure/README.md b/infrastructure/README.md index 887e7ea5..6dc2d5cc 100644 --- a/infrastructure/README.md +++ b/infrastructure/README.md @@ -291,7 +291,8 @@ backend: retriever: RETRIEVER_THRESHOLD: 0.3 RETRIEVER_K_DOCUMENTS: 10 - RETRIEVER_TOTAL_K: 7 + # Canonical global cap (previously RETRIEVER_TOTAL_K / RETRIEVER_OVERALL_K_DOCUMENTS) + RETRIEVER_TOTAL_K_DOCUMENTS: 20 RETRIEVER_SUMMARY_THRESHOLD: 0.3 RETRIEVER_SUMMARY_K_DOCUMENTS: 10 RETRIEVER_TABLE_THRESHOLD: 0.3 @@ -489,7 +490,7 @@ Afterwards, the services are accessible from [http://rag.localhost](http://rag.l Note: The command above has only been tested on *Ubuntu 22.04 LTS*. -On *Windows* you can adjust the hosts file as described [here](https://docs.digitalocean.com/products/paperspace/machines/how-to/edit-windows-hosts-file/). +On *Windows* you can adjust the hosts file as described in the DigitalOcean guide on [editing the Windows hosts file](https://docs.digitalocean.com/products/paperspace/machines/how-to/edit-windows-hosts-file/). ### 2.2 Production Setup Instructions @@ -499,7 +500,7 @@ For deployment of the *NGINX Ingress Controller* and a cert-manager, the followi [base-setup](server-setup/base-setup/Chart.yaml) -The email [here](server-setup/base-setup/templates/cert-issuer.yaml) should be changed from `` to a real email address. +The email in the [cert-issuer template](server-setup/base-setup/templates/cert-issuer.yaml) should be changed from `` to a real email address. ## 3. 
Contributing diff --git a/libs/rag-core-api/src/rag_core_api/dependency_container.py b/libs/rag-core-api/src/rag_core_api/dependency_container.py index 3e6da5fa..17c0df6d 100644 --- a/libs/rag-core-api/src/rag_core_api/dependency_container.py +++ b/libs/rag-core-api/src/rag_core_api/dependency_container.py @@ -126,7 +126,7 @@ class DependencyContainer(DeclarativeContainer): vectorstore=vectorstore, ) - flashrank_reranker = Singleton(FlashrankRerank, top_n=reranker_settings.k_documents) + flashrank_reranker = Singleton(FlashrankRerank, top_n=reranker_settings.k_documents, score_threshold=reranker_settings.min_relevance_score) reranker = Singleton(FlashrankReranker, flashrank_reranker) information_pieces_uploader = Singleton(DefaultInformationPiecesUploader, vector_database) @@ -166,7 +166,9 @@ class DependencyContainer(DeclarativeContainer): CompositeRetriever, List(image_retriever, table_retriever, text_retriever, summary_retriever), reranker, - retriever_settings.total_k, + reranker_settings.enabled, + retriever_settings.total_k_documents, + reranker_settings.k_documents, ) information_piece_mapper = Singleton(InformationPieceMapper) diff --git a/libs/rag-core-api/src/rag_core_api/impl/graph/chat_graph.py b/libs/rag-core-api/src/rag_core_api/impl/graph/chat_graph.py index 73be5e39..2e748bad 100644 --- a/libs/rag-core-api/src/rag_core_api/impl/graph/chat_graph.py +++ b/libs/rag-core-api/src/rag_core_api/impl/graph/chat_graph.py @@ -254,6 +254,13 @@ async def _retrieve_node(self, state: dict) -> dict: if document.metadata.get("type", ContentType.SUMMARY.value) != ContentType.SUMMARY.value ] + # If only summaries were retrieved (no concrete underlying documents), treat as "no documents" + if not information_pieces: + return { + self.ERROR_MESSAGES_KEY: [self._error_messages.no_documents_message], + self.FINISH_REASONS: ["No documents found"], + } + response["information_pieces"] = information_pieces response["langchain_documents"] = retrieved_documents diff --git a/libs/rag-core-api/src/rag_core_api/impl/retriever/composite_retriever.py b/libs/rag-core-api/src/rag_core_api/impl/retriever/composite_retriever.py index dc07d86b..63e6cef0 100644 --- a/libs/rag-core-api/src/rag_core_api/impl/retriever/composite_retriever.py +++ b/libs/rag-core-api/src/rag_core_api/impl/retriever/composite_retriever.py @@ -13,7 +13,7 @@ import logging import asyncio from copy import deepcopy -from typing import Any, Optional +from typing import Any, Optional, Iterable from langchain_core.documents import Document from langchain_core.runnables import RunnableConfig @@ -33,7 +33,9 @@ def __init__( self, retrievers: list[RetrieverQuark], reranker: Optional[Reranker], - total_k: int | None = None, + reranker_enabled: bool, + total_retrieved_k_documents: int | None = None, + reranker_k_documents: int | None = None, **kwargs, ): """ @@ -45,6 +47,12 @@ def __init__( A list of retriever quarks to be used by the composite retriever. reranker : Optional[Reranker] An optional reranker to rerank the retrieved results. + reranker_enabled : bool + A flag indicating whether the reranker is enabled. + total_retrieved_k_documents : int | None + The total number of documents to retrieve (default None, meaning no limit). + reranker_k_documents : int | None + The number of documents to retrieve for the reranker (default None, meaning no limit). **kwargs : dict Additional keyword arguments to be passed to the superclass initializer. 
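+
+        Notes
+        -----
+        ``total_retrieved_k_documents`` caps the merged candidate pool before any
+        reranking, while ``reranker_k_documents`` is the size threshold above which
+        the reranker is invoked (when ``reranker_enabled`` is set).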
""" @@ -52,7 +60,9 @@ def __init__( self._reranker = reranker self._retrievers = retrievers # Optional global cap (before reranking) on merged candidates. If None, no cap applied. - self._total_k = total_k + self._total_retrieved_k_documents = total_retrieved_k_documents + self._reranker_k_documents = reranker_k_documents + self._reranker_enabled = reranker_enabled def verify_readiness(self) -> None: """ @@ -81,7 +91,7 @@ async def ainvoke( retriever_input : str The input string to be processed by the retrievers. config : Optional[RunnableConfig] - Configuration for the retrievers (default None). + Configuration for the retrievers and reranker (default None). **kwargs : Any Additional keyword arguments. @@ -106,37 +116,137 @@ async def ainvoke( # Flatten results: list[Document] = [doc for group in retriever_outputs for doc in group] - # remove summaries - results = [x for x in results if x.metadata["type"] != ContentType.SUMMARY.value] - - # remove duplicated entries - return_val = [] - seen_ids: set[str] = set() - for result in results: - if result.metadata.get("type") == ContentType.SUMMARY.value: - continue - doc_id = result.metadata.get("id") - if doc_id is None: - # If an ID is missing, keep it (can't deduplicate deterministically) - return_val.append(result) - continue - if doc_id in seen_ids: - continue - seen_ids.add(doc_id) - return_val.append(result) + summary_docs: list[Document] = [d for d in results if d.metadata.get("type") == ContentType.SUMMARY.value] + results = self._use_summaries(summary_docs, results) + + return_val = self._remove_duplicates(results) + + return_val = self._early_pruning(return_val) + + return_val = await self._arerank_pruning(return_val, retriever_input, config) + + return return_val + + def _use_summaries(self, summary_docs: list[Document], results: list[Document]) -> list[Document]: + """Utilize summary documents to enhance retrieval results. + + Parameters + ---------- + summary_docs : list[Document] + A list of summary documents to use. + results : list[Document] + A list of retrieval results to enhance. + + Returns + ------- + list[Document] + The enhanced list of documents. + """ + try: + # Collect existing ids for fast membership tests + existing_ids: set[str] = {d.metadata.get("id") for d in results} + + # Gather related ids not yet present + + missing_related_ids: set[str] = set() + for sdoc in summary_docs: + related_list: Iterable[str] = sdoc.metadata.get("related", []) + for rid in related_list: + if rid and rid not in existing_ids: + missing_related_ids.add(rid) + + if missing_related_ids: + # Heuristic: use the first retriever's underlying vector database for lookup. + # All quarks share the same vector database instance in current design. + vector_db = None + if self._retrievers: + # Access protected member as an implementation detail – acceptable within package. 
+ vector_db = getattr(self._retrievers[0], "_vector_database", None) + if vector_db and hasattr(vector_db, "get_documents_by_ids"): + try: + expanded_docs: list[Document] = vector_db.get_documents_by_ids(list(missing_related_ids)) + # Merge while preserving original order precedence (append new ones) + results.extend(expanded_docs) + existing_ids.update(d.metadata.get("id") for d in expanded_docs) + logger.debug( + "Summary expansion added %d underlying documents (from %d summaries).", + len(expanded_docs), + len(summary_docs), + ) + except Exception: + logger.exception("Failed to expand summary related documents.") + else: + logger.debug("Vector database does not expose get_documents_by_ids; skipping summary expansion.") + finally: + # Remove summaries after expansion step + results = [x for x in results if x.metadata.get("type") != ContentType.SUMMARY.value] + return results + + def _remove_duplicates(self, documents: list[Document]) -> list[Document]: + """Remove duplicate documents from a list based on their IDs. + + Parameters + ---------- + documents : list[Document] + The list of documents to filter. + + Returns + ------- + list[Document] + The filtered list of documents with duplicates removed. + """ + seen_ids = set() + unique_docs = [] + for doc in documents: + doc_id = doc.metadata.get("id") + if doc_id not in seen_ids: + seen_ids.add(doc_id) + unique_docs.append(doc) + return unique_docs + + def _early_pruning(self, documents: list[Document]) -> list[Document]: + """Prune documents early based on certain criteria. + + Parameters + ---------- + documents : list[Document] + The list of documents to prune. + + Returns + ------- + list[Document] + The pruned list of documents. + """ # Optional early global pruning (only if configured and more than total_k) - if self._total_k is not None and len(return_val) > self._total_k: + if self._total_retrieved_k_documents is not None and len(documents) > self._total_retrieved_k_documents: # If score metadata exists, use it to prune; otherwise keep ordering as-is. - if all("score" in d.metadata for d in return_val): - return_val.sort(key=lambda d: d.metadata["score"], reverse=True) - return_val = return_val[: self._total_k] + if all("score" in d.metadata for d in documents): + documents.sort(key=lambda d: d.metadata["score"], reverse=True) + documents = documents[: self._total_retrieved_k_documents] + return documents + + async def _arerank_pruning(self, documents: list[Document], retriever_input: dict, config: Optional[RunnableConfig] = None) -> list[Document]: + """Prune documents by reranker. - if self._reranker and return_val: + Parameters + ---------- + documents : list[Document] + The list of documents to prune. + retriever_input : dict + The input to the retriever. + config : Optional[RunnableConfig] + Configuration for the retrievers and reranker (default None). + + Returns + ------- + list[Document] + The pruned list of documents. + """ + if self._reranker_k_documents is not None and len(documents) > self._reranker_k_documents and self._reranker_enabled: # Only invoke reranker if there are more docs than it will output OR if score missing. 
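+            # The skip path (len(documents) <= reranker_k_documents) is exercised by
+            # test_arerank_pruning_skips_when_not_needed in composite_retriever_test.py.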
try: - return_val = await self._reranker.ainvoke((return_val, retriever_input), config=config) + documents = await self._reranker.ainvoke((documents, retriever_input), config=config) except Exception: # pragma: no cover - fail soft; return unreranked if reranker errors logger.exception("Reranker failed; returning unreranked results.") - - return return_val + return documents diff --git a/libs/rag-core-api/src/rag_core_api/impl/settings/reranker_settings.py b/libs/rag-core-api/src/rag_core_api/impl/settings/reranker_settings.py index 2af6fd86..3b0df436 100644 --- a/libs/rag-core-api/src/rag_core_api/impl/settings/reranker_settings.py +++ b/libs/rag-core-api/src/rag_core_api/impl/settings/reranker_settings.py @@ -12,6 +12,10 @@ class RerankerSettings(BaseSettings): ---------- k_documents : int The number of documents to return after reranking (default 5). + min_relevance_score : float + Minimum relevance threshold to return (default 0.001). + enabled : bool + A flag indicating whether the reranker is enabled (default True). """ class Config: @@ -21,3 +25,5 @@ class Config: case_sensitive = False k_documents: int = Field(default=5) + min_relevance_score: float = Field(default=0.001) + enabled: bool = Field(default=True) diff --git a/libs/rag-core-api/src/rag_core_api/impl/vector_databases/qdrant_database.py b/libs/rag-core-api/src/rag_core_api/impl/vector_databases/qdrant_database.py index aa99e89b..b254a041 100644 --- a/libs/rag-core-api/src/rag_core_api/impl/vector_databases/qdrant_database.py +++ b/libs/rag-core-api/src/rag_core_api/impl/vector_databases/qdrant_database.py @@ -155,6 +155,30 @@ def get_specific_document(self, document_id: str) -> list[Document]: for search_result in requested[0] ] + def get_documents_by_ids(self, document_ids: list[str]) -> list[Document]: + """Batch fetch multiple documents by their IDs. + + Parameters + ---------- + document_ids : list[str] + A list of document IDs to retrieve. + + Returns + ------- + list[Document] + A list of found documents. Missing IDs are ignored. + """ + if not document_ids: + return [] + # Scroll with OR semantics: build multiple FieldConditions + # Qdrant Python client doesn't support direct OR via 'should' in Filter shortcuts, + # but we can perform multiple scrolls as a fallback if needed. + # For efficiency, attempt a single scroll per id chunk (keep it simple for now). + results: list[Document] = [] + for doc_id in document_ids: + results.extend(self.get_specific_document(doc_id)) + return results + def upload(self, documents: list[Document]) -> None: """ Save the given documents to the Qdrant database. diff --git a/libs/rag-core-api/tests/composite_retriever_test.py b/libs/rag-core-api/tests/composite_retriever_test.py new file mode 100644 index 00000000..d0957ee2 --- /dev/null +++ b/libs/rag-core-api/tests/composite_retriever_test.py @@ -0,0 +1,128 @@ +"""Unit tests for internal helper methods of ``CompositeRetriever``. + +The goal of these tests is to verify the transformation semantics of: + - _use_summaries + - _remove_duplicates + - _early_pruning + - _arerank_pruning + +They operate with light‑weight mock objects (no real vector DB / reranker logic). 
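+
+Run them directly with, for example:
+    pytest libs/rag-core-api/tests/composite_retriever_test.py -q
+(pytest-asyncio is required for the async test cases.)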
+""" + +from __future__ import annotations + +import asyncio +from typing import Iterable + +import pytest +from langchain_core.documents import Document + +from rag_core_api.impl.retriever.composite_retriever import CompositeRetriever +from rag_core_lib.impl.data_types.content_type import ContentType +from mocks.mock_vector_db import MockVectorDB +from mocks.mock_retriever_quark import MockRetrieverQuark +from mocks.mock_reranker import MockReranker + + +def _mk_doc(id: str, score: float | None = None, type: ContentType = ContentType.TEXT, related: Iterable[str] | None = None): + meta = {"id": id, "type": type.value} + if score is not None: + meta["score"] = score + if related is not None: + meta["related"] = list(related) + return Document(page_content=f"content-{id}", metadata=meta) + + +@pytest.mark.asyncio +async def test_use_summaries_expands_and_removes_summary(): + # Summary references an underlying doc not in initial results. + underlying = _mk_doc("doc1", score=0.9) + summary = _mk_doc("sum1", type=ContentType.SUMMARY, related=["doc1"]) # type: ignore[arg-type] + vector_db = MockVectorDB({"doc1": underlying}) + retriever = MockRetrieverQuark([summary, underlying], vector_database=vector_db) + + cr = CompositeRetriever(retrievers=[retriever], reranker=None, reranker_enabled=False) + # Directly call _use_summaries for deterministic control + results = cr._use_summaries([summary], [summary]) + + # Underlying doc added (via expansion) & summary removed. + assert len(results) == 1 + assert results[0].metadata["id"] == "doc1" + assert all(d.metadata.get("type") != ContentType.SUMMARY.value for d in results) + + +def test_use_summaries_only_summary_no_related(): + summary = _mk_doc("sum1", type=ContentType.SUMMARY, related=[]) # type: ignore[arg-type] + retriever = MockRetrieverQuark([summary]) + cr = CompositeRetriever(retrievers=[retriever], reranker=None, reranker_enabled=False) + results = cr._use_summaries([summary], [summary]) + # Expect empty list after removal because there are no related expansions. 
+    assert results == []
+
+
+def test_remove_duplicates_preserves_first_occurrence():
+    d1a = _mk_doc("a")
+    d1b = _mk_doc("a")  # duplicate id
+    d2 = _mk_doc("b")
+    retriever = MockRetrieverQuark([d1a, d1b, d2])
+    cr = CompositeRetriever(retrievers=[retriever], reranker=None, reranker_enabled=False)
+    unique = cr._remove_duplicates([d1a, d1b, d2])
+    assert [d.metadata["id"] for d in unique] == ["a", "b"]
+
+
+def test_early_pruning_sorts_by_score_when_all_have_score():
+    docs = [_mk_doc("a", score=0.7), _mk_doc("b", score=0.9), _mk_doc("c", score=0.8)]
+    retriever = MockRetrieverQuark(docs)
+    cr = CompositeRetriever(retrievers=[retriever], reranker=None, reranker_enabled=False, total_retrieved_k_documents=2)
+    pruned = cr._early_pruning(docs.copy())
+    # Expect top two by score descending: b (0.9), c (0.8)
+    assert [d.metadata["id"] for d in pruned] == ["b", "c"]
+
+
+def test_early_pruning_preserves_order_without_scores():
+    docs = [_mk_doc("a"), _mk_doc("b"), _mk_doc("c")]  # no scores
+    retriever = MockRetrieverQuark(docs)
+    cr = CompositeRetriever(retrievers=[retriever], reranker=None, reranker_enabled=False, total_retrieved_k_documents=2)
+    pruned = cr._early_pruning(docs.copy())
+    assert [d.metadata["id"] for d in pruned] == ["a", "b"]
+
+
+@pytest.mark.asyncio
+async def test_arerank_pruning_invokes_reranker_when_needed():
+    docs = [_mk_doc("a", score=0.5), _mk_doc("b", score=0.7), _mk_doc("c", score=0.9)]
+    retriever = MockRetrieverQuark(docs)
+    reranker = MockReranker()
+    cr = CompositeRetriever(
+        retrievers=[retriever],
+        reranker=reranker,
+        reranker_enabled=True,
+        reranker_k_documents=2,
+    )
+    pruned = await cr._arerank_pruning(docs.copy(), retriever_input="question")
+    # Reranker should be invoked and return top-2 by score (ids c, b)
+    assert reranker.invoked is True
+    assert [d.metadata["id"] for d in pruned] == ["c", "b"]
+    assert len(pruned) == 2
+
+
+@pytest.mark.asyncio
+async def test_arerank_pruning_skips_when_not_needed():
+    docs = [_mk_doc("a", score=0.5), _mk_doc("b", score=0.7)]  # already <= k
+    retriever = MockRetrieverQuark(docs)
+    reranker = MockReranker()
+    cr = CompositeRetriever(
+        retrievers=[retriever],
+        reranker=reranker,
+        reranker_enabled=True,
+        reranker_k_documents=3,
+    )
+    pruned = await cr._arerank_pruning(docs.copy(), retriever_input="question")
+    # Not invoked because len(docs) <= reranker_k_documents
+    assert reranker.invoked is False
+    assert pruned == docs
+
+
+# Convenience: allow running this test module directly for quick local dev.
+if __name__ == "__main__":  # pragma: no cover
+    raise SystemExit(pytest.main([__file__]))
diff --git a/libs/rag-core-api/tests/mocks/mock_environment_variables.py b/libs/rag-core-api/tests/mocks/mock_environment_variables.py
index 72a7bbbf..161bac4d 100644
--- a/libs/rag-core-api/tests/mocks/mock_environment_variables.py
+++ b/libs/rag-core-api/tests/mocks/mock_environment_variables.py
@@ -34,7 +34,9 @@ def mock_environment_variables() -> None:

     os.environ["RETRIEVER_THRESHOLD"] = "0.0"
     os.environ["RETRIEVER_K_DOCUMENTS"] = "10"
-    os.environ["RETRIEVER_TOTAL_K"] = "10"
+    # Canonical global cap env. Legacy aliases RETRIEVER_TOTAL_K and RETRIEVER_OVERALL_K_DOCUMENTS
+    # are intentionally not set here to exercise the primary path.
+ os.environ["RETRIEVER_TOTAL_K_DOCUMENTS"] = "10" os.environ["RETRIEVER_TABLE_THRESHOLD"] = "0.0" os.environ["RETRIEVER_TABLE_K_DOCUMENTS"] = "10" os.environ["RETRIEVER_SUMMARY_THRESHOLD"] = "0.0" diff --git a/libs/rag-core-api/tests/mocks/mock_reranker.py b/libs/rag-core-api/tests/mocks/mock_reranker.py new file mode 100644 index 00000000..51994058 --- /dev/null +++ b/libs/rag-core-api/tests/mocks/mock_reranker.py @@ -0,0 +1,19 @@ +"""Mock reranker used by CompositeRetriever unit tests.""" +from langchain_core.documents import Document + +__all__ = ["MockReranker"] + + +class MockReranker: + def __init__(self): + self.invoked = False + + async def ainvoke(self, payload, config=None): # noqa: D401 + self.invoked = True + documents, _query = payload + # Emulate reranker selecting top 2 with highest 'score' if present; else first 2 reversed + if all("score" in d.metadata for d in documents): + docs_sorted = sorted(documents, key=lambda d: d.metadata["score"], reverse=True) + else: # pragma: no cover - fallback path + docs_sorted = list(reversed(documents)) + return docs_sorted[:2] diff --git a/libs/rag-core-api/tests/mocks/mock_retriever_quark.py b/libs/rag-core-api/tests/mocks/mock_retriever_quark.py new file mode 100644 index 00000000..d0c1c660 --- /dev/null +++ b/libs/rag-core-api/tests/mocks/mock_retriever_quark.py @@ -0,0 +1,25 @@ +"""Mock retriever quark for CompositeRetriever unit tests.""" +from typing import List +from langchain_core.documents import Document + +from .mock_vector_db import MockVectorDB + +__all__ = ["MockRetrieverQuark"] + + +class MockRetrieverQuark: + """Minimal stand-in for a RetrieverQuark. + + Exposes an ``ainvoke`` returning pre-seeded documents and a ``_vector_database`` attribute + referenced by summary expansion logic. + """ + + def __init__(self, documents: List[Document], vector_database: MockVectorDB | None = None): + self._documents = documents + self._vector_database = vector_database or MockVectorDB() + + def verify_readiness(self): # pragma: no cover - trivial + return None + + async def ainvoke(self, *_args, **_kwargs): # noqa: D401 - simple passthrough + return self._documents diff --git a/libs/rag-core-api/tests/mocks/mock_vector_db.py b/libs/rag-core-api/tests/mocks/mock_vector_db.py new file mode 100644 index 00000000..ee1bf468 --- /dev/null +++ b/libs/rag-core-api/tests/mocks/mock_vector_db.py @@ -0,0 +1,22 @@ +"""Mock implementation of a minimal vector database interface for tests. 
+ +Provides only the methods required by the CompositeRetriever unit tests: +- get_documents_by_ids: Used during summary expansion +- asearch: (async) provided as a defensive stub +""" +from typing import Dict, List +from langchain_core.documents import Document + +__all__ = ["MockVectorDB"] + + +class MockVectorDB: + def __init__(self, docs_by_id: Dict[str, Document] | None = None): + self.collection_available = True + self._docs_by_id = docs_by_id or {} + + def get_documents_by_ids(self, ids: List[str]) -> List[Document]: # pragma: no cover - simple mapping + return [self._docs_by_id[i] for i in ids if i in self._docs_by_id] + + async def asearch(self, *_, **__): # pragma: no cover - defensive stub + return [] From b73605727cbd63342e52217b4072447bf1d92d66 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Thu, 9 Oct 2025 09:27:15 +0200 Subject: [PATCH 11/11] refactor: improve code formatting and readability across multiple files --- .../src/rag_core_api/dependency_container.py | 4 +++- .../impl/retriever/composite_retriever.py | 10 ++++++++-- .../impl/settings/retriever_settings.py | 2 +- libs/rag-core-api/tests/composite_retriever_test.py | 13 +++++++++---- libs/rag-core-api/tests/mocks/mock_reranker.py | 1 + .../tests/mocks/mock_retriever_quark.py | 1 + libs/rag-core-api/tests/mocks/mock_vector_db.py | 1 + 7 files changed, 24 insertions(+), 8 deletions(-) diff --git a/libs/rag-core-api/src/rag_core_api/dependency_container.py b/libs/rag-core-api/src/rag_core_api/dependency_container.py index 17c0df6d..da247d5a 100644 --- a/libs/rag-core-api/src/rag_core_api/dependency_container.py +++ b/libs/rag-core-api/src/rag_core_api/dependency_container.py @@ -126,7 +126,9 @@ class DependencyContainer(DeclarativeContainer): vectorstore=vectorstore, ) - flashrank_reranker = Singleton(FlashrankRerank, top_n=reranker_settings.k_documents, score_threshold=reranker_settings.min_relevance_score) + flashrank_reranker = Singleton( + FlashrankRerank, top_n=reranker_settings.k_documents, score_threshold=reranker_settings.min_relevance_score + ) reranker = Singleton(FlashrankReranker, flashrank_reranker) information_pieces_uploader = Singleton(DefaultInformationPiecesUploader, vector_database) diff --git a/libs/rag-core-api/src/rag_core_api/impl/retriever/composite_retriever.py b/libs/rag-core-api/src/rag_core_api/impl/retriever/composite_retriever.py index 63e6cef0..01168c3b 100644 --- a/libs/rag-core-api/src/rag_core_api/impl/retriever/composite_retriever.py +++ b/libs/rag-core-api/src/rag_core_api/impl/retriever/composite_retriever.py @@ -226,7 +226,9 @@ def _early_pruning(self, documents: list[Document]) -> list[Document]: documents = documents[: self._total_retrieved_k_documents] return documents - async def _arerank_pruning(self, documents: list[Document], retriever_input: dict, config: Optional[RunnableConfig] = None) -> list[Document]: + async def _arerank_pruning( + self, documents: list[Document], retriever_input: dict, config: Optional[RunnableConfig] = None + ) -> list[Document]: """Prune documents by reranker. Parameters @@ -243,7 +245,11 @@ async def _arerank_pruning(self, documents: list[Document], retriever_input: dic list[Document] The pruned list of documents. 
""" - if self._reranker_k_documents is not None and len(documents) > self._reranker_k_documents and self._reranker_enabled: + if ( + self._reranker_k_documents is not None + and len(documents) > self._reranker_k_documents + and self._reranker_enabled + ): # Only invoke reranker if there are more docs than it will output OR if score missing. try: documents = await self._reranker.ainvoke((documents, retriever_input), config=config) diff --git a/libs/rag-core-api/src/rag_core_api/impl/settings/retriever_settings.py b/libs/rag-core-api/src/rag_core_api/impl/settings/retriever_settings.py index a656169d..29e5ac41 100644 --- a/libs/rag-core-api/src/rag_core_api/impl/settings/retriever_settings.py +++ b/libs/rag-core-api/src/rag_core_api/impl/settings/retriever_settings.py @@ -55,7 +55,7 @@ class Config: default=10, validation_alias=AliasChoices( "TOTAL_K_DOCUMENTS", # canonical -> RETRIEVER_TOTAL_K_DOCUMENTS - "TOTAL_K", # legacy -> RETRIEVER_TOTAL_K + "TOTAL_K", # legacy -> RETRIEVER_TOTAL_K "OVERALL_K_DOCUMENTS", # legacy -> RETRIEVER_OVERALL_K_DOCUMENTS ), ) diff --git a/libs/rag-core-api/tests/composite_retriever_test.py b/libs/rag-core-api/tests/composite_retriever_test.py index d0957ee2..557c9340 100644 --- a/libs/rag-core-api/tests/composite_retriever_test.py +++ b/libs/rag-core-api/tests/composite_retriever_test.py @@ -24,7 +24,9 @@ from mocks.mock_reranker import MockReranker -def _mk_doc(id: str, score: float | None = None, type: ContentType = ContentType.TEXT, related: Iterable[str] | None = None): +def _mk_doc( + id: str, score: float | None = None, type: ContentType = ContentType.TEXT, related: Iterable[str] | None = None +): meta = {"id": id, "type": type.value} if score is not None: meta["score"] = score @@ -60,7 +62,6 @@ def test_use_summaries_only_summary_no_related(): assert results == [] - def test_remove_duplicates_preserves_first_occurrence(): d1a = _mk_doc("a") d1b = _mk_doc("a") # duplicate id @@ -74,7 +75,9 @@ def test_remove_duplicates_preserves_first_occurrence(): def test_early_pruning_sorts_by_score_when_all_have_score(): docs = [_mk_doc("a", score=0.7), _mk_doc("b", score=0.9), _mk_doc("c", score=0.8)] retriever = MockRetrieverQuark(docs) - cr = CompositeRetriever(retrievers=[retriever], reranker=None, reranker_enabled=False, total_retrieved_k_documents=2) + cr = CompositeRetriever( + retrievers=[retriever], reranker=None, reranker_enabled=False, total_retrieved_k_documents=2 + ) pruned = cr._early_pruning(docs.copy()) # Expect top two by score descending: b (0.9), c (0.8) assert [d.metadata["id"] for d in pruned] == ["b", "c"] @@ -83,7 +86,9 @@ def test_early_pruning_sorts_by_score_when_all_have_score(): def test_early_pruning_preserves_order_without_scores(): docs = [_mk_doc("a"), _mk_doc("b"), _mk_doc("c")] # no scores retriever = MockRetrieverQuark(docs) - cr = CompositeRetriever(retrievers=[retriever], reranker=None, reranker_enabled=False, total_retrieved_k_documents=2) + cr = CompositeRetriever( + retrievers=[retriever], reranker=None, reranker_enabled=False, total_retrieved_k_documents=2 + ) pruned = cr._early_pruning(docs.copy()) assert [d.metadata["id"] for d in pruned] == ["a", "b"] diff --git a/libs/rag-core-api/tests/mocks/mock_reranker.py b/libs/rag-core-api/tests/mocks/mock_reranker.py index 51994058..f67a9bc3 100644 --- a/libs/rag-core-api/tests/mocks/mock_reranker.py +++ b/libs/rag-core-api/tests/mocks/mock_reranker.py @@ -1,4 +1,5 @@ """Mock reranker used by CompositeRetriever unit tests.""" + from langchain_core.documents import Document __all__ = 
["MockReranker"] diff --git a/libs/rag-core-api/tests/mocks/mock_retriever_quark.py b/libs/rag-core-api/tests/mocks/mock_retriever_quark.py index d0c1c660..95aa7597 100644 --- a/libs/rag-core-api/tests/mocks/mock_retriever_quark.py +++ b/libs/rag-core-api/tests/mocks/mock_retriever_quark.py @@ -1,4 +1,5 @@ """Mock retriever quark for CompositeRetriever unit tests.""" + from typing import List from langchain_core.documents import Document diff --git a/libs/rag-core-api/tests/mocks/mock_vector_db.py b/libs/rag-core-api/tests/mocks/mock_vector_db.py index ee1bf468..c378963e 100644 --- a/libs/rag-core-api/tests/mocks/mock_vector_db.py +++ b/libs/rag-core-api/tests/mocks/mock_vector_db.py @@ -4,6 +4,7 @@ - get_documents_by_ids: Used during summary expansion - asearch: (async) provided as a defensive stub """ + from typing import Dict, List from langchain_core.documents import Document