Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ jobs:
run: npm cache clean --force || true
- run: npx nx affected -t install --with dev
- run: npx nx affected -t lint --parallel=3
- run: npx nx affected -t type-check --parallel=3

build-packages:
name: Build Packages
Expand Down
244 changes: 210 additions & 34 deletions packages/traceloop-sdk/poetry.lock

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions packages/traceloop-sdk/project.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@
"outputFile": "reports/packages/traceloop-sdk/pylint.txt"
}
},
"type-check": {
"executor": "@nxlv/python:run-commands",
"outputs": [],
"options": {
"command": "poetry run mypy traceloop/sdk",
"cwd": "packages/traceloop-sdk"
}
},
"test": {
"executor": "@nxlv/python:run-commands",
"outputs": [
Expand Down
42 changes: 42 additions & 0 deletions packages/traceloop-sdk/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ autopep8 = "^2.2.0"
flake8 = "7.0.0"
pytest = "^8.2.2"
pytest-sugar = "1.0.0"
mypy = "^1.18.2"
types-requests = "^2.31.0"
types-colorama = "^0.4.15"
pandas-stubs = "*"

[tool.poetry.group.test.dependencies]
openai = "^1.31.1"
Expand All @@ -94,6 +98,44 @@ pandas = ">=1.0.0"
[tool.poetry.extras]
datasets = ["pandas"]

# mypy configuration for the SDK package.
[tool.mypy]
python_version = "3.10"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
disallow_any_unimported = false
no_implicit_optional = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_no_return = true
check_untyped_defs = true
strict_equality = true
namespace_packages = true
explicit_package_bases = true
# Enables the pydantic plugin for mypy.
plugins = ["pydantic.mypy"]

# Exclusion-list approach: every folder is type-checked except the paths listed below.
# NOTE(review): mypy documents `exclude` entries as regular expressions, so the
# unescaped "." in "__init__.py" matches any character - confirm this is acceptable.
exclude = [
"traceloop/sdk/decorators",
"traceloop/sdk/prompts",
"traceloop/sdk/tracing",
"traceloop/sdk/utils",
"traceloop/sdk/__init__.py",
"tests/",
]

# Per-module override: do not report missing-import errors for these modules
# (presumably they ship without type information - confirm).
[[tool.mypy.overrides]]
module = [
"cuid.*",
"posthog.*",
]
ignore_missing_imports = true

# Settings for the pydantic mypy plugin enabled through `plugins` in [tool.mypy].
# In pyproject.toml this table must live under the `tool.` namespace; the bare
# [pydantic-mypy] header is only valid in mypy.ini / setup.cfg.
[tool.pydantic-mypy]
init_forbid_extra = true
init_typed = true
warn_required_dynamic_aliases = true

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
6 changes: 3 additions & 3 deletions packages/traceloop-sdk/tests/test_user_feedback.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_user_feedback_initialization(mock_http):
assert feedback._app_name == "test-app"


def test_create_basic_feedback(user_feedback, mock_http):
def test_create_basic_feedback(user_feedback: UserFeedback, mock_http: Mock):
"""Test creating basic user feedback"""
user_feedback.create(
annotation_task="task_123", entity_id="instance_456", tags={"sentiment": "positive"}
Expand All @@ -56,7 +56,7 @@ def test_create_basic_feedback(user_feedback, mock_http):
)


def test_create_feedback_complex_tags(user_feedback, mock_http):
def test_create_feedback_complex_tags(user_feedback: UserFeedback, mock_http: Mock):
"""Test creating user feedback with complex tags"""
tags = {"sentiment": "positive", "relevance": 0.95, "tones": ["happy", "nice"]}

Expand All @@ -77,7 +77,7 @@ def test_create_feedback_complex_tags(user_feedback, mock_http):
)


def test_create_feedback_parameter_validation(user_feedback):
def test_create_feedback_parameter_validation(user_feedback: UserFeedback):
"""Test parameter validation for feedback creation"""
with pytest.raises(ValueError, match="annotation_task is required"):
user_feedback.create(annotation_task="", entity_id="instance_456", tags={"sentiment": "positive"})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def create(
Args:
annotation_task (str): The ID/slug of the annotation task to report to.
Can be found at app.traceloop.com/annotation_tasks/:annotation_task_id
entity_id (str): The ID of the specific entity instance being annotated, should be reported
entity_id (str): The ID of the specific entity being annotated, should be reported
in the association properties
tags (Dict[str, Any]): Dictionary containing the tags to be reported.
Should match the tags defined in the annotation task
Expand All @@ -39,7 +39,7 @@ def create(
client = Client(api_key="your-key")
client.annotation.create(
annotation_task="task_123",
entity_id="instance_456",
entity_id="456",
tags={
"sentiment": "positive",
"relevance": 0.95,
Expand Down
65 changes: 32 additions & 33 deletions packages/traceloop-sdk/traceloop/sdk/annotation/user_feedback.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,36 +8,35 @@ class UserFeedback(BaseAnnotation):
def __init__(self, http: HTTPClient, app_name: str):
super().__init__(http, app_name, "user_feedback")


def create(
self,
annotation_task: str,
entity_instance_id: str,
tags: Dict[str, Any],
) -> None:
"""Create an annotation for a specific task.

Args:
annotation_task (str): The ID/slug of the annotation task to report to.
Can be found at app.traceloop.com/annotation_tasks/:annotation_task_id
entity_instance_id (str): The ID of the specific entity instance being annotated, should be reported
in the association properties
tags (Dict[str, Any]): Dictionary containing the tags to be reported.
Should match the tags defined in the annotation task

Example:
```python
client = Client(api_key="your-key")
client.annotation.create(
annotation_task="task_123",
entity_instance_id="instance_456",
tags={
"sentiment": "positive",
"relevance": 0.95,
"tones": ["happy", "nice"]
},
)
```
"""

return BaseAnnotation.create(self, annotation_task, entity_instance_id, tags)
def create(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like an unneeded change

self,
annotation_task: str,
entity_id: str,
tags: Dict[str, Any],
) -> None:
"""Create an annotation for a specific task.

Args:
annotation_task (str): The ID/slug of the annotation task to report to.
Can be found at app.traceloop.com/annotation_tasks/:annotation_task_id
entity_id (str): The ID of the specific entity being annotated, should be reported
in the association properties
tags (Dict[str, Any]): Dictionary containing the tags to be reported.
Should match the tags defined in the annotation task

Example:
```python
client = Client(api_key="your-key")
client.annotation.create(
annotation_task="task_123",
entity_id="instance_456",
tags={
"sentiment": "positive",
"relevance": 0.95,
"tones": ["happy", "nice"]
},
)
```
"""

return BaseAnnotation.create(self, annotation_task, entity_id, tags)
3 changes: 2 additions & 1 deletion packages/traceloop-sdk/traceloop/sdk/client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,5 @@ def __init__(
self.user_feedback = UserFeedback(self._http, self.app_name)
self.datasets = Datasets(self._http)
experiment_slug = os.getenv("TRACELOOP_EXP_SLUG")
self.experiment = Experiment(self._http, self._async_http, experiment_slug)
# TODO: Fix type - Experiment constructor should accept Optional[str]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

self.experiment = Experiment(self._http, self._async_http, experiment_slug) # type: ignore[arg-type]
2 changes: 1 addition & 1 deletion packages/traceloop-sdk/traceloop/sdk/client/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def __init__(self, base_url: str, api_key: str, version: str):
self.api_key = api_key
self.version = version

def _headers(self):
def _headers(self) -> Dict[str, str]:
return {
"Authorization": f"Bearer {self.api_key}",
"X-Traceloop-SDK-Version": self.version,
Expand Down
4 changes: 2 additions & 2 deletions packages/traceloop-sdk/traceloop/sdk/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def add_column(self, slug: str, name: str, col_type: ColumnType) -> Column:
self.columns.append(column)
return column

def _create_columns(self, raw_columns: Dict[str, ColumnDefinition]):
def _create_columns(self, raw_columns: Dict[str, ColumnDefinition]) -> None:
"""Create Column objects from API response which includes column IDs"""
for column_slug, column_def in raw_columns.items():
column = Column(
Expand All @@ -98,7 +98,7 @@ def _create_columns(self, raw_columns: Dict[str, ColumnDefinition]):
)
self.columns.append(column)

def _create_rows(self, raw_rows: List[RowObject]):
def _create_rows(self, raw_rows: List[RowObject]) -> None:
for _, row_obj in enumerate(raw_rows):
row = Row(
http=self._http,
Expand Down
13 changes: 9 additions & 4 deletions packages/traceloop-sdk/traceloop/sdk/datasets/datasets.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import csv
from typing import List, Optional
from typing import List, Optional, cast
from pathlib import Path

try:
Expand Down Expand Up @@ -81,6 +81,10 @@ def from_csv(

reader = csv.DictReader(csvfile, delimiter=delimiter)

# TODO: Handle None case for fieldnames more gracefully
if reader.fieldnames is None:
raise ValueError("CSV file has no headers")

for field_name in reader.fieldnames:
columns_definition.append(
ColumnDefinition(
Expand Down Expand Up @@ -138,8 +142,9 @@ def from_dataframe(
)
)

# TODO: Pandas returns Hashable keys, should ensure they're strings
rows = [
{self._slugify(k): v for k, v in row.items()}
{self._slugify(str(k)): v for k, v in row.items()}
for row in df.to_dict(orient="records")
]

Expand All @@ -160,14 +165,14 @@ def get_version_csv(self, slug: str, version: str) -> str:
result = self._http.get(f"datasets/{slug}/versions/{version}")
if result is None:
raise Exception(f"Failed to get dataset {slug} by version {version}")
return result
return cast(str, result)

def get_version_jsonl(self, slug: str, version: str) -> str:
"""Get a specific version of a dataset as a JSONL string"""
result = self._http.get(f"datasets/{slug}/versions/{version}/jsonl")
if result is None:
raise Exception(f"Failed to get dataset {slug} by version {version}")
return result
return cast(str, result)

def _create_dataset(self, input: CreateDatasetRequest) -> CreateDatasetResponse:
"""Create new dataset"""
Expand Down
6 changes: 2 additions & 4 deletions packages/traceloop-sdk/traceloop/sdk/decorators/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, TypeVar, Callable, Any, ParamSpec, Awaitable
from typing import Any, Optional, TypeVar, Callable
import warnings

from opentelemetry.semconv_ai import TraceloopSpanKindValues
Expand All @@ -8,9 +8,7 @@
entity_method,
)

P = ParamSpec("P")
R = TypeVar("R")
F = TypeVar("F", bound=Callable[P, R | Awaitable[R]])
Comment on lines -11 to -13
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I got this with the agent after allowing mypy

The issue: The TypeVar F has a bound that uses ParamSpec and is too strict:

  1. Coroutine[Any, Any, WorkflowFinalOutput] is more specific than Awaitable[R], but TypeVars require exact matching (invariance)
  2. The ParamSpec creates strict parameter matching that doesn't work well with instance methods (which include self)
  3. This appears to be introduced in a recent traceloop-sdk commit (77bb066) that tried to "improve type safety"

Why It Looks Good in traceloop-sdk

The decorator types look correct syntactically, but they're overly restrictive for real-world usage patterns. The traceloop-sdk itself doesn't strictly type-check the decorators folder, so this issue wasn't caught in their own type checking.

F = TypeVar("F", bound=Callable[..., Any])


def task(
Expand Down
9 changes: 2 additions & 7 deletions packages/traceloop-sdk/traceloop/sdk/decorators/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@
from functools import wraps
import os
from typing import (
Optional,
TypeVar,
Optional,
Callable,
Any,
cast,
ParamSpec,
Awaitable,
)
import inspect
import warnings
Expand All @@ -28,10 +26,7 @@
from traceloop.sdk.utils import camel_to_snake
from traceloop.sdk.utils.json_encoder import JSONEncoder

P = ParamSpec("P")

R = TypeVar("R")
F = TypeVar("F", bound=Callable[P, R | Awaitable[R]])
F = TypeVar("F", bound=Callable[..., Any])


def _truncate_json_if_needed(json_str: str) -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,13 @@ async def wait_for_result(
except Exception as e:
raise Exception(f"Unexpected error in SSE stream: {e}")

async def _handle_sse_response(self, response) -> ExecutionResponse:
async def _handle_sse_response(self, response: httpx.Response) -> ExecutionResponse:
"""Handle SSE response: check status and parse result"""
if response.status_code != 200:
error_text = await response.aread()
raise Exception(
f"Failed to stream results: {response.status_code}, body: {error_text}"
)
# TODO: Fix bytes formatting - should decode error_text or use !r
error_msg = f"Failed to stream results: {response.status_code}, body: {error_text}" # type: ignore[str-bytes-safe] # noqa: E501
raise Exception(error_msg)
Comment on lines +59 to +61
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Fix bytes formatting in error message.

The error message includes error_text (bytes) directly in the f-string, which will display as b'...' instead of the actual error text. This degrades the error message quality for users.

Apply this diff to decode the bytes and remove the type ignore:

-        error_text = await response.aread()
-        # TODO: Fix bytes formatting - should decode error_text or use !r
-        error_msg = f"Failed to stream results: {response.status_code}, body: {error_text}"  # type: ignore[str-bytes-safe]  # noqa: E501
-        raise Exception(error_msg)
+        error_text = await response.aread()
+        error_msg = f"Failed to stream results: {response.status_code}, body: {error_text.decode('utf-8', errors='replace')}"
+        raise Exception(error_msg)

Additional note: As per static analysis, the # noqa: E501 directive on line 60 is unused and can be removed (already removed in the suggested fix above).

Committable suggestion skipped: line range outside the PR's diff.

🧰 Tools
🪛 Ruff (0.14.5)

60-60: Unused noqa directive (non-enabled: E501)

Remove unused noqa directive

(RUF100)


61-61: Create your own exception

(TRY002)

🤖 Prompt for AI Agents
In packages/traceloop-sdk/traceloop/sdk/evaluator/stream_client.py around lines
59 to 61, the error_msg f-string includes error_text which may be bytes and
renders as b'...'; decode error_text before interpolating (e.g. error_str =
error_text.decode('utf-8', errors='replace') if isinstance(error_text, (bytes,
bytearray)) else str(error_text)) and use that decoded string in the f-string,
and remove the trailing "# type: ignore[str-bytes-safe]" and the unused "# noqa:
E501" comments so static analysis passes.


response_text = await response.aread()
return self._parse_sse_result(response_text.decode())
Expand Down
19 changes: 11 additions & 8 deletions packages/traceloop-sdk/traceloop/sdk/experiment/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,15 @@ async def run(
results: List[TaskResponse] = []
errors: List[str] = []

async def run_single_row(row) -> TaskResponse:
async def run_single_row(row: Optional[Dict[str, Any]]) -> TaskResponse:
try:
task_result = await task(row)
# TODO: Fix type annotation - task should return Awaitable, not dict
task_result = await task(row) # type: ignore[misc]
# TODO: Fix type - task_input should accept Optional[Dict]
task_id = self._create_task(
experiment_slug=experiment_slug,
experiment_run_id=run_id,
task_input=row,
task_input=row, # type: ignore[arg-type]
task_output=task_result,
).id
Comment on lines +98 to 108
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Address the Optional[Dict] vs Dict type mismatch.

The row parameter is typed as Optional[Dict[str, Any]] (line 98), but _create_task expects a non-optional Dict[str, Any] for task_input (line 214). The type: ignore[arg-type] on line 106 suppresses the error without handling the potential None case.

If row can actually be None at runtime, add a guard:

                 task_result = await task(row)  # type: ignore[misc]
+                if row is None:
+                    raise ValueError("Cannot create task with None input")
                 task_id = self._create_task(
                     experiment_slug=experiment_slug,
                     experiment_run_id=run_id,
-                    task_input=row,  # type: ignore[arg-type]
+                    task_input=row,
                     task_output=task_result,
                 ).id

Otherwise, if row is guaranteed to be non-None (since it comes from the rows list parsed from JSONL), change the type annotation:

-        async def run_single_row(row: Optional[Dict[str, Any]]) -> TaskResponse:
+        async def run_single_row(row: Dict[str, Any]) -> TaskResponse:
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
async def run_single_row(row: Optional[Dict[str, Any]]) -> TaskResponse:
try:
task_result = await task(row)
# TODO: Fix type annotation - task should return Awaitable, not dict
task_result = await task(row) # type: ignore[misc]
# TODO: Fix type - task_input should accept Optional[Dict]
task_id = self._create_task(
experiment_slug=experiment_slug,
experiment_run_id=run_id,
task_input=row,
task_input=row, # type: ignore[arg-type]
task_output=task_result,
).id
async def run_single_row(row: Dict[str, Any]) -> TaskResponse:
try:
# TODO: Fix type annotation - task should return Awaitable, not dict
task_result = await task(row) # type: ignore[misc]
# TODO: Fix type - task_input should accept Optional[Dict]
task_id = self._create_task(
experiment_slug=experiment_slug,
experiment_run_id=run_id,
task_input=row, # type: ignore[arg-type]
task_output=task_result,
).id
🤖 Prompt for AI Agents
In packages/traceloop-sdk/traceloop/sdk/experiment/experiment.py around lines
98–108, the function run_single_row types row as Optional[Dict[str, Any]] but
passes it into self._create_task which requires a non-optional Dict, and the
current type: ignore[arg-type] masks this; fix by either (A) if row can be None
at runtime: add an explicit guard before calling _create_task (e.g., if row is
None: raise a clear ValueError/TypeError with context) and then pass row (typed
as Dict) without type ignores, or (B) if row is never None (rows come from
validated JSONL), change the run_single_row parameter annotation to Dict[str,
Any] and adjust any callers accordingly, then remove the type: ignore[arg-type];
ensure the chosen approach removes the type ignore and the mypy error is
resolved.


Expand Down Expand Up @@ -132,12 +134,13 @@ async def run_single_row(row) -> TaskResponse:
input=task_result,
)

eval_results[evaluator_slug] = (
f"Triggered execution of {evaluator_slug}"
)
# TODO: Fix type - eval_results should accept Union[Dict, str]
msg = f"Triggered execution of {evaluator_slug}"
eval_results[evaluator_slug] = msg # type: ignore[assignment]

except Exception as e:
eval_results[evaluator_slug] = f"Error: {str(e)}"
# TODO: Fix type - eval_results should accept Union[Dict, str]
eval_results[evaluator_slug] = f"Error: {str(e)}" # type: ignore[assignment]

return TaskResponse(
task_result=task_result,
Expand All @@ -151,7 +154,7 @@ async def run_single_row(row) -> TaskResponse:

semaphore = asyncio.Semaphore(50)

async def run_with_semaphore(row) -> TaskResponse:
async def run_with_semaphore(row: Optional[Dict[str, Any]]) -> TaskResponse:
async with semaphore:
return await run_single_row(row)

Expand Down
Loading