From 2d2ffc8f9ca33afa04a0aa36d090906d876e08f7 Mon Sep 17 00:00:00 2001 From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com> Date: Mon, 27 Apr 2026 14:54:45 +0900 Subject: [PATCH] chore: improve agent development setup --- .codex/environments/environment.toml | 5 ++ .github/PULL_REQUEST_TEMPLATE.md | 30 ++++++++ .github/workflows/validate-pr-title.yml | 45 ++++++++++++ .gitignore | 5 ++ AGENTS.md | 24 ++++++- CONTRIBUTING.md | 96 +++++++++++++++++++------ code_review.md | 38 ++++++++++ scripts/codex/maintenance.sh | 8 +++ scripts/codex/quick-check.sh | 9 +++ scripts/codex/setup.sh | 13 ++++ 10 files changed, 249 insertions(+), 24 deletions(-) create mode 100644 .codex/environments/environment.toml create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/workflows/validate-pr-title.yml create mode 100644 code_review.md create mode 100755 scripts/codex/maintenance.sh create mode 100755 scripts/codex/quick-check.sh create mode 100755 scripts/codex/setup.sh diff --git a/.codex/environments/environment.toml b/.codex/environments/environment.toml new file mode 100644 index 000000000..a5a97658e --- /dev/null +++ b/.codex/environments/environment.toml @@ -0,0 +1,5 @@ +version = 1 +name = "langfuse-python" + +[setup] +script = "bash scripts/codex/setup.sh" diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 000000000..a06986558 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,30 @@ +## What does this PR do? + +> PR title must follow Conventional Commits, for example `feat: add dataset scoring helper` or `fix(openai): preserve trace context`. + +Fixes # + +## Type of change + +- [ ] Bug fix +- [ ] New feature +- [ ] Breaking change +- [ ] Refactor +- [ ] Documentation update +- [ ] Tooling, CI, or repo maintenance + +## Verification + +List the main commands you ran: + +```bash + +``` + +## Checklist + +- [ ] I self-reviewed the diff using `code_review.md`. 
+- [ ] I added or updated tests for behavior changes. +- [ ] I updated docs, examples, or `.env.template` if needed. +- [ ] I did not hand-edit generated files; if generated files changed, I used the upstream regeneration path. +- [ ] I did not commit secrets or credentials. diff --git a/.github/workflows/validate-pr-title.yml b/.github/workflows/validate-pr-title.yml new file mode 100644 index 000000000..1d94b4f6b --- /dev/null +++ b/.github/workflows/validate-pr-title.yml @@ -0,0 +1,45 @@ +--- +name: "Validate PR Title" + +on: + pull_request: + branches: + - "**" + types: + - opened + - edited + - synchronize + - reopened + +permissions: {} + +jobs: + validate-pr-title: + runs-on: ubuntu-latest + permissions: + statuses: write + pull-requests: read + steps: + - name: Validate PR title follows conventional commits + uses: amannn/action-semantic-pull-request@48f256284bd46cdaab1048c3721360e808335d50 # v6.1.1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + types: | + feat + fix + docs + style + refactor + perf + test + build + ci + chore + revert + security + requireScope: false + validateSingleCommit: false + ignoreLabels: | + bot + ignore-semantic-pull-request diff --git a/.gitignore b/.gitignore index bebafdd05..bfd138387 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,8 @@ docs tests/mocks/llama-index-storage *.local.* + +# Codex local runtime state +.codex/log/ +.codex/sessions/ +.codex/tmp/ diff --git a/AGENTS.md b/AGENTS.md index eff827d76..a1a6f5846 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -25,6 +25,7 @@ This repository contains the Langfuse Python SDK. - `tests/live_provider/`: live OpenAI / LangChain provider tests - `tests/support/`: shared helpers for e2e tests - `scripts/select_e2e_shard.py`: CI shard selector for `tests/e2e` +- `scripts/codex/`: Codex cloud/worktree bootstrap and shared quick checks ## Working Style @@ -34,6 +35,8 @@ This repository contains the Langfuse Python SDK. - Keep repo-shared instructions here. 
Keep personal or machine-specific notes out of version control. - Keep tests independent and parallel-safe by default. - For bug fixes, prefer writing or identifying the failing test first, confirm the failure, then implement the fix. +- For complex or ambiguous tasks, plan first, identify the likely verification path, then implement. +- Before final handoff, review the diff for correctness, regressions, missing tests, and accidental generated-file edits. ## Setup And Quality Commands @@ -43,6 +46,7 @@ uv run pre-commit install uv run --frozen ruff check . uv run --frozen ruff format . uv run --frozen mypy langfuse --no-error-summary +bash scripts/codex/quick-check.sh ``` ## Test Commands @@ -66,6 +70,18 @@ uv run --frozen pytest -n 4 --dist worksteal tests/live_provider -m "live_provid uv run --frozen pytest tests/unit/test_resource_manager.py::test_pause_signals_score_consumer_shutdown ``` +Minimum verification matrix: + +| Change scope | Minimum verification | +| --- | --- | +| Docs or comments only | `uv run --frozen ruff format --check .` if Python files changed | +| Python source only | `uv run --frozen ruff check .` + `uv run --frozen mypy langfuse --no-error-summary` + targeted unit tests | +| Unit-test-only change | targeted `uv run --frozen pytest ...` for the changed tests | +| Shutdown, flushing, worker-thread, or OTEL-heavy change | targeted resource-manager/OTEL tests plus affected integration tests when relevant | +| OpenAI or LangChain instrumentation | targeted unit tests using exporter-local assertions; add e2e/live-provider coverage only when unit tests cannot cover behavior | +| Generated API client or public API contract | upstream Fern/OpenAPI regeneration path plus targeted SDK serialization/deserialization tests | +| CI, sharding, or bootstrap | relevant script test plus CI workflow review against this file's CI contract | + ## Test Topology ### `tests/unit` @@ -96,6 +112,7 @@ The main CI workflow currently runs: - `tests/unit` on a Python 
3.10-3.14 matrix - `tests/e2e` in 2 mechanical shards plus a serial subset inside each shard - `tests/live_provider` as one always-on suite +- PR title validation for Conventional Commits (separate `validate-pr-title.yml` workflow) If you change the e2e split: @@ -113,6 +130,7 @@ If you change CI bootstrap: - Keep changes scoped. Avoid unrelated refactors. - Prefer `LANGFUSE_BASE_URL`; `LANGFUSE_HOST` is deprecated and is only kept for compatibility tests. - If you touch `langfuse/api/`, regenerate it from the upstream Fern/OpenAPI source instead of hand-editing files. +- If you change public SDK behavior, update examples, README snippets, or generated reference docs when they would otherwise become stale. - If you touch shutdown, flushing, or worker-thread behavior, run the relevant resource-manager and OTEL-heavy tests. - If you change OpenAI or LangChain instrumentation, keep as much coverage as possible in `tests/unit` using exporter-local assertions, and leave only the minimal necessary coverage in `tests/e2e` / `tests/live_provider`. - Never commit secrets or credentials. @@ -120,9 +138,11 @@ If you change CI bootstrap: ## Commit And PR Rules -- Commit messages and PR titles should follow Conventional Commits: `type(scope): description` or `type: description`. +- Commit messages and PR titles must follow Conventional Commits: `type(scope): description` or `type: description`. +- Allowed types are `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, `chore`, `revert`, and `security`. - Keep commits focused and atomic. -- In PR descriptions, list the main verification commands you ran. +- Before opening a PR, self-review the diff and check `code_review.md` for the repo-specific review checklist. +- In PR descriptions, list the main verification commands you ran and call out any skipped checks with the reason. 
## Python-Specific Notes diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 946400a5d..45f1ed55d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,48 +4,100 @@ ### Install dependencies -``` -uv sync +```bash +uv sync --locked ``` -### Add Pre-commit +### Add pre-commit -``` +```bash uv run pre-commit install ``` -### Type Checking +### Quality checks -To run type checking on the langfuse package, run: -```sh -uv run mypy langfuse --no-error-summary +```bash +uv run --frozen ruff check . +uv run --frozen ruff format . +uv run --frozen mypy langfuse --no-error-summary +``` + +For a broad local confidence check, run: + +```bash +bash scripts/codex/quick-check.sh ``` ### Tests -#### Setup +Unit tests do not require a running Langfuse server: -- Add .env based on .env.template +```bash +uv run --frozen pytest -n auto --dist worksteal tests/unit +``` -#### Run +E2E tests require a running Langfuse server and environment variables based on `.env.template`: -- Run all +```bash +uv run --frozen pytest -n 4 --dist worksteal tests/e2e -m "not serial_e2e" +uv run --frozen pytest tests/e2e -m "serial_e2e" +``` + +Live-provider tests make real provider calls and require provider API keys: + +```bash +uv run --frozen pytest -n 4 --dist worksteal tests/live_provider -m "live_provider" +``` + +Run a specific test with: - ``` - uv run --env-file .env pytest -s -v --log-cli-level=INFO - ``` +```bash +uv run --frozen pytest tests/unit/test_resource_manager.py::test_pause_signals_score_consumer_shutdown +``` + +## Codex Cloud Setup + +This repository includes repo-owned Codex setup so agents can start from a reproducible environment. + +Recommended Codex UI configuration: + +1. Create a Codex cloud environment for this repository. +2. Set the setup script to: + + ```bash + bash scripts/codex/setup.sh + ``` + +3. Set the maintenance script to: + + ```bash + bash scripts/codex/maintenance.sh + ``` + +4. 
Keep agent internet access disabled by default, or allow only the domains required for the task. +5. Add secrets and environment variables in the Codex UI instead of committing them. + +## Pull Requests + +PR titles and commit messages must follow Conventional Commits: + +```text +type(scope): description +type: description +``` -- Run a specific test +Allowed types are `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, `chore`, `revert`, and `security`. - ``` - uv run --env-file .env pytest -s -v --log-cli-level=INFO tests/test_core_sdk.py::test_flush - ``` +Before opening a PR: -- E2E tests involving OpenAI and Serp API are usually skipped, remove skip decorators in [tests/test_langchain.py](tests/test_langchain.py) to run them. +- Self-review the diff and use `code_review.md` for the repo-specific checklist. +- Keep changes focused and avoid unrelated refactors. +- Add or update tests for behavior changes. +- List the verification commands you ran in the PR description. -### Update openapi spec +### Update OpenAPI spec -A PR with the changes is automatically created upon changing the Spec in the langfuse repo. +The generated API client in `langfuse/api/` must not be hand-edited. Regenerate it from the upstream Fern/OpenAPI source. ### Publish release diff --git a/code_review.md b/code_review.md new file mode 100644 index 000000000..a3c55051c --- /dev/null +++ b/code_review.md @@ -0,0 +1,38 @@ +# Langfuse Python SDK Review Checklist + +Use this checklist for `/review`, PR review, or self-review before handoff. + +## Priorities + +- Findings first: correctness bugs, regressions, security/privacy risks, performance issues with real impact, and missing tests for risky behavior. +- Keep line references tight and actionable. +- If no findings, say so explicitly and mention any residual risk or unrun verification. + +## SDK Correctness + +- Public SDK behavior should remain backwards compatible unless the PR is explicitly breaking. 
+- Prefer `LANGFUSE_BASE_URL`; `LANGFUSE_HOST` is deprecated and should only appear in compatibility paths or tests. +- Check shutdown, flushing, background task, and resource-manager changes for races, dropped events/scores/media, daemon-thread leaks, and hanging interpreter shutdown. +- OpenTelemetry changes should preserve context propagation, span parenting, exporter-local testability, and idempotent instrumentation setup. +- OpenAI and LangChain instrumentation should avoid brittle assertions on provider internals; prefer stable exporter-local behavior in unit tests. + +## API And Generated Code + +- Do not hand-edit `langfuse/api/`; regenerate it from the upstream Fern/OpenAPI source. +- Public API or serialization changes should include tests for request shape, response shape, and backwards-compatible aliases when relevant. +- Update README examples, `.env.template`, or generated reference docs when changed behavior would make them stale. + +## Tests And CI + +- Unit tests must not require a running Langfuse server. +- E2E tests should use bounded polling helpers from `tests/support/`, not raw `sleep()`. +- New e2e files must be named `tests/e2e/test_*.py` so mechanical CI sharding includes them. +- Use `serial_e2e` only for tests that are unsafe with shared-server concurrency. +- Live-provider tests should assert stable provider-facing behavior, not exact observation counts unless counts are the behavior under test. + +## Python Style + +- Exception messages should not inline f-string literals in `raise` statements; build the message in a variable first. +- Keep edits ASCII-only unless the file already uses Unicode or Unicode is clearly required. +- Keep changes scoped; avoid opportunistic refactors. +- Never commit secrets or credentials. 
diff --git a/scripts/codex/maintenance.sh b/scripts/codex/maintenance.sh
new file mode 100755
index 000000000..aba80e6a3
--- /dev/null
+++ b/scripts/codex/maintenance.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Codex maintenance: re-sync the locked environment, then trim the uv cache.
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+cd "$repo_root"
+# The sync must succeed; the cache prune is best-effort (output and errors ignored).
+uv sync --locked
+uv cache prune --ci >/dev/null 2>&1 || true
diff --git a/scripts/codex/quick-check.sh b/scripts/codex/quick-check.sh
new file mode 100755
index 000000000..fd3c1823c
--- /dev/null
+++ b/scripts/codex/quick-check.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Quick local check: lint, type-check, and run the unit suite from the repo root.
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+cd "$repo_root"
+# set -e aborts on the first failing step, so later checks do not run after a failure.
+uv run --frozen ruff check .
+uv run --frozen mypy langfuse --no-error-summary
+uv run --frozen pytest -n auto --dist worksteal tests/unit
diff --git a/scripts/codex/setup.sh b/scripts/codex/setup.sh
new file mode 100755
index 000000000..b6e7d30f5
--- /dev/null
+++ b/scripts/codex/setup.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Codex setup: install the pinned uv if it is missing, then sync the locked env.
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+cd "$repo_root"
+# pip --user puts scripts in "<user base>/bin"; that is not always ~/.local/bin.
+if ! command -v uv >/dev/null 2>&1; then
+  python3 -m pip install --user "uv==0.11.2"
+  user_bin="$(python3 -m site --user-base)/bin"
+  export PATH="$user_bin:$PATH"
+fi
+uv sync --locked
+uv run --frozen python --version