diff --git a/.github/workflows/check-import-mappings.yml b/.github/workflows/check-import-mappings.yml
new file mode 100644
index 0000000000..bd0fd8963e
--- /dev/null
+++ b/.github/workflows/check-import-mappings.yml
@@ -0,0 +1,115 @@
+---
+name: Check Import Mappings
+
+on:
+  schedule:
+    # Run every 3 days at midnight UTC
+    - cron: "0 0 */3 * *"
+  workflow_dispatch: # Allow manual trigger
+
+jobs:
+  check-mappings:
+    name: Check langchain_core re-exports in langchain
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@v5
+
+      - name: Set up Python 3.13 + uv
+        uses: "./.github/actions/uv_setup"
+        with:
+          python-version: "3.13"
+
+      - name: Install dependencies
+        run: |
+          uv sync --group test
+
+      - name: Check import mappings
+        run: |
+          uv run scripts/check_import_mappings.py
+
+      - name: Upload import mappings as artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: import-mappings
+          path: |
+            scripts/import_mappings.json
+          retention-days: 7
+
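+  # This second job starts from a fresh checkout and only sees the regenerated
+  # JSON via the artifact download; the `git diff --cached --quiet` guard below
+  # skips PR creation when the mappings are unchanged.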
+  commit-mappings:
+    name: PR updated import mappings
+    needs: check-mappings
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
+    steps:
+      - uses: actions/checkout@v5
+
+      - name: Download updated files
+        uses: actions/download-artifact@v4
+        with:
+          name: import-mappings
+          path: scripts/
+
+      - name: PR with changes
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          git config --global user.name "github-actions[bot]"
+          git config --global user.email "github-actions[bot]@users.noreply.github.com"
+
+          # Bail out if the artifact is missing or empty
+          if [ ! -f scripts/import_mappings.json ]; then
+            echo "No import mappings file generated"
+            exit 0
+          fi
+
+          if [ ! -s scripts/import_mappings.json ]; then
+            echo "Empty import mappings file"
+            exit 0
+          fi
+
+          # Create branch with timestamp
+          BRANCH_NAME="chore/update-import-mappings-$(date +%Y%m%d-%H%M%S)"
+          git checkout -b "$BRANCH_NAME"
+
+          # Commit changes
+          git add scripts/import_mappings.json
+
+          # Check if there are staged changes
+          if git diff --cached --quiet; then
+            echo "No changes to commit"
+            exit 0
+          fi
+
+          git commit -m "$(cat <<'EOF'
+          chore: update `langchain_core` import mappings
+
+          šŸ¤– Automated analysis of `langchain_core` re-exports in `langchain` package
+
+          Generated with GitHub Actions workflow `check-import-mappings.yml`
+          EOF
+          )"
+
+          git push origin "$BRANCH_NAME"
+          gh pr create \
+            --title 'chore: update `langchain_core` import mappings' \
+            --body "$(cat <<'EOF'
+          ## Summary
+          Automated analysis of `langchain_core` re-exports in `langchain` package
+
+          ## Details
+          - Analyzes latest releases of `langchain` and `langchain_core` from PyPI
+          - Identifies all members re-exported from `langchain_core` in `langchain` public `__init__` files
+          - Stores results in `import_mappings.json`
+          - Generated by GitHub Actions workflow `check-import-mappings.yml`
+          - Scheduled to run every 3 days at midnight UTC
+
+          šŸ¤– This PR was created automatically by GitHub Actions
+          EOF
+          )" \
+            --base main \
+            --head "$BRANCH_NAME"
diff --git a/.github/workflows/check-pr-imports.yml b/.github/workflows/check-pr-imports.yml
new file mode 100644
index 0000000000..e8041f2bc7
--- /dev/null
+++ b/.github/workflows/check-pr-imports.yml
@@ -0,0 +1,139 @@
+---
+name: Check PR Imports
+
+on:
+  pull_request:
+    branches: [main]
+    paths:
+      - "**/*.py"
+      - "**/*.md"
+      - "**/*.ipynb"
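+      # Markdown and notebook files are checked too, since imports inside docs
+      # code fences show up as added diff lines just like .py changes.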
+
+jobs:
+  check-imports:
+    name: Check for incorrect langchain_core imports
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          fetch-depth: 0 # Need full history to compare with base branch
+
+      - name: Set up Python 3.13 + uv
+        uses: "./.github/actions/uv_setup"
+        with:
+          python-version: "3.13"
+
+      - name: Install dependencies
+        run: |
+          uv sync --group test
+
+      - name: Ensure import mappings exist
+        id: check-mappings
+        run: |
+          if [ -f "scripts/import_mappings.json" ]; then
+            echo "mappings_exist=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "mappings_exist=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Generate import mappings if missing
+        if: steps.check-mappings.outputs.mappings_exist == 'false'
+        run: |
+          echo "Import mappings not found, generating..."
+          uv run scripts/check_import_mappings.py
+
+      - name: Check PR
+        id: check-imports
+        run: |
+          if uv run scripts/check_pr_imports.py > import_check_output.txt 2>&1; then
+            echo "check_passed=true" >> "$GITHUB_OUTPUT"
+            echo "No import issues found"
+          else
+            echo "check_passed=false" >> "$GITHUB_OUTPUT"
+            echo "Import issues found"
+            cat import_check_output.txt
+          fi
+
+      - name: Comment on PR with issues
+        if: steps.check-imports.outputs.check_passed == 'false'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            let output = '';
+            try {
+              output = fs.readFileSync('import_check_output.txt', 'utf8');
+            } catch (error) {
+              output = 'Error reading import check output';
+            }
+
+            const body = `## āŒ Import check failed
+
+            This PR contains imports from \`langchain_core\` that should be imported from \`langchain\` instead.
+
+            <details>
+            <summary>Detailed issues</summary>
+
+            \`\`\`
+            ${output}
+            \`\`\`
+
+            </details>
+
+            ### Why this is a problem
+
+            The \`langchain\` package re-exports many modules and classes from \`langchain_core\`. When possible, imports should use \`langchain\` instead of \`langchain_core\` for:
+            - Better user experience (single import source)
+            - Consistency across documentation
+            - Reduced cognitive load for users
+
+            ### How to fix
+
+            Replace the imports as suggested above. For example:
+            - āŒ \`from langchain_core.messages import HumanMessage\`
+            - āœ… \`from langchain.messages import HumanMessage\`
+
+            ### šŸ¤– Automated check
+
+            This check is based on the latest analysis of \`langchain\` re-exports from \`langchain_core\`.
+            `;
+
+            // Check if we already commented
+            const comments = await github.rest.issues.listComments({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+            });
+
+            const botComment = comments.data.find(comment =>
+              comment.user.type === 'Bot' &&
+              comment.body.includes('Import check failed')
+            );
+
+            if (botComment) {
+              // Update existing comment
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: botComment.id,
+                body: body
+              });
+            } else {
+              // Create new comment
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body: body
+              });
+            }
+
+      - name: Fail the check if issues found
+        if: steps.check-imports.outputs.check_passed == 'false'
+        run: |
+          echo "āŒ Import check failed. Please fix the issues above."
+          exit 1
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000000..a9a18d78f2
--- /dev/null
+++ b/scripts/__init__.py
@@ -0,0 +1 @@
+"""Scripts."""
diff --git a/scripts/check_import_mappings.py b/scripts/check_import_mappings.py
new file mode 100755
index 0000000000..3e648b4aa0
--- /dev/null
+++ b/scripts/check_import_mappings.py
@@ -0,0 +1,255 @@
+#!/usr/bin/env python3
+"""Check `langchain_core` re-exports in `langchain`.
+
+1. Fetch latest releases of `langchain_core` and `langchain` from PyPI
+2. Introspect all public `__init__` files in `langchain`
+3. Identify members (in `langchain`) that are just re-exports from `langchain_core`
+4. Store results in `import_mappings.json`
+
+Results are used to flag inbound docs PRs that use `langchain_core` imports for
+symbols that could be imported from `langchain` instead.
+
+## Output Format (import_mappings.json)
+
+The generated JSON file contains the following structure:
+
+```json
+{
+  "metadata": {
+    "langchain_version": "1.0.8",       // Version of langchain analyzed
+    "langchain_core_version": "1.1.0",  // Version of langchain_core analyzed
+    "total_init_files": 8               // Number of __init__.py files analyzed
+  },
+  "analysis": [
+    {
+      "file": "langchain/messages/__init__.py",  // Analyzed file (relative path)
+      "langchain_core_imports": {       // Raw imports from langchain_core
+        "HumanMessage": {
+          "module": "langchain_core.messages",  // Source module
+          "original_name": "HumanMessage"       // Original symbol name
+        }
+      },
+      "all_exports": [                  // All symbols exported by this module
+        "HumanMessage", "AIMessage", "..."
+      ],
+      "exported_from_core": {           // Subset that comes from langchain_core
+        "HumanMessage": {
+          "module": "langchain_core.messages",
+          "original_name": "HumanMessage"
+        }
+      }
+    }
+  ],
+  "summary": {
+    "total_langchain_core_reexports": 40,  // Total re-exported symbols
+    "modules_with_core_reexports": 5       // Number of modules with re-exports
+  }
+}
+```
+"""
+
+import ast
+import importlib.metadata
+import json
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+from typing import Any
+
+
+def get_package_version_after_install(package_name: str) -> str:
+    """Get version of installed package using importlib.metadata."""
+    try:
+        return importlib.metadata.version(package_name)
+    except Exception:  # noqa: BLE001
+        return "unknown"
+
+
+def install_packages(temp_dir: Path, packages: list[str]) -> None:
+    """Install packages in the temporary directory."""
+
+    def _raise_uv_not_found() -> None:
+        msg = "uv not found in PATH"
+        raise FileNotFoundError(msg)
+
+    uv_path = shutil.which("uv")
+    if not uv_path:
+        _raise_uv_not_found()
+
+    assert uv_path is not None  # noqa: S101
+    uv_cmd = [
+        uv_path,
+        "pip",
+        "install",
+        "--target",
+        str(temp_dir),
+        "--no-deps",  # (Avoid conflicts)
+        *packages,
+    ]
+
+    print(f"Installing packages: {packages}")
+    result = subprocess.run(uv_cmd, check=False, capture_output=True, text=True)  # noqa: S603
+    if result.returncode != 0:
+        print(f"Error installing packages: {result.stderr}")
+        msg = f"Failed to install packages: {result.stderr}"
+        raise Exception(msg)  # noqa: TRY002
+
+
+def find_init_files(package_path: Path) -> list[Path]:
+    """Find all `__init__` files in `langchain`."""
+    init_files: list[Path] = []
+
+    langchain_dir = package_path / "langchain"
+    if not langchain_dir.exists():
+        print(f"langchain directory not found at {langchain_dir}")
+        return init_files
+
+    # Recursively find all __init__.py files
+    for init_file in langchain_dir.rglob("__init__.py"):
+        # Skip private/internal modules (any package component starting with _)
+        parts = init_file.relative_to(langchain_dir).parts[:-1]  # Exclude __init__.py
+        if any(part.startswith("_") for part in parts):
+            continue
+        init_files.append(init_file)
+
+    return init_files
+
+
+def analyze_init_file(init_file: Path, package_path: Path) -> dict[str, Any]:
+    """Analyze an `__init__` file to find `langchain_core` re-exports."""
+    try:
+        with init_file.open(encoding="utf-8") as f:
+            content = f.read()
+
+        tree = ast.parse(content)
+
+        langchain_core_imports = {}
+        all_exports = []
+
+        class ImportVisitor(ast.NodeVisitor):
+            def visit_ImportFrom(self, node):
+                if node.module and node.module.startswith("langchain_core"):
+                    for alias in node.names:
+                        # The name as it appears in this module (alias or original)
+                        local_name = alias.asname if alias.asname else alias.name
+
+                        # Store the import mapping
+                        langchain_core_imports[local_name] = {
+                            "module": node.module,
+                            "original_name": alias.name,
+                        }
+
+            def visit_Assign(self, node):
+                # Check for __all__ assignments
+                for target in node.targets:
+                    # Only handle literal `__all__ = [...]` list assignments
+                    if (
+                        isinstance(target, ast.Name)
+                        and target.id == "__all__"
+                        and isinstance(node.value, ast.List)
+                    ):
+                        all_exports.extend(
+                            elt.value
+                            for elt in node.value.elts
+                            if isinstance(elt, ast.Constant)
+                        )
+
+        visitor = ImportVisitor()
+        visitor.visit(tree)
+
+        # Find which imported items are also exported
+        exported_from_core = {}
+        for export in all_exports:
+            if export in langchain_core_imports:
+                exported_from_core[export] = langchain_core_imports[export]
+
+        # Convert to relative path from package root
+        relative_path = init_file.relative_to(package_path)
+
+        return {
+            "file": str(relative_path),
+            "langchain_core_imports": langchain_core_imports,
+            "all_exports": all_exports,
+            "exported_from_core": exported_from_core,
+        }
+
+    except (OSError, SyntaxError, ValueError) as e:
+        print(f"Error analyzing {init_file}: {e}")
+        # Convert to relative path from package root
+        relative_path = init_file.relative_to(package_path)
+
+        return {
+            "file": str(relative_path),
+            "error": str(e),
+            "langchain_core_imports": {},
+            "all_exports": [],
+            "exported_from_core": {},
+        }
+
+
+def main():
+    """Check import mappings."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_path = Path(temp_dir)
+
+        install_packages(temp_path, ["langchain", "langchain_core"])
+        sys.path.insert(0, str(temp_path))
+
+        # Get versions after installation
+        langchain_version = get_package_version_after_install("langchain")
+        langchain_core_version = get_package_version_after_install("langchain_core")
+
+        print(f"Installed langchain version: {langchain_version}")
+        print(f"Installed langchain_core version: {langchain_core_version}")
+
+        init_files = find_init_files(temp_path)
+        print(f"Found {len(init_files)} __init__.py files")
+
+        results = {
+            "metadata": {
+                "langchain_version": langchain_version,
+                "langchain_core_version": langchain_core_version,
+                "total_init_files": len(init_files),
+            },
+            "analysis": [],
+        }
+
+        for init_file in init_files:
+            print(f"Analyzing: {init_file}")
+            analysis = analyze_init_file(init_file, temp_path)
+            # Only include files that have langchain_core imports or exports
+            if (
+                analysis.get("langchain_core_imports")
+                or analysis.get("all_exports")
+                or analysis.get("exported_from_core")
+            ):
+                results["analysis"].append(analysis)
+
+        total_core_exports = 0
+        modules_with_core_exports = 0
+
+        for analysis in results["analysis"]:
+            if analysis.get("exported_from_core"):
+                total_core_exports += len(analysis["exported_from_core"])
+                modules_with_core_exports += 1
+
+        results["summary"] = {
+            "total_langchain_core_reexports": total_core_exports,
+            "modules_with_core_reexports": modules_with_core_exports,
+        }
+
+        print("\nSummary:")
+        print(f"- Total langchain_core re-exports: {total_core_exports}")
+        print(f"- Modules with langchain_core re-exports: {modules_with_core_exports}")
+
+        output_file = Path("scripts/import_mappings.json")
+        with output_file.open("w") as f:
+            json.dump(results, f, indent=2)
+
+        print(f"\nResults saved to {output_file}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/check_pr_imports.py b/scripts/check_pr_imports.py
new file mode 100755
index 0000000000..5b4bdb5220
--- /dev/null
+++ b/scripts/check_pr_imports.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+"""Check PR diffs for unnecessary `langchain_core` imports.
+
+This script analyzes PR diffs to identify imports that should use `langchain`
+instead of `langchain_core`, based on the `import_mappings.json` file generated
+by `check_import_mappings.py`.
+"""
+
+import json
+import re
+import subprocess
+import sys
+from pathlib import Path
+from typing import Any
+
+
+def load_import_mappings() -> dict[str, Any]:
+    """Load the import mappings from JSON file."""
+    mappings_file = Path("scripts/import_mappings.json")
+    if not mappings_file.exists():
+        print(
+            "Error: import_mappings.json not found. Run check_import_mappings.py first."
+        )
+        sys.exit(1)
+
+    with mappings_file.open() as f:
+        return json.load(f)
+
+
+def get_pr_diff() -> str:
+    """Get the diff for the current PR."""
+    try:
+        # Get the base branch (usually main)
+        result = subprocess.run(
+            ["git", "merge-base", "HEAD", "origin/main"],  # noqa: S607
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        base_sha = result.stdout.strip()
+
+        # Get the diff from base to HEAD
+        result = subprocess.run(  # noqa: S603
+            ["git", "diff", base_sha, "HEAD"],  # noqa: S607
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+    except subprocess.CalledProcessError as e:
+        print(f"Error getting PR diff: {e}")
+        sys.exit(1)
+    else:
+        return result.stdout
+
+
+def build_mapping_dict(mappings: dict[str, Any]) -> dict[str, str]:
+    """Build a dictionary mapping `langchain_core` imports to `langchain` imports."""
+    mapping_dict = {}
+
+    for analysis in mappings.get("analysis", []):
+        exported_from_core = analysis.get("exported_from_core", {})
+        if not exported_from_core:
+            continue
+
+        # Extract module path from file path
+        file_path = analysis.get("file", "")
+        if not file_path:
+            continue
+
+        # Convert file path to module path
+        # e.g., /path/to/langchain/messages/__init__.py -> langchain.messages
+        parts = file_path.split("/")
+        try:
+            langchain_idx = parts.index("langchain")
+            module_parts = parts[langchain_idx:-1]  # Exclude __init__.py
+            langchain_module = ".".join(module_parts)
+        except (ValueError, IndexError):
+            continue
+
+        # Map each exported symbol
+        for symbol, info in exported_from_core.items():
+            core_module = info.get("module", "")
+            if core_module:
+                mapping_dict[f"{core_module}.{symbol}"] = f"{langchain_module}.{symbol}"
+                # Also map module-level imports
+                if core_module not in mapping_dict:
+                    mapping_dict[core_module] = langchain_module
+
+    return mapping_dict
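+
+# With the committed import_mappings.json this produces both symbol-level and
+# module-level entries, e.g.
+#   "langchain_core.messages.HumanMessage" -> "langchain.messages.HumanMessage"
+#   "langchain_core.messages"              -> "langchain.messages"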
+ ) + sys.exit(1) + + with mappings_file.open() as f: + return json.load(f) + + +def get_pr_diff() -> str: + """Get the diff for the current PR.""" + try: + # Get the base branch (usually main) + result = subprocess.run( + ["git", "merge-base", "HEAD", "origin/main"], # noqa: S607 + capture_output=True, + text=True, + check=True, + ) + base_sha = result.stdout.strip() + + # Get the diff from base to HEAD + result = subprocess.run( # noqa: S603 + ["git", "diff", base_sha, "HEAD"], # noqa: S607 + capture_output=True, + text=True, + check=True, + ) + except subprocess.CalledProcessError as e: + print(f"Error getting PR diff: {e}") + sys.exit(1) + else: + return result.stdout + + +def build_mapping_dict(mappings: dict[str, Any]) -> dict[str, str]: + """Build a dictionary mapping `langchain_core` imports to `langchain` imports.""" + mapping_dict = {} + + for analysis in mappings.get("analysis", []): + exported_from_core = analysis.get("exported_from_core", {}) + if not exported_from_core: + continue + + # Extract module path from file path + file_path = analysis.get("file", "") + if not file_path: + continue + + # Convert file path to module path + # e.g., /path/to/langchain/messages/__init__.py -> langchain.messages + parts = file_path.split("/") + try: + langchain_idx = parts.index("langchain") + module_parts = parts[langchain_idx:-1] # Exclude __init__.py + langchain_module = ".".join(module_parts) + except (ValueError, IndexError): + continue + + # Map each exported symbol + for symbol, info in exported_from_core.items(): + core_module = info.get("module", "") + if core_module: + mapping_dict[f"{core_module}.{symbol}"] = f"{langchain_module}.{symbol}" + # Also map module-level imports + if core_module not in mapping_dict: + mapping_dict[core_module] = langchain_module + + return mapping_dict + + +def check_import_line(line: str, mapping_dict: dict[str, str]) -> list[dict[str, str]]: + """Check a single import line for incorrect `langchain_core` imports.""" + issues = [] + line = line.strip() + + # Match different import patterns + patterns = [ + r"from\s+(langchain_core\.\S+)\s+import\s+(.+)", # Matches `from langchain_core.module import ...` # noqa: E501 + r"import\s+(langchain_core\.\S+)", # Matches `import langchain_core.module` + ] + + for i, pattern in enumerate(patterns): + match = re.match(pattern, line) + if match: + if i == 0: # from ... import ... pattern (2 groups) + # from ... import ... pattern + core_module = match.group(1) + imports = match.group(2) + + # Check if this module should be imported from langchain instead + if core_module in mapping_dict: + langchain_module = mapping_dict[core_module] + suggested_line = f"from {langchain_module} import {imports}" + issues.append( + { + "original": line, + "suggested": suggested_line, + "reason": ( + f"Import from {langchain_module} instead " + f"of {core_module}" + ), + } + ) + else: + # Check individual imports + import_list = [imp.strip() for imp in imports.split(",")] + problematic_imports = [] + for imp in import_list: + # Clean up import (remove aliases, etc.) 
+                        clean_imp = imp.split(" as ")[0].strip()
+                        full_import = f"{core_module}.{clean_imp}"
+                        if full_import in mapping_dict:
+                            problematic_imports.append(clean_imp)
+
+                    if problematic_imports:
+                        # Find the langchain module for these imports
+                        first_problematic = f"{core_module}.{problematic_imports[0]}"
+                        suggested_module = mapping_dict[first_problematic].rsplit(
+                            ".", 1
+                        )[0]
+                        suggested_line = f"from {suggested_module} import {imports}"
+                        issues.append(
+                            {
+                                "original": line,
+                                "suggested": suggested_line,
+                                "reason": (
+                                    "These imports are re-exported "
+                                    f"from {suggested_module}"
+                                ),
+                            }
+                        )
+            else:
+                # import ... pattern
+                core_module = match.group(1)
+                if core_module in mapping_dict:
+                    langchain_module = mapping_dict[core_module]
+                    suggested_line = f"import {langchain_module}"
+                    issues.append(
+                        {
+                            "original": line,
+                            "suggested": suggested_line,
+                            "reason": (
+                                f"Import {langchain_module} instead of {core_module}"
+                            ),
+                        }
+                    )
+
+    return issues
+
+
+def analyze_diff(diff: str, mapping_dict: dict[str, str]) -> list[dict[str, Any]]:
+    """Analyze the diff for import issues."""
+    issues: list[dict[str, Any]] = []
+    current_file = None
+    line_number = 0
+
+    for line in diff.split("\n"):
+        if line.startswith("+++"):
+            # New file
+            current_file = line[6:]  # Remove "+++ b/"
+            line_number = 0
+        elif line.startswith("@@"):
+            # Hunk header - extract line number
+            match = re.search(r"\+(\d+)", line)
+            if match:
+                line_number = int(match.group(1))
+        elif line.startswith("+") and not line.startswith("+++"):
+            # Added line
+            content = line[1:]  # Remove the "+"
+
+            # Check for import statements
+            if "import" in content and "langchain_core" in content:
+                import_issues = check_import_line(content, mapping_dict)
+                issues.extend(
+                    {
+                        "file": current_file,
+                        "line": line_number,
+                        **issue,
+                    }
+                    for issue in import_issues
+                )
+
+            line_number += 1
+        elif not line.startswith("-") and current_file is not None:
+            # Context line (not removed, not added, not a header)
+            line_number += 1
+
+    return issues
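+
+# Hunk headers ("@@ -a,b +c,d @@") reset the running counter above, so reported
+# line numbers refer to positions in the file after the PR is applied.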
+
+
+def main():
+    """Entrypoint."""
+    mappings = load_import_mappings()
+    mapping_dict = build_mapping_dict(mappings)
+    diff = get_pr_diff()
+
+    print("Analyzing diff for import issues...")
+    issues = analyze_diff(diff, mapping_dict)
+
+    if not issues:
+        print("āœ… No import issues found!")
+        return
+
+    print(f"āŒ Found {len(issues)} import issues:")
+    print()
+
+    for issue in issues:
+        print(f"File: {issue['file']}")
+        print(f"Line: {issue['line']}")
+        print(f"Issue: {issue['reason']}")
+        print(f"Current: {issue['original']}")
+        print(f"Suggested: {issue['suggested']}")
+        print("-" * 80)
+
+    print(f"\nāŒ Found {len(issues)} import issues that need to be fixed.")
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/import_mappings.json b/scripts/import_mappings.json
new file mode 100644
index 0000000000..12373bb6b0
--- /dev/null
+++ b/scripts/import_mappings.json
@@ -0,0 +1,471 @@
+{
+  "metadata": {
+    "langchain_version": "1.0.8",
+    "langchain_core_version": "1.1.0",
+    "total_init_files": 8
+  },
+  "analysis": [
+    {
+      "file": "langchain/tools/__init__.py",
+      "langchain_core_imports": {
+        "BaseTool": {
+          "module": "langchain_core.tools",
+          "original_name": "BaseTool"
+        },
+        "InjectedToolArg": {
+          "module": "langchain_core.tools",
+          "original_name": "InjectedToolArg"
+        },
+        "InjectedToolCallId": {
+          "module": "langchain_core.tools",
+          "original_name": "InjectedToolCallId"
+        },
+        "ToolException": {
+          "module": "langchain_core.tools",
+          "original_name": "ToolException"
+        },
+        "tool": {
+          "module": "langchain_core.tools",
+          "original_name": "tool"
+        }
+      },
+      "all_exports": [
+        "BaseTool",
+        "InjectedState",
+        "InjectedStore",
+        "InjectedToolArg",
+        "InjectedToolCallId",
+        "ToolException",
+        "ToolRuntime",
+        "tool"
+      ],
+      "exported_from_core": {
+        "BaseTool": {
+          "module": "langchain_core.tools",
+          "original_name": "BaseTool"
+        },
+        "InjectedToolArg": {
+          "module": "langchain_core.tools",
+          "original_name": "InjectedToolArg"
+        },
+        "InjectedToolCallId": {
+          "module": "langchain_core.tools",
+          "original_name": "InjectedToolCallId"
+        },
+        "ToolException": {
+          "module": "langchain_core.tools",
+          "original_name": "ToolException"
+        },
+        "tool": {
+          "module": "langchain_core.tools",
+          "original_name": "tool"
+        }
+      }
+    },
+    {
+      "file": "langchain/messages/__init__.py",
+      "langchain_core_imports": {
+        "AIMessage": {
+          "module": "langchain_core.messages",
+          "original_name": "AIMessage"
+        },
+        "AIMessageChunk": {
+          "module": "langchain_core.messages",
+          "original_name": "AIMessageChunk"
+        },
+        "Annotation": {
+          "module": "langchain_core.messages",
+          "original_name": "Annotation"
+        },
+        "AnyMessage": {
+          "module": "langchain_core.messages",
+          "original_name": "AnyMessage"
+        },
+        "AudioContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "AudioContentBlock"
+        },
+        "Citation": {
+          "module": "langchain_core.messages",
+          "original_name": "Citation"
+        },
+        "ContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "ContentBlock"
+        },
+        "DataContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "DataContentBlock"
+        },
+        "FileContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "FileContentBlock"
+        },
+        "HumanMessage": {
+          "module": "langchain_core.messages",
+          "original_name": "HumanMessage"
+        },
+        "ImageContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "ImageContentBlock"
+        },
+        "InputTokenDetails": {
+          "module": "langchain_core.messages",
+          "original_name": "InputTokenDetails"
+        },
+        "InvalidToolCall": {
+          "module": "langchain_core.messages",
+          "original_name": "InvalidToolCall"
+        },
+        "MessageLikeRepresentation": {
+          "module": "langchain_core.messages",
+          "original_name": "MessageLikeRepresentation"
+        },
+        "NonStandardAnnotation": {
+          "module": "langchain_core.messages",
+          "original_name": "NonStandardAnnotation"
+        },
+        "NonStandardContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "NonStandardContentBlock"
+        },
+        "OutputTokenDetails": {
+          "module": "langchain_core.messages",
+          "original_name": "OutputTokenDetails"
+        },
+        "PlainTextContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "PlainTextContentBlock"
+        },
+        "ReasoningContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "ReasoningContentBlock"
+        },
+        "RemoveMessage": {
+          "module": "langchain_core.messages",
+          "original_name": "RemoveMessage"
+        },
+        "ServerToolCall": {
+          "module": "langchain_core.messages",
+          "original_name": "ServerToolCall"
+        },
+        "ServerToolCallChunk": {
+          "module": "langchain_core.messages",
+          "original_name": "ServerToolCallChunk"
+        },
+        "ServerToolResult": {
+          "module": "langchain_core.messages",
+          "original_name": "ServerToolResult"
+        },
+        "SystemMessage": {
+          "module": "langchain_core.messages",
+          "original_name": "SystemMessage"
+        },
+        "TextContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "TextContentBlock"
+        },
+        "ToolCall": {
+          "module": "langchain_core.messages",
+          "original_name": "ToolCall"
+        },
+        "ToolCallChunk": {
+          "module": "langchain_core.messages",
+          "original_name": "ToolCallChunk"
+        },
+        "ToolMessage": {
+          "module": "langchain_core.messages",
+          "original_name": "ToolMessage"
+        },
+        "UsageMetadata": {
+          "module": "langchain_core.messages",
+          "original_name": "UsageMetadata"
+        },
+        "VideoContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "VideoContentBlock"
+        },
+        "trim_messages": {
+          "module": "langchain_core.messages",
+          "original_name": "trim_messages"
+        }
+      },
+      "all_exports": [
+        "AIMessage",
+        "AIMessageChunk",
+        "Annotation",
+        "AnyMessage",
+        "AudioContentBlock",
+        "Citation",
+        "ContentBlock",
+        "DataContentBlock",
+        "FileContentBlock",
+        "HumanMessage",
+        "ImageContentBlock",
+        "InputTokenDetails",
+        "InvalidToolCall",
+        "MessageLikeRepresentation",
+        "NonStandardAnnotation",
+        "NonStandardContentBlock",
+        "OutputTokenDetails",
+        "PlainTextContentBlock",
+        "ReasoningContentBlock",
+        "RemoveMessage",
+        "ServerToolCall",
+        "ServerToolCallChunk",
+        "ServerToolResult",
+        "SystemMessage",
+        "TextContentBlock",
+        "ToolCall",
+        "ToolCallChunk",
+        "ToolMessage",
+        "UsageMetadata",
+        "VideoContentBlock",
+        "trim_messages"
+      ],
+      "exported_from_core": {
+        "AIMessage": {
+          "module": "langchain_core.messages",
+          "original_name": "AIMessage"
+        },
+        "AIMessageChunk": {
+          "module": "langchain_core.messages",
+          "original_name": "AIMessageChunk"
+        },
+        "Annotation": {
+          "module": "langchain_core.messages",
+          "original_name": "Annotation"
+        },
+        "AnyMessage": {
+          "module": "langchain_core.messages",
+          "original_name": "AnyMessage"
+        },
+        "AudioContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "AudioContentBlock"
+        },
+        "Citation": {
+          "module": "langchain_core.messages",
+          "original_name": "Citation"
+        },
+        "ContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "ContentBlock"
+        },
+        "DataContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "DataContentBlock"
+        },
+        "FileContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "FileContentBlock"
+        },
+        "HumanMessage": {
+          "module": "langchain_core.messages",
+          "original_name": "HumanMessage"
+        },
+        "ImageContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "ImageContentBlock"
+        },
+        "InputTokenDetails": {
+          "module": "langchain_core.messages",
+          "original_name": "InputTokenDetails"
+        },
+        "InvalidToolCall": {
+          "module": "langchain_core.messages",
+          "original_name": "InvalidToolCall"
+        },
+        "MessageLikeRepresentation": {
+          "module": "langchain_core.messages",
+          "original_name": "MessageLikeRepresentation"
+        },
+        "NonStandardAnnotation": {
+          "module": "langchain_core.messages",
+          "original_name": "NonStandardAnnotation"
+        },
+        "NonStandardContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "NonStandardContentBlock"
+        },
+        "OutputTokenDetails": {
+          "module": "langchain_core.messages",
+          "original_name": "OutputTokenDetails"
+        },
+        "PlainTextContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "PlainTextContentBlock"
+        },
+        "ReasoningContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "ReasoningContentBlock"
+        },
+        "RemoveMessage": {
+          "module": "langchain_core.messages",
+          "original_name": "RemoveMessage"
+        },
+        "ServerToolCall": {
+          "module": "langchain_core.messages",
+          "original_name": "ServerToolCall"
+        },
+        "ServerToolCallChunk": {
+          "module": "langchain_core.messages",
+          "original_name": "ServerToolCallChunk"
+        },
+        "ServerToolResult": {
+          "module": "langchain_core.messages",
+          "original_name": "ServerToolResult"
+        },
+        "SystemMessage": {
+          "module": "langchain_core.messages",
+          "original_name": "SystemMessage"
+        },
+        "TextContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "TextContentBlock"
+        },
+        "ToolCall": {
+          "module": "langchain_core.messages",
+          "original_name": "ToolCall"
+        },
+        "ToolCallChunk": {
+          "module": "langchain_core.messages",
+          "original_name": "ToolCallChunk"
+        },
+        "ToolMessage": {
+          "module": "langchain_core.messages",
+          "original_name": "ToolMessage"
+        },
+        "UsageMetadata": {
+          "module": "langchain_core.messages",
+          "original_name": "UsageMetadata"
+        },
+        "VideoContentBlock": {
+          "module": "langchain_core.messages",
+          "original_name": "VideoContentBlock"
+        },
+        "trim_messages": {
+          "module": "langchain_core.messages",
+          "original_name": "trim_messages"
+        }
+      }
+    },
+    {
+      "file": "langchain/embeddings/__init__.py",
+      "langchain_core_imports": {
+        "Embeddings": {
+          "module": "langchain_core.embeddings",
+          "original_name": "Embeddings"
+        }
+      },
+      "all_exports": [
+        "Embeddings",
+        "init_embeddings"
+      ],
+      "exported_from_core": {
+        "Embeddings": {
+          "module": "langchain_core.embeddings",
+          "original_name": "Embeddings"
+        }
+      }
+    },
+    {
+      "file": "langchain/chat_models/__init__.py",
+      "langchain_core_imports": {
+        "BaseChatModel": {
+          "module": "langchain_core.language_models",
+          "original_name": "BaseChatModel"
+        }
+      },
+      "all_exports": [
+        "BaseChatModel",
+        "init_chat_model"
+      ],
+      "exported_from_core": {
+        "BaseChatModel": {
+          "module": "langchain_core.language_models",
+          "original_name": "BaseChatModel"
+        }
+      }
+    },
+    {
+      "file": "langchain/agents/__init__.py",
+      "langchain_core_imports": {},
+      "all_exports": [
+        "AgentState",
+        "create_agent"
+      ],
+      "exported_from_core": {}
+    },
+    {
+      "file": "langchain/rate_limiters/__init__.py",
+      "langchain_core_imports": {
+        "BaseRateLimiter": {
+          "module": "langchain_core.rate_limiters",
+          "original_name": "BaseRateLimiter"
+        },
+        "InMemoryRateLimiter": {
+          "module": "langchain_core.rate_limiters",
+          "original_name": "InMemoryRateLimiter"
+        }
+      },
+      "all_exports": [
+        "BaseRateLimiter",
+        "InMemoryRateLimiter"
+      ],
+      "exported_from_core": {
+        "BaseRateLimiter": {
+          "module": "langchain_core.rate_limiters",
+          "original_name": "BaseRateLimiter"
+        },
+        "InMemoryRateLimiter": {
+          "module": "langchain_core.rate_limiters",
+          "original_name": "InMemoryRateLimiter"
+        }
+      }
+    },
+    {
+      "file": "langchain/agents/middleware/__init__.py",
+      "langchain_core_imports": {},
+      "all_exports": [
+        "AgentMiddleware",
+        "AgentState",
+        "ClearToolUsesEdit",
+        "CodexSandboxExecutionPolicy",
+        "ContextEditingMiddleware",
+        "DockerExecutionPolicy",
+        "FilesystemFileSearchMiddleware",
+        "HostExecutionPolicy",
+        "HumanInTheLoopMiddleware",
+        "InterruptOnConfig",
+        "LLMToolEmulator",
+        "LLMToolSelectorMiddleware",
+        "ModelCallLimitMiddleware",
+        "ModelFallbackMiddleware",
+        "ModelRequest",
+        "ModelResponse",
+        "PIIDetectionError",
+        "PIIMiddleware",
+        "RedactionRule",
+        "ShellToolMiddleware",
+        "SummarizationMiddleware",
+        "TodoListMiddleware",
+        "ToolCallLimitMiddleware",
+        "ToolRetryMiddleware",
+        "after_agent",
+        "after_model",
+        "before_agent",
+        "before_model",
+        "dynamic_prompt",
+        "hook_config",
+        "wrap_model_call",
+        "wrap_tool_call"
+      ],
+      "exported_from_core": {}
+    }
+  ],
+  "summary": {
+    "total_langchain_core_reexports": 40,
+    "modules_with_core_reexports": 5
+  }
+}
\ No newline at end of file
diff --git a/tests/unit_tests/test_check_pr_imports.py b/tests/unit_tests/test_check_pr_imports.py
new file mode 100644
index 0000000000..331131b790
--- /dev/null
+++ b/tests/unit_tests/test_check_pr_imports.py
@@ -0,0 +1,315 @@
+"""Tests for `check_pr_imports.py`."""
+
+import json
+import tempfile
+from pathlib import Path
+from unittest.mock import Mock, patch
+
+import pytest
+
+from scripts.check_pr_imports import (
+    analyze_diff,
+    build_mapping_dict,
+    check_import_line,
+    load_import_mappings,
+)
+
+
+def test_load_existing_mappings() -> None:
+    """Test loading existing import mappings file."""
+    test_data = {
+        "metadata": {"langchain_version": "1.0.0"},
+        "analysis": [],
+    }
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+        json.dump(test_data, f)
+        temp_path = f.name
+
+    # Mock Path to return temp file
+    with patch("scripts.check_pr_imports.Path") as mock_path:
+        mock_path.return_value.exists.return_value = True
+        temp_file_path = Path(temp_path)
+        mock_path.return_value.open.return_value.__enter__ = (
+            lambda _: temp_file_path.open()
+        )
+        mock_path.return_value.open.return_value.__exit__ = Mock(return_value=None)
+
+        result = load_import_mappings()
+        assert result == test_data
+
+    # Clean up
+    Path(temp_path).unlink()
+
+
+def test_missing_mappings_file() -> None:
+    """Test behavior when mappings file doesn't exist."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Use a non-existent file path
+        nonexistent_file = Path(temp_dir) / "nonexistent.json"
+
+        with patch("scripts.check_pr_imports.Path") as mock_path:
+            mock_path.return_value = nonexistent_file
+
+            with patch("builtins.print") as mock_print:
+                with pytest.raises(SystemExit) as exc_info:
+                    load_import_mappings()
+
+                assert exc_info.value.code == 1
+                mock_print.assert_called_once()
+
+
+def test_build_mapping_dict_empty() -> None:
+    """Test building mapping dictionary with empty analysis."""
+    mappings: dict[str, list] = {"analysis": []}
+    result = build_mapping_dict(mappings)
+    assert result == {}
+
+
+def test_build_mapping_dict_no_exports() -> None:
+    """Test building mapping dictionary with no exported_from_core."""
+    mappings = {
+        "analysis": [
+            {
+                "file": "/temp/langchain/messages/__init__.py",
+                "exported_from_core": {},
+            }
+        ]
+    }
+    result = build_mapping_dict(mappings)
+    assert result == {}
+
+
+def test_build_mapping_dict_invalid_path() -> None:
+    """Test building mapping dictionary with invalid file path."""
+    mappings = {
+        "analysis": [
+            {
+                "file": "/invalid/path/without/lc/__init__.py",
+                "exported_from_core": {
+                    "HumanMessage": {
+                        "module": "langchain_core.messages",
+                        "original_name": "HumanMessage",
+                    },
+                },
+            }
+        ]
+    }
+    result = build_mapping_dict(mappings)
+    # Should be empty because "langchain" is not in the path
+    assert result == {}
+
+
+def test_build_mapping_dict_basic() -> None:
+    """Test building mapping dictionary from analysis data."""
+    mappings = {
+        "analysis": [
+            {
+                "file": "/temp/langchain/messages/__init__.py",
+                "exported_from_core": {
+                    "HumanMessage": {
+                        "module": "langchain_core.messages",
+                        "original_name": "HumanMessage",
+                    },
+                    "AIMessage": {
+                        "module": "langchain_core.messages",
+                        "original_name": "AIMessage",
+                    },
+                },
+            },
+            {
+                "file": "/temp/langchain/tools/__init__.py",
+                "exported_from_core": {
+                    "tool": {
+                        "module": "langchain_core.tools",
+                        "original_name": "tool",
+                    },
+                },
+            },
+        ]
+    }
+
+    result = build_mapping_dict(mappings)
+
+    expected = {
+        "langchain_core.messages.HumanMessage": "langchain.messages.HumanMessage",
+        "langchain_core.messages.AIMessage": "langchain.messages.AIMessage",
+        "langchain_core.messages": "langchain.messages",
+        "langchain_core.tools.tool": "langchain.tools.tool",
+        "langchain_core.tools": "langchain.tools",
+    }
+
+    assert result == expected
+
+
+@pytest.fixture
+def mapping_dict() -> dict[str, str]:
+    """Test mapping dictionary fixture."""
+    return {
+        "langchain_core.messages": "langchain.messages",
+        "langchain_core.messages.HumanMessage": "langchain.messages.HumanMessage",
+        "langchain_core.messages.AIMessage": "langchain.messages.AIMessage",
+        "langchain_core.tools": "langchain.tools",
+        "langchain_core.tools.tool": "langchain.tools.tool",
+    }
+
+
+def test_from_import_module_mapping(mapping_dict: dict[str, str]) -> None:
+    """Test from import with module-level mapping."""
+    line = "from langchain_core.messages import HumanMessage"
+    issues = check_import_line(line, mapping_dict)
+
+    assert len(issues) == 1
+    issue = issues[0]
+    assert issue["original"] == line
+    assert issue["suggested"] == "from langchain.messages import HumanMessage"
+    assert "Import from langchain.messages instead" in issue["reason"]
+
+
+def test_from_import_multiple_items(mapping_dict: dict[str, str]) -> None:
+    """Test from import with multiple items."""
+    line = "from langchain_core.messages import HumanMessage, AIMessage"
+    issues = check_import_line(line, mapping_dict)
+
+    assert len(issues) == 1
+    issue = issues[0]
+    assert issue["original"] == line
+    expected_suggestion = "from langchain.messages import HumanMessage, AIMessage"
+    assert issue["suggested"] == expected_suggestion
+
+
+def test_from_import_with_alias(mapping_dict: dict[str, str]) -> None:
+    """Test from import with alias."""
+    line = "from langchain_core.messages import HumanMessage as HM"
+    issues = check_import_line(line, mapping_dict)
+
+    assert len(issues) == 1
+    issue = issues[0]
+    assert issue["original"] == line
+    assert issue["suggested"] == "from langchain.messages import HumanMessage as HM"
+
+
+def test_direct_import(mapping_dict: dict[str, str]) -> None:
+    """Test direct module import."""
+    line = "import langchain_core.messages"
+    issues = check_import_line(line, mapping_dict)
+
+    assert len(issues) == 1
+    issue = issues[0]
+    assert issue["original"] == line
+    assert issue["suggested"] == "import langchain.messages"
+    assert "Import langchain.messages instead" in issue["reason"]
+
+
+def test_no_mapping_found(mapping_dict: dict[str, str]) -> None:
+    """Test line with no mapping available."""
+    line = "from langchain_core.unknown import Something"
+    issues = check_import_line(line, mapping_dict)
+    assert len(issues) == 0
+
+
+def test_non_langchain_core_import(mapping_dict: dict[str, str]) -> None:
+    """Test line that doesn't import from `langchain_core`.
+
+    e.g. an already correct import.
+    """
+    line = "from langchain.messages import HumanMessage"
+    issues = check_import_line(line, mapping_dict)
+    assert len(issues) == 0
+
+
+def test_analyze_simple_diff(mapping_dict: dict[str, str]) -> None:
+    """Test analyzing a simple diff with one issue."""
+    diff = """diff --git a/test.py b/test.py
+index 1234567..abcdefg 100644
+--- a/test.py
++++ b/test.py
+@@ -1,3 +1,4 @@
+ import os
++from langchain_core.messages import HumanMessage
+ 
+ def main():
+"""
+
+    issues = analyze_diff(diff, mapping_dict)
+
+    assert len(issues) == 1
+    issue = issues[0]
+    assert issue["file"] == "test.py"
+    assert issue["line"] == 2
+    assert issue["original"] == "from langchain_core.messages import HumanMessage"
+    assert issue["suggested"] == "from langchain.messages import HumanMessage"
+
+
+def test_analyze_multiple_files_diff(mapping_dict: dict[str, str]) -> None:
+    """Test analyzing diff with multiple files."""
+    diff = """diff --git a/file1.py b/file1.py
+index 1234567..abcdefg 100644
+--- a/file1.py
++++ b/file1.py
+@@ -1,2 +1,3 @@
+ import os
++from langchain_core.messages import HumanMessage
+diff --git a/file2.py b/file2.py
+index 2345678..bcdefgh 100644
+--- a/file2.py
++++ b/file2.py
+@@ -10,3 +10,4 @@ def func():
+     pass
+ 
++import langchain_core.messages
+"""
+
+    issues = analyze_diff(diff, mapping_dict)
+
+    assert len(issues) == 2
+
+    # First issue
+    assert issues[0]["file"] == "file1.py"
+    assert issues[0]["line"] == 2
+    assert issues[0]["original"] == "from langchain_core.messages import HumanMessage"
+
+    # Second issue
+    assert issues[1]["file"] == "file2.py"
+    assert issues[1]["line"] == 12
+    assert issues[1]["original"] == "import langchain_core.messages"
+
+
+def test_analyze_diff_no_issues(mapping_dict: dict[str, str]) -> None:
+    """Test analyzing diff with no import issues."""
+    diff = """diff --git a/test.py b/test.py
+index 1234567..abcdefg 100644
+--- a/test.py
++++ b/test.py
+@@ -1,3 +1,4 @@
+ import os
++from langchain.messages import HumanMessage
+ 
+ def main():
+"""
+
+    issues = analyze_diff(diff, mapping_dict)
+    assert len(issues) == 0
+
+
+def test_analyze_diff_removed_lines(mapping_dict: dict[str, str]) -> None:
+    """Test analyzing diff with removed lines (should be ignored)."""
+    diff = """diff --git a/test.py b/test.py
+index 1234567..abcdefg 100644
+--- a/test.py
++++ b/test.py
+@@ -1,4 +1,3 @@
+ import os
+-from langchain_core.messages import HumanMessage
+ 
+ def main():
+"""
+
+    issues = analyze_diff(diff, mapping_dict)
+    assert len(issues) == 0
+
+
+def test_analyze_empty_diff(mapping_dict: dict[str, str]) -> None:
+    """Test analyzing empty diff."""
+    issues = analyze_diff("", mapping_dict)
+    assert len(issues) == 0