Skip to content

Commit

Permalink
upstage[patch] : fix error handling in Layout Analysis parser (#22054)
Browse files Browse the repository at this point in the history
This pull request addresses and fixes exception handling in the
UpstageLayoutAnalysisParser and enhances the test coverage by adding
error exception tests for the document loader. These improvements ensure
robust error handling and increase the reliability of the system when
dealing with external API calls and JSON responses.

### Changes Made
1. Fix Request Exception Handling:

- Issue: The existing implementation of UpstageLayoutAnalysisParser did
not properly handle exceptions thrown by the requests library, which
could lead to unhandled exceptions and potential crashes.
- Solution: Added comprehensive exception handling for
requests.RequestException to catch any request-related errors. This
includes printing the error details and raising a ValueError with a
meaningful error message.

2. Add Error Exception Tests for Document Loader:

- New Tests: Introduced new test cases to verify the robustness of the
UpstageLayoutAnalysisLoader against various error scenarios. The tests
ensure that the loader gracefully handles:
- RequestException: Simulates network issues or invalid API requests to
ensure appropriate error handling and user feedback.
- JSONDecodeError: Simulates scenarios where the API response is not
valid JSON, ensuring the system does not crash and provides clear error
messaging.
  • Loading branch information
junkeon committed May 23, 2024
1 parent d9eff44 commit 4fda7bf
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -181,19 +181,22 @@ def _get_response(self, files: Dict) -> List:

result = response.json().get("elements", [])

elements = [
element for element in result if element["category"] not in self.exclude
]

return elements

except requests.RequestException as req_err:
# Handle any request-related exceptions
print(f"Request Exception: {req_err}")
raise ValueError(f"Failed to send request: {req_err}")
except json.JSONDecodeError as json_err:
# Handle JSON decode errors
print(f"JSON Decode Error: {json_err}")
raise ValueError(f"Failed to decode JSON response: {json_err}")

elements = [
element for element in result if element["category"] not in self.exclude
]

return elements
return []

def _split_and_request(
self,
Expand Down
46 changes: 46 additions & 0 deletions libs/partners/upstage/tests/unit_tests/test_layout_analysis.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import json
from pathlib import Path
from typing import Any, Dict, get_args
from unittest import TestCase
from unittest.mock import MagicMock, Mock, patch

import requests

from langchain_upstage import UpstageLayoutAnalysisLoader
from langchain_upstage.layout_analysis import OutputType, SplitType

Expand Down Expand Up @@ -205,3 +209,45 @@ def test_page_split_html_output(mock_post: Mock) -> None:
assert document.metadata["page"] == MOCK_RESPONSE_JSON["elements"][i]["page"]
assert document.metadata["type"] == "html"
assert document.metadata["split"] == "page"


@patch("requests.post")
def test_request_exception(mock_post: Mock) -> None:
    """Check that a failing HTTP request surfaces from the loader as ValueError.

    ``requests.post`` is patched to raise ``requests.RequestException``; the
    loader is expected to re-raise it as a ``ValueError`` whose message embeds
    the original exception text.
    """
    # Every call to the patched requests.post raises the mocked exception.
    mock_post.side_effect = requests.RequestException("Mocked request exception")

    analysis_loader = UpstageLayoutAnalysisLoader(
        file_path=EXAMPLE_PDF_PATH,
        output_type="html",
        split="page",
        api_key="valid_api_key",
        exclude=[],
    )

    # A throwaway TestCase instance gives access to assertRaises as a
    # context manager outside of a unittest class.
    with TestCase().assertRaises(ValueError) as raised:
        analysis_loader.load()

    assert str(raised.exception) == "Failed to send request: Mocked request exception"


@patch("requests.post")
def test_json_decode_error(mock_post: Mock) -> None:
    """Check that an undecodable response body surfaces as ValueError.

    ``requests.post`` is patched to return a response with status 200 whose
    ``.json()`` raises ``json.JSONDecodeError``; the loader is expected to
    re-raise it as a ``ValueError`` whose message embeds the decoder's text.
    """
    # Response object reports HTTP 200 but fails as soon as .json() is called.
    broken_response = Mock(status_code=200)
    broken_response.json.side_effect = json.JSONDecodeError("Expecting value", "", 0)
    mock_post.return_value = broken_response

    analysis_loader = UpstageLayoutAnalysisLoader(
        file_path=EXAMPLE_PDF_PATH,
        output_type="html",
        split="page",
        api_key="valid_api_key",
        exclude=[],
    )

    # A throwaway TestCase instance gives access to assertRaises as a
    # context manager outside of a unittest class.
    with TestCase().assertRaises(ValueError) as raised:
        analysis_loader.load()

    expected_message = (
        "Failed to decode JSON response: Expecting value: line 1 column 1 (char 0)"
    )
    assert str(raised.exception) == expected_message

0 comments on commit 4fda7bf

Please sign in to comment.