Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.1.3] - 2025-07-05

### Fixed
- Fixed issue where wrapper text following JSON blocks was not recognized (#1)
- Added dedicated `remove_trailing_wrapper_text/1` function in Layer 1
- Now properly removes trailing text after valid JSON structures
- Example: `[{"id": 1}]\n1 Volume(s) created` → `[{"id": 1}]`

## [0.1.2] - 2025-06-08

### Added
Expand Down
65 changes: 64 additions & 1 deletion lib/json_remedy/layer1/content_cleaning.ex
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,10 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
# Then try to extract from prose/text
{result, prose_repairs} = extract_from_prose(result)

all_repairs = existing_repairs ++ html_repairs ++ prose_repairs
# Finally, remove any trailing wrapper text after JSON
{result, trailing_repairs} = remove_trailing_wrapper_text(result)

all_repairs = existing_repairs ++ html_repairs ++ prose_repairs ++ trailing_repairs
{result, all_repairs}
end

Expand Down Expand Up @@ -682,6 +685,66 @@ defmodule JsonRemedy.Layer1.ContentCleaning do
find_balanced_end(rest, open, close, pos + 1, balance, in_string)
end

# Drops wrapper text that trails a JSON document.
#
# The input is only inspected when, ignoring surrounding whitespace, it opens
# with an object (`{`) or an array (`[`); the matching delimiter pair is then
# handed to `check_and_remove_trailing_text/3`. Any other input is returned
# untouched together with an empty repair list.
defp remove_trailing_wrapper_text(input) do
  # Trimming is used for detection only; the untrimmed input is what gets
  # passed on.
  case String.trim(input) do
    "{" <> _ -> check_and_remove_trailing_text(input, "{", "}")
    "[" <> _ -> check_and_remove_trailing_text(input, "[", "]")
    _ -> {input, []}
  end
end

# Cuts off non-whitespace text that follows the first balanced
# `open_char` … `close_char` structure in `input`.
#
# Returns `{input, []}` when no balanced end is found or when only whitespace
# follows the structure; otherwise returns `{json_only, [repair]}`, where
# `json_only` is `input` up to and including the closing delimiter.
defp check_and_remove_trailing_text(input, open_char, close_char) do
  total_length = String.length(input)

  # Offset of the first opening delimiter (0 when it does not occur).
  offset =
    case String.split(input, open_char, parts: 2) do
      [leading, _rest] -> String.length(leading)
      _ -> 0
    end

  candidate = String.slice(input, offset, total_length)

  with end_pos when not is_nil(end_pos) <-
         find_balanced_end(candidate, open_char, close_char),
       # `end_pos` is relative to `candidate`; shift it back onto `input`
       # and step past the closing delimiter.
       cut = offset + end_pos + 1,
       remainder = String.slice(input, cut, total_length),
       false <- String.trim(remainder) == "" do
    kept = String.slice(input, 0, cut)

    repair = %{
      layer: :content_cleaning,
      action: "removed trailing wrapper text",
      position: cut,
      original: input,
      replacement: kept
    }

    {kept, [repair]}
  else
    # Either no balanced end was found or nothing significant trails the JSON.
    _ -> {input, []}
  end
end

# Helper functions for string detection using direct methods

# Fast check for long text that likely contains JSON content
Expand Down
2 changes: 1 addition & 1 deletion mix.exs
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
defmodule JsonRemedy.MixProject do
use Mix.Project

@version "0.1.1"
@version "0.1.3"
@source_url "https://github.com/nshkrdotcom/json_remedy"

def project do
Expand Down
30 changes: 30 additions & 0 deletions test/unit/layer1_content_cleaning_test.exs
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,36 @@ defmodule JsonRemedy.Layer1.ContentCleaningTest do
assert length(context.repairs) > 0
end
end

test "extracts json with trailing wrapper text (GitHub issue #1)" do
  # Regression test for GitHub issue #1: JSON followed by wrapper output
  # (here "1 Volume(s) created") was previously not cleaned.
  input = """
  [
  {
  "volumeID": "f3a6ffd2-0111-4235-980c-a5ceec215e93",
  "name": "km-tst-20",
  "cloudID": "75b10103873d4a1ba0d52b43159a2842",
  "size": 1,
  "storageType": "ssd",
  "state": "creating",
  "shareable": false,
  "bootable": false,
  "volumePool": "General-Flash-002"
  }
  ]
  1 Volume(s) created
  """

  {:ok, result, context} = ContentCleaning.process(input, %{repairs: [], options: []})

  # Only the JSON array should survive; the trailing text must be gone.
  trimmed_result = String.trim(result)
  assert String.starts_with?(trimmed_result, "[")
  assert String.ends_with?(trimmed_result, "]")
  refute String.contains?(result, "1 Volume(s) created")

  # Search the whole repair list instead of asserting on its head, so the test
  # does not depend on this repair being the first one recorded.
  assert context.repairs != []
  assert Enum.any?(context.repairs, &(&1.action =~ "removed trailing wrapper text"))
end
end

describe "encoding normalization" do
Expand Down