svilupp · svilupp · Jan 30, 2024 · Jan 30, 2024 · Jan 30, 2024 · Jan 30, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   - "gpt3" still refers to the general endpoint "gpt-3.5-turbo", which OpenAI will move to version 0125 by mid-February (ie, "gpt3t" will be the same as "gpt3" then. We have reflected the approximate cost in the model registry but note that it will be incorrect in the transition period)
   - "emb3small" refers to the small version of the new embedding model (dim=1536), which is 5x cheaper than Ada and promises higher quality
   - "emb3large" refers to the large version of the new embedding model (dim=3072), which is only 30% more expensive than Ada
+- Improved AgentTools: added more information and specific methods to `aicode_feedback` and `error_feedback` to pass more targeted feedback/tips to the AIAgent
+- Improved detection of which lines caused the error during `AICode` evaluation; the error details are now forced to be printed in `AICode(...).stdout` for downstream analysis.
 
 ### Fixed
 - Fixed typos in the documentation

diff --git a/src/code_eval.jl b/src/code_eval.jl
@@ -352,13 +352,25 @@ function eval!(cb::AbstractCodeBlock, expr::Expr;
             cb.success = false
         end
     end
+    ## if stdout is missing or empty but an error was recorded, write the `showerror` output into stdout
+    if (isnothing(cb.stdout) || isempty(cb.stdout)) && !isnothing(cb.error)
+        io = IOBuffer()
+        showerror(io, cb.error isa LoadError ? cb.error.error : cb.error)
+        cb.stdout = String(take!(io))
+    end
     ## unwrap load error
     if cb.error isa LoadError
         push!(cb.error_lines, cb.error.line)
-        append!(cb.error_lines, extract_stacktrace_lines(cb.error.file, cb.stdout))
+        for line in extract_stacktrace_lines(cb.error.file, cb.stdout)
+            (line ∉ cb.error_lines) && push!(cb.error_lines, line)
+        end
         cb.error = cb.error.error
     elseif !isnothing(cb.error)
-        append!(cb.error_lines, extract_stacktrace_lines("__code_string_eval", cb.stdout))
+        ## fallback, looks for errors only in the original code (cb.code)
+        lines = extract_stacktrace_lines("__code_string_eval", cb.stdout)
+        for line in lines
+            (line ∉ cb.error_lines) && push!(cb.error_lines, line)
+        end
     end
     return cb
 end
diff --git a/test/Experimental/AgentTools/code_feedback.jl b/test/Experimental/AgentTools/code_feedback.jl
@@ -30,23 +30,22 @@ using PromptingTools.Experimental.AgentTools: testset_feedback,
     schedule(tsk)
     fetch(tsk)
     """)
-    cb.stdout = "STDOUT"
     feedback = aicodefixer_feedback(CodeFailedEval(), cb)
-    @test feedback ==
-          "**Error Detected:**\n**ErrorException**:\nxx\n\n\n\n**Lines that caused the error:**\n- fetch(tsk)\n\n**Output Captured:**\n STDOUT"
+    @test occursin("**Error Detected:**\n**ErrorException**:\nxx\n\n\n\n**Lines that caused the error:**\n- fetch(tsk)",
+        feedback)
+
     cb = AICode("error(\"xx\")")
-    cb.stdout = "STDOUT"
     feedback = aicodefixer_feedback(CodeFailedEval(), cb)
     @test feedback ==
-          "**Error Detected:**\n**ErrorException**:\nxx\n\n\n\n**Lines that caused the error:**\n- error(\"xx\")\n\n**Output Captured:**\n STDOUT"
+          "**Error Detected:**\n**ErrorException**:\nxx\n\n\n\n**Lines that caused the error:**\n- error(\"xx\")\n\n**Output Captured:**\n xx"
     conv = [PT.AIMessage("""
     ```julia
     error(\"xx\")
     ```
     """)]
     feedback = aicodefixer_feedback(conv).feedback
     @test feedback ==
-          "**Error Detected:**\n**ErrorException**:\nxx\n\n\n\n**Lines that caused the error:**\n- error(\"xx\")"
+          "**Error Detected:**\n**ErrorException**:\nxx\n\n\n\n**Lines that caused the error:**\n- error(\"xx\")\n\n**Output Captured:**\n xx"
 
     # CodeFailedTimeout
     cb = AICode("InterruptException()")

diff --git a/test/code_eval.jl b/test/code_eval.jl
@@ -47,6 +47,18 @@ using PromptingTools: extract_module_name
     @test cb.success == false
     @test cb.error == UndefVarError(:b)
     @test cb.error_lines == [1]
+    # despite not capturing stdout, we always unwrap the error to be able to detect error lines
+    @test occursin("UndefVarError", cb.stdout)
+
+    # provide expression directly
+    cb = AICode("""
+    bad_func()=1
+    """)
+    expr = Meta.parseall("bad_func(1)")
+    eval!(cb, expr; capture_stdout = false, eval_module = cb.output)
+    @test cb.success == false
+    @test cb.error isa MethodError
+    @test cb.error_lines == [1]
 end
 
 ## Addition, needs to be outside of @testset