diff --git a/dspy/evaluate/evaluate.py b/dspy/evaluate/evaluate.py
index 6b74645a8f..04a135537f 100644
--- a/dspy/evaluate/evaluate.py
+++ b/dspy/evaluate/evaluate.py
@@ -1,10 +1,9 @@
 import logging
-import tqdm
 import types
 from typing import Any
 
 import pandas as pd
-
+import tqdm
 
 import dspy
 from dspy.utils.parallelizer import ParallelExecutor
@@ -14,7 +13,7 @@
     from IPython.display import display as display
 
 except ImportError:
-    
+
     def display(obj: Any):
         """
         Display the specified Python object in the console.
@@ -41,6 +40,7 @@ def HTML(x: str) -> str:
 
 logger = logging.getLogger(__name__)
 
+
 class Evaluate:
     def __init__(
         self,
@@ -66,7 +66,6 @@ def __init__(
         self.return_outputs = return_outputs
         self.provide_traceback = provide_traceback
 
-
     def __call__(
         self,
         program,
@@ -131,11 +130,9 @@ def process_item(item):
             results = [(example, prediction, score) for _, example, prediction, score in predicted_devset]
 
         def prediction_is_dictlike(prediction):
-            try:
-                dict(prediction)
-                return True
-            except Exception:
-                return False
+            # A prediction is treated as dictionary-like when it exposes a callable `items()`;
+            # that method is all the downstream display logic uses to walk key/value pairs
+            return callable(getattr(prediction, "items", None))
 
         data = [
             (
diff --git a/tests/evaluate/test_evaluate.py b/tests/evaluate/test_evaluate.py
index 7615c70b32..048d4c08b5 100644
--- a/tests/evaluate/test_evaluate.py
+++ b/tests/evaluate/test_evaluate.py
@@ -8,7 +8,6 @@
 import dspy
 from dspy.evaluate.evaluate import Evaluate
 from dspy.evaluate.metrics import answer_exact_match
-from dspy.functional import TypedPredictor
 from dspy.predict import Predict
 from dspy.utils.dummies import DummyLM
 
@@ -125,14 +124,22 @@ def test_evaluate_call_bad():
     "program_with_example",
     [
         (Predict("question -> answer"), new_example("What is 1+1?", "2")),
+        # Create programs that do not return dictionary-like objects because Evaluate()
+        # has failed for such cases in the past
         (
-            # Create a program that extracts entities from text and returns them as a list,
-            # rather than returning a Predictor() wrapper. This is done intentionally to test
-            # the case where the program does not output a dictionary-like object because
-            # Evaluate() has failed for this case in the past
-            lambda text: TypedPredictor("text: str -> entities: List[str]")(text=text).entities,
+            lambda text: Predict("text: str -> entities: List[str]")(text=text).entities,
             dspy.Example(text="United States", entities=["United States"]).with_inputs("text"),
         ),
+        (
+            lambda text: Predict("text: str -> entities: List[Dict[str, str]]")(text=text).entities,
+            dspy.Example(text="United States", entities=[{"name": "United States", "type": "location"}]).with_inputs(
+                "text"
+            ),
+        ),
+        (
+            lambda text: Predict("text: str -> first_word: Tuple[str, int]")(text=text).first_word,
+            dspy.Example(text="United States", first_word=("United", 6)).with_inputs("text"),
+        ),
     ],
 )
 @pytest.mark.parametrize("display_table", [True, False, 1])