shared/epistula.py: 12 changes (11 additions & 1 deletion)
@@ -240,6 +240,8 @@ async def make_openai_query(
     choices = []
     chunks = []
     chunk_timings = []
+    last_finish_reason = None  # Only track the finish reason of the last chunk
+
     async for chunk in chat:
         if not chunk.choices:
             continue
@@ -248,11 +250,19 @@ async def make_openai_query(
choices.append("")
if choice.delta.content:
choices[i] += choice.delta.content
# Save finish reason from the last chunk, safely handling the attribute
if hasattr(choice, "finish_reason") and choice.finish_reason is not None:
last_finish_reason = choice.finish_reason
if chunk.choices[0].delta.content:
chunks.append(chunk.choices[0].delta.content)
chunk_timings.append(time.perf_counter() - start_time)

choices = [
Choice(index=i, message=ChatCompletionMessage(content=choice, role="assistant"), finish_reason="stop")
Choice(
index=i,
message=ChatCompletionMessage(content=choice, role="assistant"),
finish_reason=last_finish_reason or "stop", # Use the captured finish_reason or fallback to "stop"
)
for i, choice in enumerate(choices)
]
# TODO: We need to find a better way to do this instead of sometimes returning a tuple and sometimes not, but for now this has to do
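
For context on why this change matters: in a streamed chat completion, every intermediate chunk reports finish_reason as None, and only the final chunk carries the real value ("stop", "length", "content_filter", ...). The old code hard-coded "stop", which mislabels truncated or filtered responses. Below is a minimal, self-contained sketch of the same pattern, assuming the official OpenAI Python client; the collect_streamed_choice helper and its parameters are illustrative, not part of this PR. The PR's hasattr guard is there for OpenAI-compatible backends that may omit the attribute entirely; this sketch accesses it directly since the official client always defines it.

# Minimal sketch (not part of the PR), assuming the official OpenAI Python
# client; client, model, and messages are placeholder inputs.
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessage
from openai.types.chat.chat_completion import Choice


async def collect_streamed_choice(client: AsyncOpenAI, model: str, messages: list) -> Choice:
    stream = await client.chat.completions.create(model=model, messages=messages, stream=True)

    content = ""
    last_finish_reason = None
    async for chunk in stream:
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        if delta.content:
            content += delta.content
        # Intermediate chunks report finish_reason=None; only the final
        # chunk carries the real value ("stop", "length", ...).
        if chunk.choices[0].finish_reason is not None:
            last_finish_reason = chunk.choices[0].finish_reason

    # Hard-coding "stop" here (the old behaviour) would mislabel a
    # truncated response, whose real finish_reason is "length".
    return Choice(
        index=0,
        message=ChatCompletionMessage(content=content, role="assistant"),
        finish_reason=last_finish_reason or "stop",
    )

The `or "stop"` fallback keeps behaviour unchanged for backends that never populate finish_reason, at the cost of masking that case; callers that need to distinguish it could propagate None instead.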