5
5
# the root directory of this source tree.
6
6
from typing import Generator
7
7
from termcolor import cprint
8
- from llama_stack_client .types import ChatCompletionResponseStreamChunk , ChatCompletionChunk
8
+ from llama_stack_client .types import ChatCompletionChunk
9
9
10
10
11
11
class InferenceStreamPrintableEvent :
@@ -28,35 +28,11 @@ def __init__(self):
28
28
self .is_thinking = False
29
29
30
30
def yield_printable_events (
31
- self , chunk : ChatCompletionResponseStreamChunk | ChatCompletionChunk
31
+ self , chunk : ChatCompletionChunk
32
32
) -> Generator [InferenceStreamPrintableEvent , None , None ]:
33
- # Check if the chunk has event attribute (ChatCompletionResponseStreamChunk)
34
- if hasattr (chunk , "event" ):
35
- yield from self ._handle_inference_stream_chunk (chunk )
36
- # Check if the chunk has choices attribute (ChatCompletionChunk)
37
- elif hasattr (chunk , "choices" ) and len (chunk .choices ) > 0 :
33
+ if hasattr (chunk , "choices" ) and len (chunk .choices ) > 0 :
38
34
yield from self ._handle_chat_completion_chunk (chunk )
39
35
40
- def _handle_inference_stream_chunk (
41
- self , chunk : ChatCompletionResponseStreamChunk
42
- ) -> Generator [InferenceStreamPrintableEvent , None , None ]:
43
- event = chunk .event
44
- if event .event_type == "start" :
45
- yield InferenceStreamPrintableEvent ("Assistant> " , color = "cyan" , end = "" )
46
- elif event .event_type == "progress" :
47
- if event .delta .type == "reasoning" :
48
- if not self .is_thinking :
49
- yield InferenceStreamPrintableEvent ("<thinking> " , color = "magenta" , end = "" )
50
- self .is_thinking = True
51
- yield InferenceStreamPrintableEvent (event .delta .reasoning , color = "magenta" , end = "" )
52
- else :
53
- if self .is_thinking :
54
- yield InferenceStreamPrintableEvent ("</thinking>" , color = "magenta" , end = "" )
55
- self .is_thinking = False
56
- yield InferenceStreamPrintableEvent (event .delta .text , color = "yellow" , end = "" )
57
- elif event .event_type == "complete" :
58
- yield InferenceStreamPrintableEvent ("" )
59
-
60
36
def _handle_chat_completion_chunk (
61
37
self , chunk : ChatCompletionChunk
62
38
) -> Generator [InferenceStreamPrintableEvent , None , None ]:
0 commit comments