In [55]:
import os
import getpass

def _set_env(var: str):
    """Ensure the environment variable ``var`` holds a non-empty value.

    When ``var`` is missing from ``os.environ`` or set to an empty string,
    the value is read with ``getpass.getpass`` (prompt ``"<var>: "``) and
    stored in ``os.environ``. An existing non-empty value is left untouched.
    """
    already_set = bool(os.environ.get(var))
    if already_set:
        return
    os.environ[var] = getpass.getpass(f"{var}: ")

In [56]:
# Make sure both API keys are present, prompting for any that are not already
# exported. Assigning os.environ.get(...) straight back into os.environ does
# nothing when the key is set and raises TypeError when it is not, because
# os.environ only accepts str values and .get() returns None for a missing key.
_set_env("GOOGLE_API_KEY")
_set_env("LANGCHAIN_API_KEY")

# LangChain tracing settings: enable tracing, name the project, set the endpoint.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "macbook_deepseek"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

In [57]:
from langchain.callbacks.base import BaseCallbackHandler
from langchain.callbacks.manager import CallbackManager
from langchain.schema import HumanMessage
from langchain.chat_models import ChatOllama
from IPython.display import display, Markdown, clear_output
import sys

In [62]:
class JupyterStreamHandler(BaseCallbackHandler):
    """Stream LLM tokens into a Jupyter cell, re-rendering the text as Markdown.

    The text received so far is kept in two parts: ``buffer`` holds every
    completed line (each terminated by a newline) and ``current_line`` holds
    the unfinished last line. A newline is inserted before a token that would
    push the unfinished line past ``max_width`` characters. Wrapping happens
    only at token boundaries, so a break lands wherever the model split its
    tokens.

    Args:
        max_width: Maximum number of characters allowed on the unfinished
            line before a newline is inserted ahead of the next token.
    """

    def __init__(self, max_width=80):
        self.buffer = ""  # Completed lines, each ending in "\n"
        self.current_line = ""  # Text after the last newline; used for width tracking
        self.max_width = max_width  # Wrap threshold in characters

    @staticmethod
    def _render(text: str) -> None:
        """Replace the cell's current output with ``text`` rendered as Markdown."""
        # wait=True defers the clear until the new output is ready to be shown.
        clear_output(wait=True)
        display(Markdown(text))

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Append ``token`` to the accumulated text and re-render the cell.

        Args:
            token: The newly generated piece of text.
            **kwargs: Extra callback arguments; ignored.
        """
        # Only the part of the token before its first newline lands on the
        # current line, so only that part counts toward the width check.
        first_segment = token.split("\n", 1)[0]

        # Wrap before the token when it would overflow the line. An empty
        # current line is never flushed: doing so would emit a bare "\n",
        # which Markdown turns into an unwanted paragraph break.
        if self.current_line and len(self.current_line) + len(first_segment) > self.max_width:
            self.buffer += self.current_line + "\n"
            self.current_line = ""

        self.current_line += token

        # A token can carry its own newlines. Move everything up to the last
        # one into the buffer so current_line (and therefore the width count)
        # covers only the unfinished line.
        completed, newline, remainder = self.current_line.rpartition("\n")
        if newline:
            self.buffer += completed + newline
            self.current_line = remainder

        self._render(self.buffer + self.current_line)

    def on_llm_end(self, *args, **kwargs) -> None:
        """Flush the unfinished line, render the final text, and reset the buffer.

        Args:
            *args: Positional callback arguments; ignored.
            **kwargs: Keyword callback arguments; ignored.
        """
        # Flush any remaining content in the current line
        if self.current_line:
            self.buffer += self.current_line + "\n"
            self.current_line = ""

        self._render(self.buffer)
        self.buffer = ""  # Start the next generation from a clean slate

# Create the streaming handler instance; lines are wrapped at 110 characters
stream_handler = JupyterStreamHandler(
    max_width=110,
)

In [63]:
# Bundle the custom stream handler into a CallbackManager
callback_manager = CallbackManager(handlers=[stream_handler])

In [None]:
# Address of the Ollama server and the model tag to request from it.
base_url = "http://localhost:11434"
local_llm = "deepseek-r1:8b"

# The manager is passed through `callbacks`: the `callback_manager` keyword is
# deprecated on LangChain models and only forwards its value to `callbacks`
# after emitting a DeprecationWarning.
llm = ChatOllama(
    model=local_llm,
    base_url=base_url,
    temperature=0,
    callbacks=callback_manager,
    streaming=True,
)

In [None]:
# Construct a human message
message = HumanMessage(content="""
                       What are the steps for running kubernetes on docker? 
                       I would like to create applications and run them in containers running on pods.
                       I would also like to integrate opentelemtry for tracing and monitoring.
                       Provide python code examples as well as any example yaml files required for docker.
                       """)

# Send the message to the model and keep the returned message in `response`.
# `invoke` is used because calling the model object directly (`llm([...])`)
# is deprecated in LangChain; both take the same list of messages.
response = llm.invoke([message])