## Load the required libraries

In [1]:
import os
from langchain.llms import OpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
import gradio as gr
from dotenv import load_dotenv
from langchain_core.callbacks import StdOutCallbackHandler

## Load the API Key

Note: we are using the internal LLM service (llm.ai.broadcom.net). It runs on vLLM, which exposes an OpenAI-compatible API, so all we need to do is call it through the OpenAI client with a custom base URL. To obtain an API key for the internal LLM service, follow the instructions [here](https://auth.esp.vmware.com/api-tokens/). The code below reads the key from the `VMWARE_LLM_API_KEY` environment variable (for example, from a `.env` file).

In [2]:
# Initialize the chat model against the internal OpenAI-compatible endpoint.
load_dotenv()  # python-dotenv: picks up variables from a local .env file if one exists

# Endpoint and model are named here so they are easy to find and change.
LLM_BASE_URL = "https://llm.ai.broadcom.net/api/v1"
LLM_MODEL = "meta-llama/Meta-Llama-3-70B-Instruct"

# Fail fast with an actionable message instead of silently sending a
# placeholder string as the API key and getting an opaque auth error on the
# first request. Keeping the key out of the notebook also avoids committing it.
_api_key = os.getenv('VMWARE_LLM_API_KEY')
if not _api_key:
    raise RuntimeError(
        "VMWARE_LLM_API_KEY is not set. Export it in your shell or add it to a "
        ".env file next to this notebook before running this cell."
    )

# The OpenAI client reads its credential from OPENAI_API_KEY.
os.environ['OPENAI_API_KEY'] = _api_key
llm = ChatOpenAI(temperature=0.7, model=LLM_MODEL, base_url=LLM_BASE_URL)

## Create a conversation chain and chat function for Gradio

In [3]:
# Conversation state shared by the chat callbacks below: a buffer memory for
# the transcript, plugged into a ConversationChain that reports its progress
# on stdout through StdOutCallbackHandler.
memory = ConversationBufferMemory()
conversation = ConversationChain(
    llm=llm,
    memory=memory,
    callbacks=[StdOutCallbackHandler()],
)

# Generator function for streaming chatbot responses
def chat_with_bot_stream(user_message, history):
    """Stream the model's reply to ``user_message`` into the Gradio chat history.

    This is a generator: it yields ``(history, history)`` once right after the
    user's message is recorded and again after every streamed chunk, so the
    chatbot component and the state component are refreshed incrementally.

    Args:
        user_message: Text the user submitted.
        history: List of ``(user_text, bot_text)`` tuples for the current chat.
            It is updated in place; ``None`` is treated as an empty history.

    Yields:
        ``(history, history)`` where the last entry holds the reply so far.

    Notes:
        The model is called through the public ``stream()`` method with the
        messages stored in ``memory``, so earlier turns are part of the
        request. The ConversationChain prompt template is not applied on this
        path.
    """
    if history is None:
        history = []

    # Record the user's turn first so it is part of the messages sent below.
    memory.chat_memory.add_user_message(user_message)

    # One history row per exchange: add it once, then overwrite it as the
    # reply grows (appending per chunk would duplicate the row).
    history.append((user_message, ""))
    yield history, history

    bot_reply = ""
    # Pass a copy so the request payload is not affected by later memory edits.
    for chunk in conversation.llm.stream(list(memory.chat_memory.messages)):
        piece = chunk.content
        if not isinstance(piece, str):
            # NOTE(review): some providers return structured content; fall
            # back to its string form rather than failing mid-stream.
            piece = str(piece)
        bot_reply += piece
        history[-1] = (user_message, bot_reply)
        yield history, history

    # Store the finished reply so the next turn sees both sides of this one.
    memory.chat_memory.add_ai_message(bot_reply)

def chat(question, history):
    """Send ``question`` through the conversation chain and record the exchange.

    Args:
        question: Text the user submitted.
        history: List of ``(user_text, bot_text)`` tuples for the current chat.
            It is updated in place; ``None`` is treated as an empty history.

    Returns:
        ``(history, history)``: the same list twice, one value for the chatbot
        component and one for the state component.
    """
    if history is None:
        history = []

    # Nothing to ask: leave the chat untouched instead of calling the model
    # with an empty prompt.
    if not question or not question.strip():
        return history, history

    result = conversation.invoke({"input": question})
    history.append((question, result["response"]))
    return history, history

# `memory` and `conversation` are created once at the top of this cell; the
# mis-indented second copy that used to follow this function was removed.


## Gradio

In [4]:
# Gradio Interface
def reset_chat():
    """Clear the chat on both sides: the LangChain memory and the UI.

    Returns:
        Two empty lists, one for the chatbot component and one for the state.
    """
    # Without this the model would keep the "cleared" conversation as context.
    memory.clear()
    return [], []


with gr.Blocks() as gr_interface:
    chatbot = gr.Chatbot(label="Chat with LangChain + OpenAI (Streaming)")
    msg = gr.Textbox(placeholder="Type your message here...")
    clear_btn = gr.Button("Clear")

    # Per-session chat history as a list of (user, bot) tuples
    state = gr.State([])

    # On submit: run the chain, refresh chatbot + state, then empty the textbox
    # so the next message starts from a blank input.
    msg.submit(chat, [msg, state], [chatbot, state]).then(lambda: "", None, msg)
    clear_btn.click(reset_chat, None, [chatbot, state])

# Run the Gradio app
# NOTE(review): `memory` is a single module-level object, so every browser
# session talking to this app shares one conversation — confirm that is intended.
gr_interface.launch()



* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.






[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: Test
AI:[0m

[1m> Finished chain.[0m
