In [None]:
from langchain.chains import LLMChain
from langchain.llms import LlamaCpp
from langchain.prompts import (
    ChatPromptTemplate, 
    HumanMessagePromptTemplate, 
    MessagesPlaceholder
)
from langchain.schema import SystemMessage
from langchain.memory import ConversationBufferWindowMemory


# template = """Assistant is a large language model developer.
# Assistant able to help engineer to develop large language model application with langchain framework.
#
# {history}
# Human: {input}
# AI:"""
# Chat prompt sent to the model on every turn:
#   1. a fixed system message describing the assistant's role,
#   2. the running conversation, injected under the "chat_history" variable,
#   3. the user's new message, injected under the "input" variable.
prompt = ChatPromptTemplate.from_messages(
    [
        # Built from adjacent string literals (not a triple-quoted block) so the
        # source-code indentation does not end up inside the prompt text. The
        # stray double quote that used to trail "Current conversation:" is gone.
        SystemMessage(
            content=(
                "You are a helpful AI that is a large language model developer. "
                "You are able to help engineers develop large language model "
                "applications with the langchain framework.\n"
                "\n"
                "Current conversation:"
            )
        ),
        # Must match the memory_key of the memory object attached to the chain.
        MessagesPlaceholder(variable_name="chat_history"),
        HumanMessagePromptTemplate.from_template("{input}"),
    ]
)

# Sliding-window chat memory: only the most recent exchanges are replayed into
# the prompt.
#
# The window size is controlled by `k`. `window_size` is not a field of
# ConversationBufferWindowMemory, so passing it never set the window to 20.
#
# NOTE(review): 20 exchanges of long answers may outgrow the model's context
# window (see n_ctx on the LlamaCpp instance); lower k if prompts overflow.
conversation_buffer = ConversationBufferWindowMemory(
    k=20,
    # Must match the MessagesPlaceholder variable_name used in the prompt.
    memory_key="chat_history",
    # Hand the history back as message objects rather than one flat string,
    # which is what a MessagesPlaceholder expects.
    return_messages=True,
)

# If you followed the instructions and use the q4_0 model, your path should be:
# [UPDATE_AND_PUT_YOUR_PATH_TO_MODEL_HERE]/llama.cpp/models/llama-2-7b-chat/ggml-model-q4_0.gguf
# The following code uses the q5_0 model.
llm = LlamaCpp(
    # Placeholder: replace [PATH_TO_YOUR_MODEL] with the real location of the GGUF file.
    model_path="[PATH_TO_YOUR_MODEL]/llama.cpp/models/llama-2-7b-chat/ggml-model-q5_0.gguf",
    temperature=0.25,
    n_gpu_layers=1,
    n_batch=512,
    n_ctx=2048,
    verbose=True,
    # Stop when the model starts writing the user's next turn. The stop string is
    # the turn label "Human:" rather than the bare word "Human": the bare word
    # also cut off legitimate answers that merely mention it (for example
    # "HumanMessagePromptTemplate").
    stop=["Human:"],
)

# Tie the pieces together: the chat prompt, the local llama.cpp model and the
# windowed conversation memory become a single chain object. The chain is
# created with verbose=True.
llm_chain = LLMChain(
    prompt=prompt,
    llm=llm,
    memory=conversation_buffer,
    verbose=True,
)


In [None]:
import gradio as gr

def predict(message, history):
    '''
    Produce the assistant's reply for one chat turn.

    Called by the Gradio chat interface when the user submits a message.

    Args:
        message: The text the user just submitted.
        history: Conversation history supplied by Gradio. It is not used here
            because past turns are tracked by the memory object attached to
            ``llm_chain``.

    Returns:
        The model's reply with surrounding whitespace removed and a leading
        "AI:" speaker label stripped, if present.
    '''
    resp = llm_chain.invoke({"input": message})
    response_text = resp["text"].strip()

    # Remove the speaker label only when it prefixes the reply. Replacing every
    # "AI:" occurrence would corrupt answers containing text such as "OpenAI:".
    speaker_label = "AI:"
    if response_text.startswith(speaker_label):
        # lstrip() drops the space that normally follows the label.
        response_text = response_text[len(speaker_label):].lstrip()

    return response_text


# Chat front end: every submitted message is routed through predict().
# The retry, undo and clear buttons are passed as None, and the submit button
# is labelled "Send".
hidden_buttons = dict.fromkeys(("retry_btn", "undo_btn", "clear_btn"))
chatui = gr.ChatInterface(predict, submit_btn="Send", **hidden_buttons)

# Start serving the interface.
chatui.launch()

In [None]:
# Shut down the server that is running the Gradio chat interface.
# Run this cell when you are done chatting; it relies on `chatui` from the cell above.
chatui.close()
