In [None]:
! pip install langchain==0.0.350
! pip install llama-cpp-python==0.2.23
! pip install gradio==4.9.0

In [None]:
from langchain.chains import LLMChain
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate

# Prompt template for the chat. The {history} placeholder receives the earlier turns of the
# conversation, giving the model the context it needs to generate a more relevant response;
# {input} receives the user's newest message.
template = """Assistant is a large language model developer.
Assistant able to help engineer to develop large language model application with langchain framework.

H: what is langchain framework?
AI: langchain framework is a framework to develop large language model application.

{history}
Human: {input}
AI:"""

prompt = PromptTemplate.from_template(template)

# UPDATE THIS before running: path to your local GGUF model file.
# This notebook uses the q5_0 model. If you followed the instructions and use the q4_0 model
# instead, your path should be:
# [UPDATE_AND_PUT_YOUR_PATH_TO_MODEL_HERE]/llama.cpp/models/llama-2-7b-chat/ggml-model-q4_0.gguf
MODEL_PATH = "[PATH_TO_YOUR_MODEL]/llama.cpp/models/llama-2-7b-chat/ggml-model-q5_0.gguf"

llm = LlamaCpp(
    model_path=MODEL_PATH,
    temperature=0.25,
    n_gpu_layers=1,
    n_batch=512,
    n_ctx=2048,
    verbose=False,
    # The prompt is a "Human:" / "AI:" transcript, so cut generation off as soon as the model
    # starts writing the next "Human:" turn by itself.
    stop=["Human:"],
)

# Chain that fills `prompt` with {history} and {input} and sends the result to `llm`.
llm_chain = LLMChain(llm=llm, prompt=prompt, verbose=True)


In [None]:
import gradio as gr

def predict(message, history):
    """Produce the assistant's reply to `message`, given the conversation so far.

    Args:
        message: The newest user message.
        history: Earlier turns as a list of pairs,
            [["human message", "ai response"], ["human message", "ai response"], ...].

    Returns:
        The "text" field of the chain's response, with surrounding whitespace stripped.
    """
    # Render each past turn in the same "Human:" / "AI:" transcript format the prompt
    # template uses, then glue the turns together into one string.
    past_turns = [
        "\nHuman: " + human_message + "\nAI: " + ai_response
        for human_message, ai_response in history
    ]
    transcript = "".join(past_turns)

    chain_inputs = {"input": message, "history": transcript}
    result = llm_chain.invoke(chain_inputs)
    return result["text"].strip()


# Build a chat UI around `predict`: only a "Send" button is configured; the clear, undo and
# retry buttons are all set to None.
chat_interface = gr.ChatInterface(
    fn=predict,
    submit_btn="Send",
    retry_btn=None,
    undo_btn=None,
    clear_btn=None,
)
chat_interface.launch()

In [None]:
# Shut down the chat interface server started by chat_interface.launch() once you are done.
chat_interface.close()