In [None]:
! pip install langchain==0.0.350

# This set the llama-cpp-python to use the MacOS Metal (GPU) instead of CPU
%env CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1
! pip install --upgrade --force-reinstall llama-cpp-python==0.2.23 --no-cache-dir

# If you want to use the CPU versio, use this instead
# ! pip install llama-cpp-python

In [None]:
from langchain.chains import LLMChain
from langchain.llms import LlamaCpp

# The path below points at the q5_0 quantized model.
# If you followed the instructions and use the q4_0 model instead, your path should be:
# [UPDATE_AND_PUT_YOUR_PATH_TO_MODEL_HERE]/llama.cpp/models/llama-2-7b-chat/ggml-model-q4_0.gguf
llm = LlamaCpp(
    # Placeholder: replace the bracketed prefix with your local checkout location.
    model_path="[UPDATE_TO_YOUR_MODEL_PATH]/llama.cpp/models/llama-2-7b-chat/ggml-model-q5_0.gguf",
    # Sizing / GPU offload settings.
    n_ctx=2048,
    n_batch=512,
    n_gpu_layers=1,
    f16_kv=True,
    # Sampling and logging.
    temperature=0.25,
    verbose=True,
)


In [None]:
# Baseline: ask the raw model with no prompt template.
# "langchain" here means the LangChain LLM development framework; without any
# extra context you may find the response is not very relevant to the question.
baseline_question = "what is langchain?"
llm.invoke(baseline_question)

In [None]:
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import ChatPromptTemplate  # not used below; kept in case other cells rely on it

# To make the LLM response more relevant to the question, wrap the question in a
# Llama-2 style [INST] / <<SYS>> prompt that gives the model a system context.
#
# LlamaCpp is a plain text-completion LLM, so the prompt must be a string
# PromptTemplate. A ChatPromptTemplate would be rendered as a chat transcript and
# prefix the text with "Human: ", leaving stray text in front of the [INST] tag.
template = """[INST]<<SYS>>You are a large language model developer. 
You able to help engineer to develop large language model application with langchain framework.<</SYS>>
Question: {human_input} [/INST]
"""

prompt = PromptTemplate.from_template(template)
# prompt -> llm: fill in {human_input}, then send the resulting string to the model.
chain = prompt | llm
chain.invoke({"human_input": "what is langchain?"})


In [None]:
from langchain.schema.runnable import RunnablePassthrough

# LangChain syntactic sugar: a leading dict step maps the raw chain input onto the
# "human_input" key, so callers pass plain text instead of a {"human_input": ...} dict.
question_as_input = {"human_input": RunnablePassthrough()}
simplify_chain = question_as_input | prompt | llm
simplify_chain.invoke("how do I get started develop with LangChain?")
