### Repo

Clone and build Llama.cpp, following instructions here:

https://github.com/ggerganov/llama.cpp

### Model

Download a local LLM, ideally one that supports tool calling, so that you can use all of the features discussed below:
 
* Weights: `meta-llama-3-8b-instruct.Q8_0.gguf`
* Link: https://huggingface.co/SanctumAI/Meta-Llama-3-8B-Instruct-GGUF

### Init

In [3]:
from libs.community.langchain_community.chat_models.llamacpp import ChatLlamaCpp

ModuleNotFoundError: No module named 'libs'

In [1]:
import multiprocessing

from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.chat_models import ChatLlamaCpp

# Configure ChatLlamaCpp with a local GGUF weights file. Keyword arguments are
# grouped by purpose; the values themselves are what the cell always used.
llm = ChatLlamaCpp(
    # --- Weights file and runtime resources ---
    model_path="/Users/rlm/Desktop/Code/llama.cpp/models/meta-llama-3-8b-instruct.Q8_0.gguf",
    n_ctx=10000,
    n_batch=200,  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
    n_gpu_layers=4,
    n_threads=multiprocessing.cpu_count() - 1,  # one fewer than the reported CPU count
    # --- Generation settings ---
    temperature=0.3,
    top_p=0.5,
    repeat_penalty=1.5,
    max_tokens=512,
    stop=["<|end_of_text|>", "<|eot_id|>"],
    # --- Streaming and logging ---
    streaming=True,
    # Callbacks support token-wise streaming
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    verbose=True,
)

ImportError: cannot import name 'ChatLlamaCpp' from 'langchain_community.chat_models' (/Users/rlm/Desktop/Code/langchain-main/langchain/libs/community/langchain_community/chat_models/__init__.py)

### Invoke

In [None]:
# Conversation as (role, content) pairs: a system instruction followed by the
# user sentence to translate.
messages = [
    (
        "system",
        "You are a helpful assistant that translates English to French. Translate the user sentence.",
    ),
    ("human", "I love programming."),
]

ai_msg = llm.invoke(messages)
# Bare trailing expression so the notebook cell displays the reply.
ai_msg

### Chain

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Two-message template: the system message carries the language pair and the
# human message carries the text; all three placeholders are filled at invoke time.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant that translates {input_language} to {output_language}."),
        ("human", "{input}"),
    ]
)

# Pipe the prompt into the model.
chain = prompt | llm

# Supply a value for every placeholder declared in the template.
chain.invoke(
    {"input_language": "English", "output_language": "German", "input": "I love programming."}
)

### Tool calling

* Note that the model cannot automatically trigger a function/tool on its own.
* We need to force the call by specifying the `tool_choice` parameter. This parameter is typically formatted as shown below.

In [None]:
from langchain.tools import tool
from langchain_core.pydantic_v1 import BaseModel, Field

# Argument schema passed as `args_schema` to the `get_current_weather` tool.
class WeatherInput(BaseModel):
    # Place to report on; the description text is attached to the field.
    location: str = Field(description="The city and state, e.g. San Francisco, CA")
    # Temperature unit. `enum` is handed to Field as an extra keyword listing
    # the two intended values.
    # NOTE(review): presumably this only annotates the generated schema and is
    # not enforced as validation — confirm.
    unit: str = Field(enum=["celsius", "fahrenheit"])

@tool("get_current_weather", args_schema=WeatherInput)
def get_weather(location: str, unit: str):
    """Get the current weather in a given location"""
    # Canned reply: the temperature is hard-coded to 22; only the location
    # and unit are echoed back from the arguments.
    report = f"Now the weather in {location} is 22 {unit}"
    return report


# Attach the weather tool and pin `tool_choice` to it by the name it was
# registered under in the `@tool` decorator.
llm_with_tools = llm.bind_tools(
    tools=[get_weather],
    tool_choice={
        "type": "function",
        "function": {"name": "get_current_weather"},
    },
)