## Run a local GPT4All model on Windows

This example is adapted from the LangChain v0.3 example: <br>
https://python.langchain.com/docs/integrations/llms/gpt4all/

In [1]:
from pathlib import Path

from langchain_community.llms import GPT4All
from langchain_core.prompts import PromptTemplate

In [2]:
# Path to the local GGUF model file under the current user's
# AppData/Local/nomic.ai/GPT4All folder. Derived from the home directory
# rather than a hard-coded account name; replace it if your file lives elsewhere.
# NOTE(review): as_posix() keeps the forward slashes of the original literal;
# the GPT4All wrapper appears to split the path on "/" -- confirm before changing.
local_path = (
    Path.home()
    / "AppData/Local/nomic.ai/GPT4All"
    / "Meta-Llama-3-8B-Instruct.Q4_0.gguf"
).as_posix()

In [3]:
# Load the local model with default settings (no callbacks, no streaming).
llm = GPT4All(
    model=local_path,
)

In [4]:
# Ask the model, in Chinese, which LLM version it is; the reply is shown as the cell output.
llm.invoke("你是哪個版本的LLM?")

" (What version of LLM are you?)\n\nI'm an English-Chinese bilingual AI, and I'll do my best to help you with your questions. 😊\n\nPlease feel free to ask me anything! 💬\n\n(Note: If you're not sure which version of LLM I am referring to, please let me know!) 🤔\n\nLet's chat! 👋"

In [5]:
# Two-line prompt: the {question} placeholder followed by a step-by-step cue.
template = (
    "Question: {question}\n"
    "Answer: Let's think step by step."
)

prompt = PromptTemplate.from_template(template)

In [None]:
from langchain_core.callbacks import BaseCallbackHandler

class MyCustomHandler(BaseCallbackHandler):
    """Print the first few streamed tokens of each LLM run.

    The counter is stored on the handler instance (not in a module-level
    global) and is reset when a new run starts, so every invocation prints
    its own leading tokens and separate handlers do not share state.

    Args:
        max_tokens: Maximum number of tokens to print per run. Defaults to 10.
    """

    def __init__(self, max_tokens: int = 10) -> None:
        super().__init__()
        self.max_tokens = max_tokens
        self.count = 0

    def on_llm_start(self, serialized, prompts, **kwargs) -> None:
        """Reset the printed-token counter at the start of a run."""
        self.count = 0

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Print ``token`` while fewer than ``max_tokens`` have been printed."""
        if self.count < self.max_tokens:
            print(f"Token: {token}")
            self.count += 1

# Rebuild the model in streaming mode with the custom handler registered
# through `callbacks`, so the handler is called for each new token.
llm = GPT4All(
    model=local_path,
    callbacks=[MyCustomHandler()],
    streaming=True,
)

# If you want to use a custom model add the backend parameter
# Check https://docs.gpt4all.io/gpt4all_python.html for supported backends
# llm = GPT4All(model=local_path, backend="gptj", callbacks=[MyCustomHandler()], streaming=True)

# Pipe the prompt template into the streaming model.
chain = prompt | llm

question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"

# The handler sees the tokens as they stream; the invoke result is stored in `res`.
res = chain.invoke(dict(question=question))

Token:  Justin
Token:  Bieber
Token:  was
Token:  born
Token:  on
Token:  March
Token:  
Token: 1
Token: ,
Token:  
