In [1]:
import os
from google.colab import userdata

from langchain_community.llms import LlamaCpp
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_community.agent_toolkits.load_tools import load_tools

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.tools import Tool

from langchain_classic import hub
from langchain_classic.agents import AgentExecutor, create_react_agent

from langchain_groq import ChatGroq

from huggingface_hub import hf_hub_download

Dowloading the Llama 3.1 8B Instruct model in GGUF format directly from Hugging Face Hub.

In [2]:
model_path = hf_hub_download(
    repo_id="MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF",
    filename="Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf",
    local_dir="."
)

Loading the model.

In [3]:
llm = LlamaCpp(
    model_path="/content/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf",
    n_gpu_layers=-1,
    max_tokens=500,
    n_ctx=1024,
    temperature=0,
    seed=42,
    verbose=False
)

With `langchain`, it is possible to create piplines involving LLMs. With that, three templates were created as a sequential chain: one for the title, one for character description, and one for the final story, given an initial prompt.

In [4]:
template_title = """<|begin_of_text|><|start_header_id|>user<|end_header_id|>

Create a short, creative title for a story about {summary}. Only return the title.<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

title_prompt = ChatPromptTemplate.from_template(template_title)

template_character = """<|begin_of_text|><|start_header_id|>user<|end_header_id|>

Describe the main character of a story about {summary} with the title {title}. Use only two sentences.<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

character_prompt = ChatPromptTemplate.from_template(template_character)

template_story = """<|begin_of_text|><|start_header_id|>user<|end_header_id|>

Create a story about {summary} with the title {title}. The main character is: {character}. Only return the story and it cannot be longer than one paragraph.<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

story_prompt = ChatPromptTemplate.from_template(template_story)

Running the chain.

In [5]:
title_step = title_prompt | llm | StrOutputParser()

character_step = (
    {"summary": lambda x: x["summary"], "title": lambda x: x["title"]}
    | character_prompt
    | llm
    | StrOutputParser()
)

story_step = story_prompt | llm | StrOutputParser()

chain = (
    RunnableParallel(summary=RunnablePassthrough())
    | RunnablePassthrough.assign(title=title_step)
    | RunnablePassthrough.assign(character=character_step)
    | RunnablePassthrough.assign(story=story_step)
)

out = chain.invoke("A boy who fought Kung-Fu")

for key in list(out.keys()):
    print(key)
    print(out[key])

summary
A boy who fought Kung-Fu
title
"The Dragon's Fury: A Kung-Fu Legend"
character
The main character, a young boy named Ling, is a skilled and fierce Kung-Fu master who wields the power of the dragon. With his unwavering determination and unrelenting fury, Ling is destined to become a legendary hero in the world of Kung-Fu.
story
The Dragon's Fury: A Kung-Fu Legend. Ling, a young boy with an unyielding determination and unrelenting fury, is a skilled and fierce Kung-Fu master who wields the power of the dragon. With his unwavering dedication to justice and his unrelenting passion for Kung-Fu, Ling is destined to become a legendary hero in the world of Kung-Fu.


It is also possible to create agents. For that, the Llama 3.3 70B model is going to be used.

In [6]:
os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')

llm_agent = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)

Equiping the agent with DuckDuckGo Search and a math tool to handle external calculations.

In [7]:
search = DuckDuckGoSearchResults()

search_tool = Tool(
    name="duckduckgo",
    description="Use this for current events or technical facts you don't know.",
    func=search.run,
)

tools = load_tools(["llm-math"], llm=llm_agent)
tools.append(search_tool)

Loading a standard ReAct (Reason + Act) prompt.

In [8]:
agent_prompt = hub.pull("hwchase17/react")

Instantiating the agent model.

In [9]:
agent = create_react_agent(llm_agent, tools, agent_prompt)

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
)

Asking a simple question for the model.

In [10]:
res = agent_executor.invoke({"input": "What is the current price of a 14-inch MacBook Pro in USD? Return me the price and a single updated website with this information."})
print(res['output'])

Final Answer: The current price of a 14-inch MacBook Pro in USD is $1,399, and you can find the latest pricing information on websites such as https://appleinsider.com/articles/26/01/05/apples-m5-macbook-pro-drops-to-1399-200-off-as-ces-2026-kicks-off.

As a baseline comparison, the LLM is used directly without tools to see how it affects the answer.

In [11]:
print(llm_agent.invoke("What is the current price of a 14-inch MacBook Pro in USD? Return me the price and a single updated website with this information."))

content="As of my knowledge cutoff in 2023, the prices for a 14-inch MacBook Pro may vary..."