# Ordinary LangChain Chain

In [15]:
from langchain.prompts import PromptTemplate
from langchain.llms import LlamaCpp
from langchain.chains import LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
# Callback manager wrapping a stdout streaming handler.
# NOTE(review): it is not passed to LlamaCpp below, so it has no effect on
# `llm` as written — confirm whether it was meant to be wired in.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Local GGUF model file; the alternative is kept commented out for quick switching.
# model_path = "../model/dolphin-2.0-mistral-7b.Q3_K_M.gguf"
model_path = "../model/llama-2-7b-chat.Q4_0.gguf"

# LangChain wrapper around the local llama.cpp model, reused by the chain cell below.
llm = LlamaCpp(
    model_path=model_path,
    max_tokens=2000,
    temperature=0,
    top_p=1,
    f16_kv=True,
    n_gpu_layers=1,
    verbose=True,
)


llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from ../model/llama-2-7b-chat.Q4_0.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_0     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q4_0     [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q4_0     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q4_0     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q4_0     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_output.weight q4_0     [  4096,  4096,     1, 

In [3]:
# Prompt with a single placeholder, {product}, wrapped in an LLMChain around `llm`.
prompt = PromptTemplate(
    template="What is a good name for a company that makes {product}?",
    input_variables=["product"],
)
chain = LLMChain(prompt=prompt, llm=llm)

# Run the chain for one product and print the text it returns.
print(chain.run("colorful socks"))





1. Rainbow Socks Co.
2. Hueful Things
3. ChromaSock
4. Spectrum Socks
5. ColourCreations
6. VibrantSoles
7. BrightBridges
8. Kaleidosockery
9. ChromaComforts
10. DiverseSocks
11. MultihuedSock Co.
12. SpectrumSole Solutions
13. ColorfulCreations Socks
14. RainbowRhythms
15. ChromaChic Socks
16. VibrantVentures
17. SpectrumStyling
18. ColourfulCreations Co.
19. HueHaven Socks
20. BrightBridges Sock Co.



llama_print_timings:        load time =  4450.26 ms
llama_print_timings:      sample time =   399.35 ms /   186 runs   (    2.15 ms per token,   465.75 tokens per second)
llama_print_timings: prompt eval time =  4746.78 ms /    15 tokens (  316.45 ms per token,     3.16 tokens per second)
llama_print_timings:        eval time = 13198.95 ms /   185 runs   (   71.35 ms per token,    14.02 tokens per second)
llama_print_timings:       total time = 19068.11 ms


# LlamaIndex

In [1]:
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from langchain.embeddings import HuggingFaceEmbeddings
from llama_index import load_index_from_storage
from llama_index.vector_stores import SimpleVectorStore
from llama_index.storage.index_store import SimpleIndexStore
from llama_index.storage.docstore import SimpleDocumentStore
from llama_index.storage.storage_context import StorageContext

from llama_index import ServiceContext
# Local GGUF model file; the alternative is kept commented out for quick switching.
# model_path = "../model/dolphin-2.0-mistral-7b.Q3_K_M.gguf"
model_path = "../model/llama-2-7b-chat.Q4_0.gguf"

llm_index = LlamaCPP(
    # A local file is used here; the original note says a URL to a GGML model
    # can be passed instead to have it downloaded automatically.
    model_path=model_path,
    temperature=0,
    max_new_tokens=2000,
    # Kept below llama2's 4096-token context window to leave some wiggle room.
    context_window=3900,
    # Extra kwargs forwarded to __call__(); none are needed.
    generate_kwargs={},
    # Extra kwargs forwarded to __init__(); n_gpu_layers must be at least 1 to use the GPU.
    model_kwargs={"n_gpu_layers": 1},
    # Helpers that transform inputs into the Llama2 prompt format.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

# Embedding model handed to the service context alongside the LLM.
embed_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

service_context = ServiceContext.from_defaults(
    llm=llm_index,
    embed_model=embed_model,
    chunk_overlap=0,
)

# Load the three persisted stores from the same directory
# (document store first, then vector store, then index store).
persisted_docstore = SimpleDocumentStore.from_persist_dir(persist_dir="../storage")
persisted_vector_store = SimpleVectorStore.from_persist_dir(persist_dir="../storage")
persisted_index_store = SimpleIndexStore.from_persist_dir(persist_dir="../storage")
storage_context = StorageContext.from_defaults(
    docstore=persisted_docstore,
    vector_store=persisted_vector_store,
    index_store=persisted_index_store,
)

# Rebuild the index from storage and expose it as a "condense_question" chat engine.
index = load_index_from_storage(storage_context, service_context=service_context)
chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from ../model/llama-2-7b-chat.Q4_0.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_0     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q4_0     [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q4_0     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q4_0     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q4_0     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_output.weight q4_0     [  4096,  4096,     1, 

## Test the LLM

In [2]:
def ask_assistant(prompt):
    """Send `prompt` to the global `chat_engine` and print the streamed reply.

    Each token from the response generator is printed as soon as it is
    produced, with no separator and no trailing newline. Returns None.
    """
    token_stream = chat_engine.stream_chat(prompt).response_gen
    for piece in token_stream:
        print(piece, end="")

In [4]:
# Ask the chat engine a QGIS how-to question and stream the reply to the cell output.
prompt = (
    "where can I find how to clip a raster in QGIS 3.22? "
    "Provide a step by step answer with numbers."
)
ask_assistant(prompt)

Llama.generate: prefix-match hit

llama_print_timings:        load time =  5701.85 ms
llama_print_timings:      sample time =   144.53 ms /    59 runs   (    2.45 ms per token,   408.21 tokens per second)
llama_print_timings: prompt eval time =  5173.39 ms /   514 tokens (   10.06 ms per token,    99.35 tokens per second)
llama_print_timings:        eval time =  4346.21 ms /    58 runs   (   74.93 ms per token,    13.34 tokens per second)
llama_print_timings:       total time =  9930.40 ms
Llama.generate: prefix-match hit


Querying with:   Sure, here's the standalone question based on the conversation:
How do I clip a raster layer in QGIS 3.22 using the "Clip Raster by Extent" algorithm? Please provide a step-by-step guide with numbers.
  Sure, here's a step-by-step guide on how to clip a raster layer in QGIS 3.22 using the "Clip Raster by Extent" algorithm:
1. Open QGIS and navigate to the layer you want to clip.
2. Click on the "Raster" menu and select "Extraction" or press the hotkey "E".
3. In the "Clip Raster by Extent" dialog box, enter the following parameters:
a. Input layer: Select the raster layer you want to clip.
b. Clipping extent: Enter the extent of the area you want to clip the raster layer. You can enter the coordinates as xmin, ymin, xmax, and ymax.
c. Override the projection for the output file: Check this box if you want to assign the input layer's CRS to the output file. Otherwise, leave it unchecked.
d. Assign a specified nodata value to output bands (optional): Enter the value you 


llama_print_timings:        load time =  5701.85 ms
llama_print_timings:      sample time =   406.62 ms /   495 runs   (    0.82 ms per token,  1217.34 tokens per second)
llama_print_timings: prompt eval time = 11255.07 ms /  1101 tokens (   10.22 ms per token,    97.82 tokens per second)
llama_print_timings:        eval time = 40282.83 ms /   494 runs   (   81.54 ms per token,    12.26 tokens per second)
llama_print_timings:       total time = 90817.13 ms


# Custom AI Agent

In [34]:
import json
from typing import List, Optional, Sequence

from llama_index.llms import ChatMessage
from llama_index.tools import BaseTool, FunctionTool

class LlamaIndexAIAgent:
    """Minimal chat agent that answers directly or after a single tool call.

    The agent keeps a running chat history, describes its tools to the LLM in
    OpenAI function format, and, when the reply carries a ``function_call``
    entry in ``additional_kwargs``, runs that tool and asks the LLM again with
    the tool output appended to the history.
    """

    def __init__(
        self,
        tools: Sequence[BaseTool] = (),
        llm: LlamaCPP = llm_index,
        chat_history: Optional[List[ChatMessage]] = None,
    ) -> None:
        """Set up the agent.

        Args:
            tools: Tools the LLM may request; indexed by ``tool.metadata.name``.
            llm: Model used for every chat turn. The default is bound to the
                notebook's ``llm_index`` at the time this class is defined.
            chat_history: Existing conversation to continue. The list is used
                as-is (not copied). When omitted, each agent gets its own new
                empty list.
        """
        self._llm = llm
        self._tools = {tool.metadata.name: tool for tool in tools}
        # A `[]` default would be created once and then mutated by chat(), so
        # every agent built without an explicit history would share (and leak)
        # the same conversation. Create a fresh list per instance instead.
        self._chat_history = chat_history if chat_history is not None else []

    def reset(self) -> None:
        """Forget the conversation by rebinding the history to a new empty list."""
        self._chat_history = []

    def chat(self, message: str) -> str:
        """Send a user message and return the content of the final LLM reply.

        The user message and every LLM/tool message produced during the turn
        are appended to the agent's chat history. At most one tool call is
        handled per turn.
        """
        chat_history = self._chat_history
        chat_history.append(ChatMessage(role="user", content=message))

        # Describe every registered tool in OpenAI function format.
        # NOTE(review): whether the configured llm honours the `functions`
        # keyword is not visible here — confirm.
        functions = [
            tool.metadata.to_openai_function() for tool in self._tools.values()
        ]
        ai_message = self._llm.chat(chat_history, functions=functions).message
        chat_history.append(ai_message)

        function_call = ai_message.additional_kwargs.get("function_call")
        if function_call is not None:
            # Run the requested tool, record its output, then let the LLM
            # produce the final answer with that output in the history.
            function_message = self._call_function(function_call)
            chat_history.append(function_message)
            ai_message = self._llm.chat(chat_history).message
            chat_history.append(ai_message)

        return ai_message.content

    def _call_function(self, function_call: dict) -> ChatMessage:
        """Run the tool named in `function_call` and wrap its output as a message.

        Args:
            function_call: Mapping with a ``"name"`` key (registered tool name)
                and an ``"arguments"`` key holding a JSON-encoded object of
                keyword arguments for the tool.

        Returns:
            A ``ChatMessage`` with role ``"function"`` whose content is
            ``str()`` of the tool output.

        Raises:
            KeyError: If the named tool is not registered.
            json.JSONDecodeError: If ``"arguments"`` is not valid JSON.
        """
        tool_name = function_call["name"]
        tool = self._tools[tool_name]
        output = tool(**json.loads(function_call["arguments"]))
        return ChatMessage(
            name=tool_name,
            content=str(output),
            role="function",
            additional_kwargs={"name": tool_name},
        )

In [36]:
from llama_index.tools import QueryEngineTool, ToolMetadata
# Query engine over the loaded index, exposed to the agent as a single tool.
rag_engine = index.as_query_engine(streaming=True)

# Name and description that identify the tool to the agent.
qgis_tool_metadata = ToolMetadata(
    name="qgis322",
    description="Provides information on how to use QGIS 3.22. "
    "Use a detailed plain text question as input to the tool.",
)
query_engine_tools = [
    QueryEngineTool(query_engine=rag_engine, metadata=qgis_tool_metadata),
]


In [37]:
# Build the agent with the QGIS query-engine tool; `llm` and `chat_history`
# fall back to the class defaults.
agent = LlamaIndexAIAgent(tools=query_engine_tools)


In [38]:
# Ask one question through the agent and print the reply text returned by chat().
# NOTE(review): the recorded run below reports only 23 prompt tokens, which
# suggests the qgis322 tool was not invoked for this answer — confirm.
print(agent.chat("How do I add a symbol to a map in QGIS 3.22?"))


Llama.generate: prefix-match hit


  To add a symbol to a map in QGIS 3.22, you can follow these steps:
1. Open the QGIS application and navigate to the project or layer that you want to add the symbol to.
2. Right-click on the layer or feature group that you want to add the symbol to and select "Layer Properties" from the menu.
3. In the Layer Properties dialog box, click on the "Symbology" tab.
4. Under "Symbol", select the type of symbol you want to use for the layer or feature group. You can choose from a variety of pre-defined symbols or create your own custom symbol using the "Symbol Editor".
5. If using a pre-defined symbol, you can adjust its size, color, and other properties by clicking on the "Edit" button next to the symbol name.
6. If creating a custom symbol, use the "Symbol Editor" tool to draw or upload an image for your symbol. You can adjust the size, position, and other properties of the symbol as needed.
7. Once you have made your desired changes, click "OK" to apply them.
8. The new symbol will now b


llama_print_timings:        load time =  3656.87 ms
llama_print_timings:      sample time =   769.03 ms /   299 runs   (    2.57 ms per token,   388.80 tokens per second)
llama_print_timings: prompt eval time =   432.82 ms /    23 tokens (   18.82 ms per token,    53.14 tokens per second)
llama_print_timings:        eval time = 22458.04 ms /   298 runs   (   75.36 ms per token,    13.27 tokens per second)
llama_print_timings:       total time = 25279.69 ms
