In [1]:
import os

from langchain.embeddings import HuggingFaceEmbeddings
from llama_index import (
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt


In [2]:
# Path to the local quantised Llama 2 chat model.
# The latest llama.cpp version only supports GGUF files instead of GGML.
model_path = "../model/llama-2-7b-chat.Q4_0.gguf"

llm = LlamaCPP(
    model_path=model_path,
    # Llama 2 has a context window of 4096 tokens; stay a bit below it for wiggle room.
    context_window=3900,
    max_new_tokens=2048,
    # Temperature 0 to enforce consistent answers.
    temperature=0,
    # Set n_gpu_layers to 1 if you have a GPU.
    model_kwargs={"n_gpu_layers": 1},
    generate_kwargs={},
    # Transform inputs into the Llama 2 prompt format.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from model/llama-2-7b-chat.Q4_0.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_0     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q4_0     [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q4_0     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q4_0     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q4_0     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_output.weight q4_0     [  4096,  4096,     1,    

In [3]:
# Read every file found under ../data into llama_index documents.
documents = SimpleDirectoryReader(input_dir="../data").load_data()


In [4]:
# Embedding model (not an LLM) used to vectorise the document chunks and the queries.
# The first run downloads it from Hugging Face and stores it locally
# (per the original note: /home/user/.cache/huggingface).
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [5]:
# Bundle the LLM and the embedding model into one service context for indexing and querying.
service_context = ServiceContext.from_defaults(
    embed_model=embed_model,
    llm=llm,
    # Overlap buffer: number of tokens shared between neighbouring chunks, so the LLM
    # does not lose context when a passage is chopped at a chunk boundary.
    chunk_overlap=30,
)

In [6]:
# Create the vector store index, or reload it when an earlier run already persisted one.
# A fresh build runs about 12-15 mins for a pure CPU ingestion and around 3-4 mins with GPU,
# so the persisted copy is reused whenever it is available.
# NOTE: the persisted index is NOT refreshed automatically. Delete PERSIST_DIR to force a
# rebuild after changing the documents, the chunking settings or the embedding model.
PERSIST_DIR = "./storage"  # directory that storage_context.persist() writes to by default

# docstore.json is expected to be among the files persist() writes; if it is missing
# (no previous run, or an incomplete directory) we simply fall back to a full rebuild.
if os.path.exists(os.path.join(PERSIST_DIR, "docstore.json")):
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    # Pass the same service context so queries keep using the local LLM and embedding model.
    index = load_index_from_storage(storage_context, service_context=service_context)
else:
    index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)


Parsing documents into nodes:   0%|          | 0/1393 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/1442 [00:00<?, ?it/s]

In [7]:
# Save the index to disk; "./storage" is the library's default persist directory, spelled out here.
index.storage_context.persist(persist_dir="./storage")

In [9]:
# Build a query engine on top of the vector index, with streaming responses enabled.
r_query_engine = index.as_query_engine(streaming=True)

# Ask a question and print the streamed response.
response_iter = r_query_engine.query(
    "How do I load TIFF files in QGIS 3.22?"
)
response_iter.print_response_stream()

  To load TIFF files in QGIS 3.22, you can follow these steps:
1. Open the Data Source Manager dialog by clicking on "OpenDataSourceManager" (or press Ctrl+L).
2. In the layer type tab, select "Add Layer" > "Add Raster Layer" (or use the toolbar button).
3. In the Add Raster Layer dialog, select "Filesource" as the source type and click "Browse".
4. Navigate to the TIFF file you want to load and select it. You can also hold down the Ctrl key and click on multiple items in the dialog to select a range of files, or hold down the Shift key to select a range of files by clicking on the first and last item in the range.
5. Press "Open" to load the selected TIFF file into QGIS. The layer will be added to the Data Source Manager dialog, where you can further configure it as needed.
Note: If you have multiple TIFF files in the same folder, you can select them all by holding down the Ctrl key and clicking on each file in turn.


llama_print_timings:        load time = 78270.57 ms
llama_print_timings:      sample time =   230.83 ms /   253 runs   (    0.91 ms per token,  1096.05 tokens per second)
llama_print_timings: prompt eval time = 105203.22 ms /   687 tokens (  153.13 ms per token,     6.53 tokens per second)
llama_print_timings:        eval time = 63038.04 ms /   252 runs   (  250.15 ms per token,     4.00 tokens per second)
llama_print_timings:       total time = 169561.17 ms


In [10]:
# Second query on the same streaming engine: ask for a PyQGIS clipping script
# and print the streamed response.
response_iter = r_query_engine.query(
    "Using PyQGIS only, create a script that will load a TIFF file and clip it to the bounds of shapefile. Don't include the explanations."
)
response_iter.print_response_stream()

Llama.generate: prefix-match hit


  Sure! Here is a script that you can use with PyQGIS to clip a TIFF file to the bounds of a shapefile:
```
from pyqgis.processing import Processing
# Set the input and output layers
input_layer = "path/to/tiff/file.tif"
output_layer = "path/to/shapefile/with/bounds.shp"

# Define the algorithm and parameters
algorithm_id = "gdal:clipvectorbyextent"
parameter_dictionary = {
    "Input": input_layer,
    "Clipping extent": output_layer,
    "Override projection for the output file": False,
    "Assign a specified nodata value to output bands": None,
    "Optional nodata value": None,
}
# Run the algorithm
processing.run(algorithm_id, parameter_dictionary)
```
This script will load the TIFF file specified in the `input_layer` and clip it to the bounds of the shapefile specified in the `output_layer`. The `parameter_dictionary` defines the input and output layers, as well as any additional options for the algorithm. In this case, we are using the `gdal:clipvectorbyextent` algorithm provid


llama_print_timings:        load time = 78270.57 ms
llama_print_timings:      sample time =   270.98 ms /   287 runs   (    0.94 ms per token,  1059.13 tokens per second)
llama_print_timings: prompt eval time = 127593.21 ms /   844 tokens (  151.18 ms per token,     6.61 tokens per second)
llama_print_timings:        eval time = 79659.32 ms /   286 runs   (  278.53 ms per token,     3.59 tokens per second)
llama_print_timings:       total time = 208812.05 ms
