In [1]:
from llama_index import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    ServiceContext,
)
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
from constants import MODEL_PATH
# Example of a local model location; the notebook itself takes the path from constants.MODEL_PATH.
# model_path = './models/llama-2-7b-chat.Q4_K_M.gguf'
# Remote GGUF weights for the same model. Only referenced by the commented-out
# `model_url=` argument of the LlamaCPP call below, so it is unused unless that line is re-enabled.
model_url = 'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf'

# 1. Set up local LLM

In [2]:
# Local Llama-2 chat model served through llama.cpp; shared by every index and
# query engine built later in this notebook.
llm = LlamaCPP(
    # Pass the URL of a GGUF model file to have it downloaded automatically
    # model_url=model_url,
    # ...or point at a pre-downloaded model file instead of model_url
    model_path=MODEL_PATH,
    temperature=0.1,
    # Upper bound on generated tokens per answer
    max_new_tokens=256,
    # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
    context_window=3900,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set to at least 1 to use GPU
    model_kwargs={"n_gpu_layers": 1},
    # transform inputs into Llama2 format
    messages_to_prompt=messages_to_prompt,    
    completion_to_prompt=completion_to_prompt,        
    verbose=True,
)

# Load the three PDFs that both indexes in this notebook are built from.
documents = SimpleDirectoryReader(
    input_files=["./docs/eBook-How-to-Build-a-Career-in-AI.pdf", "./docs/recipes.pdf", "./docs/annualreport.pdf"]
).load_data()

AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


# 2. Sentence-window retrieval setup (advanced retrieval)

In [3]:
import os
from llama_index import ServiceContext, VectorStoreIndex, StorageContext
from llama_index.node_parser import SentenceWindowNodeParser
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index import load_index_from_storage


def build_sentence_window_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="sentence_index",
):
    """Return a sentence-window ``VectorStoreIndex``, reusing a persisted copy if present.

    Args:
        documents: Documents to index; only consulted when ``save_dir`` does
            not exist yet.
        llm: LLM placed in the service context attached to the index.
        embed_model: Embedding model spec passed to ``ServiceContext.from_defaults``.
        sentence_window_size: ``window_size`` given to ``SentenceWindowNodeParser``.
        save_dir: Directory the index is persisted to and reloaded from.

    Returns:
        The freshly built index, or the one loaded from ``save_dir``.

    Note:
        The existence of ``save_dir`` is the only cache check. Changing
        ``documents`` or ``sentence_window_size`` has no effect until that
        directory is removed.
    """
    # The parser records the surrounding window under "window" and the original
    # sentence under "original_text" in each node's metadata.
    window_parser = SentenceWindowNodeParser.from_defaults(
        window_size=sentence_window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    service_context = ServiceContext.from_defaults(
        llm=llm, embed_model=embed_model, node_parser=window_parser
    )

    # Reuse the persisted index when one is already on disk.
    if os.path.exists(save_dir):
        persisted = StorageContext.from_defaults(persist_dir=save_dir)
        return load_index_from_storage(persisted, service_context=service_context)

    # First run: embed the documents and save the result for next time.
    fresh_index = VectorStoreIndex.from_documents(
        documents, service_context=service_context
    )
    fresh_index.storage_context.persist(persist_dir=save_dir)
    return fresh_index


def get_sentence_window_query_engine(
    sentence_index, similarity_top_k=6, rerank_top_n=2
):
    """Create a query engine over a sentence-window index.

    Args:
        sentence_index: Index to query (see ``build_sentence_window_index``).
        similarity_top_k: Number of nodes requested from the vector retriever.
        rerank_top_n: ``top_n`` passed to the ``BAAI/bge-reranker-base`` reranker.

    Returns:
        The engine produced by ``sentence_index.as_query_engine``.
    """
    # Postprocessors run in list order: the metadata replacement keyed on
    # "window" comes first, then the reranker.
    node_postprocessors = [
        MetadataReplacementPostProcessor(target_metadata_key="window"),
        SentenceTransformerRerank(
            top_n=rerank_top_n, model="BAAI/bge-reranker-base"
        ),
    ]
    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=node_postprocessors,
    )

In [4]:
from llama_index import Document

# Concatenate the text of every loaded document object into a single Document,
# separated by blank lines.
document = Document(text="\n\n".join([doc.text for doc in documents]))

# Built and persisted to ./sentence_index on the first run; later runs reload
# from that directory, so delete it to re-index after changing the input files.
index = build_sentence_window_index(
    [document],
    llm=llm,
    save_dir="./sentence_index",
)

In [5]:
# Retrieve 6 candidate nodes (same as the function default); rerank_top_n is left
# at its default of 2.
query_engine = get_sentence_window_query_engine(index, similarity_top_k=6)

In [6]:
# First of three test questions; the same question is asked of the auto-merging
# engine in section 3.
response = query_engine.query("How do I build a portfolio of AI projects?")
# Prints only the answer text. The LLM was created with max_new_tokens=256, which
# is the likely reason long answers end mid-sentence.
print(response.response)

  Great! Based on the provided context, here's how you can build a portfolio of AI projects:
1. Start small: As mentioned in the context, don't worry about starting too small. Begin with simple projects that demonstrate your understanding of AI concepts and techniques. This will help you gain confidence and build momentum for more complex projects.
2. Focus on practical applications: While it's important to have a solid theoretical foundation in AI, it's equally crucial to show how your skills can be applied in real-world scenarios. Choose projects that demonstrate practical applications of AI, such as image classification, natural language processing, or predictive modeling.
3. Show progression: As you build your portfolio, aim to show a clear progression in complexity and scope of projects. This will help potential employers or mentors understand your growth and capabilities over time.
4. Communicate your thinking: As mentioned in the context, communication is key when building a por

In [7]:
# Second test question; presumably answerable from ./docs/annualreport.pdf.
response = query_engine.query("what was the FY2022 return on equity?")
print(response.response)

Llama.generate: prefix-match hit


  Based on the provided context information, the FY2022 return on equity (ROE) for Macquarie Group Limited is 18.7%. This can be inferred from the statement in the Remuneration Committee letter that "FY2022 ROE of 18.7% is up compared to FY2021's 14.3%"


In [8]:
# Third test question; presumably answerable from ./docs/recipes.pdf.
response = query_engine.query("How to make Pineapple Chicken?")
print(response.response)

Llama.generate: prefix-match hit


  Sure, I'd be happy to help you with that! Based on the context information provided, here is a step-by-step guide on how to make Pineapple Chicken:
Step 1: Heat oil in a large skillet over medium-high heat. Add all ingredients EXCEPT pineapple and chicken. Cook and stir until heated through, about 5-6 minutes.
Step 2: Add pineapple and chicken to the skillet. Cook for another 2 minutes.
That's it! You now have a delicious Pineapple Chicken dish that you can serve over instant brown rice or whole wheat pasta.
I hope this helps, and please let me know if you have any questions or need further clarification.


# 3. Auto-merging retrieval setup

In [9]:
import os

from llama_index import (
    ServiceContext,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.node_parser import HierarchicalNodeParser
from llama_index.node_parser import get_leaf_nodes
from llama_index import StorageContext, load_index_from_storage
from llama_index.retrievers import AutoMergingRetriever
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index.query_engine import RetrieverQueryEngine


def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Return an auto-merging ``VectorStoreIndex``, reusing a persisted copy if present.

    Args:
        documents: Documents to parse and index; only consulted when
            ``save_dir`` does not exist yet.
        llm: LLM placed in the service context attached to the index.
        embed_model: Embedding model spec passed to ``ServiceContext.from_defaults``.
        save_dir: Directory the index is persisted to and reloaded from.
        chunk_sizes: Chunk sizes handed to ``HierarchicalNodeParser``;
            defaults to ``[2048, 512, 128]`` when ``None`` or empty.

    Returns:
        The freshly built index, or the one loaded from ``save_dir``.

    Note:
        The existence of ``save_dir`` is the only cache check. Changing
        ``documents`` or ``chunk_sizes`` has no effect until that directory
        is removed.
    """
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )

    # Reload path: a persisted index already carries its own docstore, so the
    # documents are not parsed again only to throw the nodes away.
    if os.path.exists(save_dir):
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )

    # Build path: parse the documents into a node hierarchy.
    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)

    # Every node goes into the docstore, but only the leaves are indexed. The
    # storage context is what get_automerging_query_engine later hands to
    # AutoMergingRetriever.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
        leaf_nodes, storage_context=storage_context, service_context=merging_context
    )
    automerging_index.storage_context.persist(persist_dir=save_dir)
    return automerging_index


def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=6,
):
    """Create a query engine that retrieves through ``AutoMergingRetriever`` and reranks.

    Args:
        automerging_index: Index to query (see ``build_automerging_index``).
        similarity_top_k: Number of nodes requested from the index's base retriever.
        rerank_top_n: ``top_n`` passed to the ``BAAI/bge-reranker-base`` reranker.

    Returns:
        A ``RetrieverQueryEngine`` that reuses the index's service context.
    """
    # Wrap the plain vector retriever. verbose=True makes the retriever report
    # merges in the cell output.
    merging_retriever = AutoMergingRetriever(
        automerging_index.as_retriever(similarity_top_k=similarity_top_k),
        automerging_index.storage_context,
        verbose=True,
    )
    reranker = SentenceTransformerRerank(
        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
    )

    return RetrieverQueryEngine.from_args(
        merging_retriever,
        node_postprocessors=[reranker],
        service_context=automerging_index.service_context,
    )

In [10]:
from llama_index import Document

# Same merged Document as the one built for the sentence-window index; rebuilt
# here from `documents`.
document = Document(text="\n\n".join([doc.text for doc in documents]))

# NOTE: rebinds `index`, replacing the sentence-window index from section 2.
# Built and persisted to ./merging_index on the first run; later runs reload
# from that directory.
index = build_automerging_index(
    [document],
    llm=llm,
    save_dir="./merging_index",
)

In [11]:
# NOTE: rebinds `query_engine`, replacing the sentence-window engine.
# NOTE(review): similarity_top_k=6 equals the default rerank_top_n=6, so the
# reranker likely only reorders candidates without dropping any. Confirm whether
# a smaller rerank_top_n was intended.
query_engine = get_automerging_query_engine(index, similarity_top_k=6)

In [12]:
# Same question as asked of the sentence-window engine, so the two answers can
# be compared side by side.
response = query_engine.query("How do I build a portfolio of AI projects?")
print(response.response)

Llama.generate: prefix-match hit


  Building a portfolio of AI projects is an essential step in your career growth as it showcases your skills and progress over time. Here are some steps to help you build a strong portfolio:
1. Start small: Don't worry about starting too small. Begin with simple projects that demonstrate your understanding of AI concepts. This will help you gain experience and build your confidence before moving on to more complex projects.
2. Identify areas of interest: Determine what areas of AI interest you the most, such as machine learning, natural language processing, or computer vision. Focus on projects that align with these interests, as it will make the process more enjoyable and fulfilling.
3. Scope your projects: Before starting a project, define its scope and objectives. Determine what problem you are trying to solve, what data you need to collect, and how you will evaluate success. This will help you stay focused and ensure that your project is feasible and achievable.
4. Build a simple f

In [13]:
# Same question as asked of the sentence-window engine.
response = query_engine.query("what was the FY2022 return on equity?")
print(response.response)

Llama.generate: prefix-match hit


  Based on the provided context information, the FY2022 return on equity for Macquarie Group is 18.7%.


In [14]:
# Same question as asked of the sentence-window engine. The retriever was created
# with verbose=True, so any node merges are reported in the output before the answer.
response = query_engine.query("How to make Pineapple Chicken?")
print(response.response)

> Merging 2 nodes into parent node.
> Parent node id: 381e7c21-cda5-4566-a62e-f287b2ac72b9.
...tir	en.		s	text: Add	



Llama.generate: prefix-match hit


  To make Pineapple Chicken, you will need the following ingredients:
* 2 tsp canola or sesame oil
* 1 (10-oz) package frozen broccoli (or stir-fry vegetable mix)
* 1/4 tsp garlic powder
* 1/4 tsp crushed red pepper (optional – use if you like it hot!)
* 1 cup coconut milk
* Salt and pepper to taste
Instructions:
1. Add all ingredients except pineapple and chicken to a pot or wok and stir-fry for 5-6 minutes, or until heated through.
2. Add pineapple and chicken to the pot and cook for another 2 minutes.
3. Stir in coconut milk and simmer for about 10 minutes or until the chicken is cooked through.
4. Scatter over the chilli and serve with rice or whole wheat pasta.
Note: If using frozen pineapple, thaw it in the microwave or by holding the package under cold running water for several minutes before adding it
