In [1]:
from llama_index import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    ServiceContext,
)
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)

# Path to the pre-downloaded GGUF weights used by the local LLM below.
# Alternative model: './models/mistral-7b-instruct-v0.2.Q4_K_M.gguf'
# NOTE(review): the prompt helpers and the global tokenizer used later in this
# notebook are Llama-2-specific -- confirm they still fit before switching models.
model_path = "./models/llama-2-7b-chat.Q4_K_M.gguf"

# 1. Set up local LLM

In [2]:
llm = LlamaCPP(
    # Weights: use the local file selected above. A `model_url=...` argument
    # can be given instead to have the model downloaded automatically
    # (e.g. a different local file such as './models/llama-2-13b-chat.Q5_0.gguf'
    # would simply be another `model_path`).
    model_path=model_path,
    # Llama 2 has a 4096-token context window; stay slightly below it to
    # leave some wiggle room.
    context_window=3900,
    # Generation settings.
    # NOTE(review): several recorded answers in this notebook stop
    # mid-sentence, presumably because of this 256-token cap -- confirm.
    max_new_tokens=256,
    temperature=0.1,
    # Convert chat messages / plain completions into the Llama 2 prompt format.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    # Extra kwargs passed to __init__(); n_gpu_layers must be at least 1
    # for the GPU to be used.
    model_kwargs={"n_gpu_layers": 1},
    # Extra kwargs passed to __call__(); none are needed here.
    generate_kwargs={},
    verbose=True,
)

# Read the three source PDFs that every index in this notebook is built from.
documents = SimpleDirectoryReader(
    input_files=[
        "./docs/eBook-How-to-Build-a-Career-in-AI.pdf",
        "./docs/recipes.pdf",
        "./docs/annualreport.pdf",
    ]
).load_data()

AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [3]:
# Smoke-test the local model on its own (no retrieval involved): request a
# streamed completion and print each piece as it is yielded.
response_iter = llm.stream_complete("Can you write me a poem about fast cars?")
for response in response_iter:
    # Print without a trailing newline so the pieces join into continuous
    # text, and flush so they show up in the cell output immediately.
    print(response.delta, end="", flush=True)

  Of course! Here is a poem about fast cars:
Racing down the highway, wind in my hair
Fast car glides with grace and poise
Engine purring smooth, accelerator pressed tight
Speeding through the night, feeling alive and bright

The thrill of the ride, the rush of the chase
Adrenaline coursing through my veins
Fast cars take me to places far away
Where the world slows down and I feel alive today

So here's to fast cars, a symbol of speed
A symbol of freedom, a dream to be freed
On the open road, where the wind blows free
I am one with the car, wild and carefree.

# 2. Query engine

In [4]:
from llama_index import set_global_tokenizer
from transformers import AutoTokenizer
from llama_index.embeddings import HuggingFaceEmbedding

# Register the Llama-2 chat tokenizer's `encode` function as llama_index's
# global tokenizer.
set_global_tokenizer(
    AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf").encode
)

# Embedding model from Hugging Face. Embedding a short string and printing
# the vector length is a quick sanity check (384 in the recorded run).
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
text_embedding = embed_model.get_text_embedding("hello world")
print(len(text_embedding))

# Bundle the local LLM and the embedding model into one service context.
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

# `documents` was already loaded in the LLM setup cell above; build the basic
# vector index straight from it.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

# Streaming query engine: answers are printed piece by piece by `query()`.
query_engine = index.as_query_engine(streaming=True)

384


# 3. Test query engine

In [5]:
# helper function to run a query and stream the answer into the cell output
def query(query_str, engine=None):
    """Send ``query_str`` to a streaming query engine and print the streamed answer.

    Args:
        query_str: The question to ask.
        engine: Optional query engine to use. Its ``query()`` result must
            provide ``print_response_stream()``; the engine built in this
            notebook for that purpose is created with ``streaming=True``.
            When omitted, the notebook-level ``query_engine`` is looked up
            at call time, exactly as before.

    Returns:
        None. The answer is written to standard output only.
    """
    # Later sections of the notebook rebind the global `query_engine` to
    # engines created without `streaming=True`. Passing `engine` explicitly
    # keeps this helper independent of which cells have been run.
    active_engine = query_engine if engine is None else engine
    streaming_response = active_engine.query(query_str)
    streaming_response.print_response_stream()

In [6]:
# Basic engine check #1: the answer is retrieved from
# eBook-How-to-Build-a-Career-in-AI.pdf.
query("how do I get started on a personal project in AI?")
# Other questions to try against the same document:
# query("How do I build a portfolio of AI projects?")
# query("Summarize the book in 500 words.")

Llama.generate: prefix-match hit


  Great! Based on the given context, here's an answer to your query:
To get started on a personal project in AI, you can follow these steps:
1. Identify your interests and goals: Think about what areas of AI interest you the most, such as natural language processing, computer vision, or machine learning. Also, consider what you want to achieve through this project, whether it's to develop a new skill, build a portfolio, or explore a specific application area.
2. Research and brainstorm: Once you have a clear idea of your interests and goals, start researching the field by reading articles, watching tutorials, and engaging in online forums. Brainstorm potential project ideas that align with your goals and interests.
3. Start small: Don't feel overwhelmed by trying to build a complex project right from the start. Begin with something simple, like building a chatbot or creating a machine learning model to classify images. This will help you gain confidence and develop your skills.
4. Join

In [7]:
# Basic engine check #2: the answer is retrieved from annualreport.pdf.
# NOTE(review): the recorded answer for this cell (8.7%) disagrees with the
# answers recorded for the sentence-window and auto-merging engines later in
# the notebook (18.7%) -- verify the figure against the PDF itself.
query("what was the FY2022 return on equity?")

Llama.generate: prefix-match hit


  Based on the provided context information, the FY2022 return on equity is 8.7%.

In [8]:
# Basic engine check #3: the answer is retrieved from recipes.pdf.
query("How to make Pineapple Chicken?")

Llama.generate: prefix-match hit


  To make Pineapple Chicken, you will need the following ingredients:
* 1 lb boneless, skinless chicken breasts
* 1 cup pineapple juice
* 1/4 cup soy sauce
* 2 tbsp vegetable oil
* 2 tsp garlic powder
* 1 tsp crushed red pepper (optional)
* 1/4 cup chopped frozen broccoli (thawed)
* 1/4 cup pineapple chunks (drained)
* Salt and pepper to taste
Instructions:
1. Heat the oil in a large skillet over medium-high heat. Add all ingredients except for the chicken and cook until heated through, about 5 minutes.
2. Add the chicken and cook for another 2-3 minutes, or until it is cooked through.
3. Serve over rice or brown rice.
Note: If you want to add shrimp, roasted peanuts, beansprouts, or sugar snap peas, you can do so after step 2 and stir-fry them for another

# 4. Sentence-window retrieval setup (advanced retrieval)

In [9]:
import os
from llama_index import ServiceContext, VectorStoreIndex, StorageContext
from llama_index.node_parser import SentenceWindowNodeParser
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index import load_index_from_storage


def build_sentence_window_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="sentence_index",
):
    """Build a sentence-window vector index, or load it if ``save_dir`` exists.

    Args:
        documents: Documents to index when no persisted index is found.
        llm: LLM placed into the service context attached to the index.
        embed_model: Embedding model (or model spec string) for the service
            context.
        sentence_window_size: ``window_size`` given to the sentence-window
            node parser.
        save_dir: Directory the index is persisted to and loaded from.

    Returns:
        The freshly built index (after persisting it to ``save_dir``), or the
        index loaded from ``save_dir`` when that path already exists. In the
        latter case ``documents`` is not used.
    """
    # The parser keeps the surrounding window under the "window" metadata key
    # and the original sentence under "original_text".
    window_parser = SentenceWindowNodeParser.from_defaults(
        window_size=sentence_window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=window_parser,
    )

    # Reuse a previously persisted index when one is available.
    if os.path.exists(save_dir):
        persisted = StorageContext.from_defaults(persist_dir=save_dir)
        return load_index_from_storage(persisted, service_context=context)

    # Otherwise build from scratch and persist for the next run.
    built_index = VectorStoreIndex.from_documents(documents, service_context=context)
    built_index.storage_context.persist(persist_dir=save_dir)
    return built_index


def get_sentence_window_query_engine(
    sentence_index, similarity_top_k=6, rerank_top_n=2
):
    """Create a query engine over a sentence-window index.

    Args:
        sentence_index: Index to query, e.g. one returned by
            ``build_sentence_window_index``.
        similarity_top_k: Number of nodes to retrieve per query.
        rerank_top_n: ``top_n`` given to the reranker.

    Returns:
        A query engine with two node postprocessors applied in this order:
        metadata replacement using the "window" key, then reranking with
        "BAAI/bge-reranker-base".
    """
    window_swap = MetadataReplacementPostProcessor(target_metadata_key="window")
    reranker = SentenceTransformerRerank(
        top_n=rerank_top_n,
        model="BAAI/bge-reranker-base",
    )
    # Order matters: the window replacement is listed before the reranker.
    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=[window_swap, reranker],
    )

In [10]:
from llama_index import Document

# Concatenate the text of every loaded document into a single Document,
# separating the pieces with a blank line.
document = Document(text="\n\n".join(d.text for d in documents))

# Build the sentence-window index, or load it from ./sentence_index if that
# directory already exists. This rebinds `index`, replacing the basic vector
# index created earlier in the notebook.
index = build_sentence_window_index([document], llm=llm, save_dir="./sentence_index")

In [11]:
# Rebind `query_engine` to the sentence-window engine (rerank_top_n keeps its
# default of 2). This engine is created without streaming=True, so the cells
# below read `response.response` instead of calling the streaming helper.
query_engine = get_sentence_window_query_engine(index, similarity_top_k=6)

In [12]:
# Sentence-window engine, question about the career eBook; print the answer text.
response = query_engine.query("How do I build a portfolio of AI projects?")
print(response.response)

Llama.generate: prefix-match hit


  Great! Based on the provided context, here's how you can build a portfolio of AI projects:
1. Start small: As mentioned in the context, don't worry about starting too small. Begin with simple projects that demonstrate your understanding of AI concepts and techniques. This will help you gain confidence and build momentum for more complex projects.
2. Focus on practical applications: While it's important to have a solid theoretical foundation in AI, it's equally crucial to show how your skills can be applied in real-world scenarios. Choose projects that demonstrate practical applications of AI, such as image classification, natural language processing, or predictive modeling.
3. Show progression: As you progress in your AI journey, add more complex and challenging projects to your portfolio. This will help potential employers see your skill progression and understand your capabilities.
4. Communicate your thinking: As mentioned in the context, communication is key when building a portf

In [13]:
# Sentence-window engine, question about the annual report.
# NOTE(review): the recorded answer backs its 18.7% figure with a quote about
# the dividend rather than return on equity -- verify against the PDF.
response = query_engine.query("what was the FY2022 return on equity?")
print(response.response)

Llama.generate: prefix-match hit


  Based on the provided context information, the FY2022 return on equity (ROE) for Macquarie Group Limited is 18.7%. This can be inferred from the statement in the Remuneration Committee letter that "the FY2022 full-year dividend is up 32% compared to FY2021, which reflects a strong performance and position for the future."


In [14]:
# Sentence-window engine, question about the recipes PDF.
response = query_engine.query("How to make Pineapple Chicken?")
print(response.response)

Llama.generate: prefix-match hit


  Based on the provided context information, here is the step-by-step guide to making Pineapple Chicken:
Step 1: Heat oil in a large skillet over medium-high heat. Add all ingredients EXCEPT pineapple and chicken. Cook and stir until heated through, about 5-6 minutes.
Step 2: Add pineapple and chicken to the skillet. Cook for another 2 minutes.
Serve over instant brown rice or whole wheat pasta.
Note: Thaw frozen vegetables in the microwave or by holding the package under cold running water for several minutes.


# 5. Auto-merging retrieval setup

In [15]:
import os

from llama_index import (
    ServiceContext,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.node_parser import HierarchicalNodeParser
from llama_index.node_parser import get_leaf_nodes
from llama_index import StorageContext, load_index_from_storage
from llama_index.retrievers import AutoMergingRetriever
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index.query_engine import RetrieverQueryEngine


def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Build a hierarchical (auto-merging) vector index, or load it if ``save_dir`` exists.

    Args:
        documents: Documents to parse and index when no persisted index is
            found.
        llm: LLM placed into the service context attached to the index.
        embed_model: Embedding model (or model spec string) for the service
            context.
        save_dir: Directory the index is persisted to and loaded from.
        chunk_sizes: Chunk sizes for the hierarchical node parser. Defaults
            to ``[2048, 512, 128]`` when ``None`` or empty.

    Returns:
        The index loaded from ``save_dir`` when that path already exists
        (``documents`` and ``chunk_sizes`` are then not used), otherwise a
        newly built index that has been persisted to ``save_dir``.
    """
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )

    # Load path: nothing needs to be parsed. Previously the documents were
    # parsed and a throwaway docstore was filled before this check, and the
    # result was discarded whenever the persisted index was used.
    if os.path.exists(save_dir):
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )

    # Build path: parse into a node hierarchy and pick out the leaf nodes.
    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)

    # All nodes go into the docstore, while only the leaf nodes are handed to
    # the vector index.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
        leaf_nodes, storage_context=storage_context, service_context=merging_context
    )
    automerging_index.storage_context.persist(persist_dir=save_dir)
    return automerging_index


def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=6,
):
    """Create an auto-merging query engine on top of ``automerging_index``.

    Args:
        automerging_index: Index to query, e.g. one returned by
            ``build_automerging_index``.
        similarity_top_k: Number of nodes the base retriever fetches.
        rerank_top_n: ``top_n`` given to the reranker.

    Returns:
        A ``RetrieverQueryEngine`` that retrieves through an
        ``AutoMergingRetriever`` and reranks with "BAAI/bge-reranker-base",
        using the index's own service context.
    """
    leaf_retriever = automerging_index.as_retriever(similarity_top_k=similarity_top_k)

    # verbose=True makes the retriever report merges in the cell output.
    merging_retriever = AutoMergingRetriever(
        leaf_retriever,
        automerging_index.storage_context,
        verbose=True,
    )

    reranker = SentenceTransformerRerank(
        top_n=rerank_top_n,
        model="BAAI/bge-reranker-base",
    )

    return RetrieverQueryEngine.from_args(
        merging_retriever,
        node_postprocessors=[reranker],
        service_context=automerging_index.service_context,
    )

In [16]:
from llama_index import Document

# Concatenate the text of every loaded document into a single Document,
# separating the pieces with a blank line.
document = Document(text="\n\n".join(d.text for d in documents))

# Build the auto-merging index, or load it from ./merging_index if that
# directory already exists. This rebinds `index` once more, replacing the
# sentence-window index from the previous section.
index = build_automerging_index([document], llm=llm, save_dir="./merging_index")

In [17]:
# Rebind `query_engine` to the auto-merging engine.
# NOTE(review): similarity_top_k=6 equals the default rerank_top_n=6, so the
# reranker presumably can only reorder the retrieved nodes here, not drop any
# -- confirm this is intended.
query_engine = get_automerging_query_engine(index, similarity_top_k=6)

In [18]:
# Auto-merging engine, question about the career eBook; print the answer text.
response = query_engine.query("How do I build a portfolio of AI projects?")
print(response.response)

Llama.generate: prefix-match hit


  Building a portfolio of AI projects is an essential step in your career growth as it showcases your skills and progress over time. Here are some steps to help you build a strong portfolio:
1. Start small: Don't worry about starting too small. Begin with simple projects that demonstrate your understanding of AI concepts. This will help you gain experience and build your confidence.
2. Identify your goals: Think about what you want to achieve with your AI career. What are your long-term goals? What skills do you need to develop to get there? Your portfolio should reflect these goals.
3. Choose projects that align with your goals: Select projects that align with your career aspirations and help you develop the skills you need to achieve them.
4. Focus on practical experience: While it's important to understand theoretical concepts, your portfolio should primarily showcase practical experience in AI. This means working on real-world projects that demonstrate your ability to apply AI tech

In [19]:
# Auto-merging engine, question about the annual report.
response = query_engine.query("what was the FY2022 return on equity?")
print(response.response)

Llama.generate: prefix-match hit


  Based on the provided context information, the FY2022 return on equity for Macquarie Group is 18.7%.


In [20]:
# Auto-merging engine, question about the recipes PDF.
# NOTE(review): the recorded answer mentions coconut milk and chilli, which
# appear in neither of the earlier Pineapple Chicken answers -- possibly text
# from another recipe was merged in; verify against the PDF.
response = query_engine.query("How to make Pineapple Chicken?")
print(response.response)

> Merging 2 nodes into parent node.
> Parent node id: 381e7c21-cda5-4566-a62e-f287b2ac72b9.
...tir	en.		s	text: Add	



Llama.generate: prefix-match hit


  To make Pineapple Chicken, you will need the following ingredients:
* 2 tsp canola or sesame oil
* 1 (10-oz) package frozen broccoli (or stir-fry vegetable mix)
* 1/4 tsp garlic powder
* 1/4 tsp crushed red pepper (optional – use if you like it hot!)
* 1 cup coconut milk
* Salt and pepper to taste
Instructions:
1. Add all ingredients except pineapple and chicken to a pot or wok and stir-fry for 5-6 minutes, or until heated through.
2. Add pineapple and chicken to the pot and cook for another 2 minutes.
3. Stir in coconut milk and simmer for about 10 minutes or until the chicken is cooked through.
4. Scatter over the chilli and serve with rice or whole wheat pasta.
Note: If using frozen pineapple, thaw it in the microwave or by holding the package under cold running water for several minutes before adding it
