In [1]:
from llama_index import (
    ServiceContext,
    SimpleDirectoryReader,
    VectorStoreIndex,
    set_global_tokenizer,
)
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import (
    completion_to_prompt,
    messages_to_prompt,
)
from transformers import AutoTokenizer

# Location of the local GGUF weights file; swap in the commented-out line
# to point at the Mistral instruct weights instead.
model_path = "./models/llama-2-7b-chat.Q4_K_M.gguf"
# model_path = "./models/mistral-7b-instruct-v0.2.Q4_K_M.gguf"

# 1. Set up local LLM

In [2]:
# Instantiate the LlamaCPP wrapper around the local weights file.
llm = LlamaCPP(
    # The weights are read from the pre-downloaded file at `model_path`.
    # Alternatives: pass `model_url=model_url` to have the model downloaded
    # automatically, or point at another local file, e.g.
    # model_path='./models/llama-2-13b-chat.Q5_0.gguf'
    model_path=model_path,
    temperature=0.1,
    max_new_tokens=256,
    # Llama 2's context window is 4096 tokens; stay slightly below it to
    # leave some wiggle room.
    context_window=3900,
    # Extra keyword arguments forwarded to __call__().
    generate_kwargs=dict(),
    # Extra keyword arguments forwarded to __init__(); n_gpu_layers has to be
    # at least 1 for the GPU to be used.
    model_kwargs=dict(n_gpu_layers=1),
    # Convert chat messages / bare completions into the Llama 2 prompt format.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [3]:
# Smoke-test the local model: request a streamed completion and print each
# chunk's `delta` as it arrives. end="" keeps consecutive chunks on the same
# line and flush=True pushes every chunk to the output immediately.
response_iter = llm.stream_complete("Can you write me a poem about fast cars?")
for response in response_iter:
    print(response.delta, end="", flush=True)

  Of course! Here is a poem about fast cars:
Racing down the highway, wind in my hair
The engine purring smoothly, without a care
Fast and free, like a bird in flight
The thrill of speed, a feeling so bright

A sleek machine, built for speed and grace
Cutting through the air, with precision and pace
The roar of the engine, a symphony to hear
As I drive fast, my heart full of cheer

The world outside, a blur in my view
But the thrill of the ride, forever true
Fast cars, a dream come true
A feeling that's mine, and yours too.

# 2. Query engine

In [4]:
# Register the Llama-2 chat tokenizer's `encode` function as llama_index's
# global tokenizer.
# NOTE(review): this tokenizer is Llama-2 specific; if `model_path` is pointed
# at a different model (e.g. the commented-out Mistral file), the tokenizer
# presumably needs to change as well — confirm.
set_global_tokenizer(
    AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf").encode
)

# use Huggingface embeddings
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# sanity check: embed a short string and print the length of the vector
text_embedding = embed_model.get_text_embedding("hello world")
print(len(text_embedding))

# create a service context that bundles the local LLM with the embedding model
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
)

# load documents
documents = SimpleDirectoryReader(
    input_files=[
        "./docs/eBook-How-to-Build-a-Career-in-AI.pdf",
        "./docs/recipes.pdf",
        "./docs/annualreport.pdf",
    ]
).load_data()

# create vector store index
index = VectorStoreIndex.from_documents(
    documents, service_context=service_context
)

# set up query engine; streaming=True so responses can be printed as they
# are generated
query_engine = index.as_query_engine(streaming=True)

384


# 3. Test query engine

In [5]:
# helper function to print out the response
def query(query_str, engine=None):
    """Send ``query_str`` to a query engine and print the streamed response.

    Args:
        query_str: The question to ask.
        engine: Optional object exposing ``query(query_str)`` whose result
            provides ``print_response_stream()``. When omitted, the
            notebook-level ``query_engine`` is used, which keeps existing
            ``query("...")`` calls working unchanged.

    Returns:
        None. The response is printed rather than returned.
    """
    if engine is None:
        # Looked up at call time, so re-running the setup cell that rebuilds
        # `query_engine` is picked up without redefining this helper.
        engine = query_engine
    streaming_response = engine.query(query_str)
    streaming_response.print_response_stream()

In [6]:
# Question aimed at the indexed eBook-How-to-Build-a-Career-in-AI.pdf
query(query_str="how do I get started on a personal project in AI?")
# Other questions to try against the same document:
# query("How do I build a portfolio of AI projects?")
# query("Summarize the book in 500 words.")

Llama.generate: prefix-match hit


  Great! Based on the given context, here's an answer to your query:
To get started on a personal project in AI, you can follow these steps:
1. Identify your interests and goals: Think about what areas of AI interest you the most, such as natural language processing, computer vision, or machine learning. Also, consider what you want to achieve through this project, whether it's to develop a new skill, build a portfolio, or solve a real-world problem.
2. Research and brainstorm: Once you have a clear idea of your interests and goals, start researching the field and identifying potential projects. Read articles, watch videos, and engage in online forums to learn about the latest developments and trends in AI. Brainstorm ideas based on your research, and write them down.
3. Start small: Don't feel overwhelmed by trying to build a complex project from the start. Begin with something simple that you can complete quickly, such as building a chatbot or creating a machine learning model for im

In [7]:
# Question aimed at the indexed annualreport.pdf
query(query_str="what was the FY2022 return on equity?")

Llama.generate: prefix-match hit


  Based on the provided context information, the FY2022 return on equity is 8.7%.

In [8]:
# Question aimed at the indexed recipes.pdf
query(query_str="How to make Pineapple Chicken?")

Llama.generate: prefix-match hit


  To make Pineapple Chicken, you will need the following ingredients:
* 1 lb boneless, skinless chicken breasts
* 1 cup pineapple juice
* 1/4 cup soy sauce
* 2 tbsp vegetable oil
* 2 cloves garlic, minced
* 1 tsp grated ginger
* 1/4 cup chopped green onions (optional)
* Salt and pepper to taste
Instructions:
1. Heat the oil in a large skillet or wok over medium-high heat. Add the chicken and cook until browned on all sides, about 5 minutes. Remove the chicken from the skillet and set aside.
2. In the same skillet, add the pineapple juice, soy sauce, garlic, ginger, and green onions (if using). Stir to combine and bring to a simmer.
3. Add the cooked chicken back to the skillet and stir to coat with the sauce. Cook for an additional 2-3 minutes, until the chicken is fully coated and heated through.