In [None]:
from pathlib import Path

model_id = "microsoft/Phi-3-mini-4k-instruct"
model_path = "Phi-3-mini-4k-instruct-ov"

if not Path(model_path).exists():
    !optimum-cli export openvino --model {model_id} --task text-generation-with-past --trust-remote-code --weight-format int4 {model_path}

In [1]:
from llama_index.llms.openvino import OpenVINOLLM
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings


ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}

# def messages_to_prompt(messages):
#   prompt_str = "\n".join([str(x) for x in messages])
#   return f"<s>[INST] {prompt_str} [/INST] </s>\n"
  
# def completion_to_prompt(completion):
#   return f"<s>[INST] {completion} [/INST] </s>\n"

model_path = "llama-3-8b-instruct/INT4_compressed_weights"

llm = OpenVINOLLM(
    model_name=str(model_path),
    tokenizer_name=str(model_path),
    context_window=3900,
    max_new_tokens=1000,
    model_kwargs={"ov_config": ov_config, "trust_remote_code": True},
    device_map="cpu",
)


The argument `trust_remote_code` is to be used along with export=True. It will be ignored.
Compiling the model to CPU ...
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
from llama_index.core.agent import ReActAgent
from llama_index.core.tools import FunctionTool

def multiply(a: float, b: float) -> float:
    """Multiply two numbers and returns the product"""
    return a * b


multiply_tool = FunctionTool.from_defaults(fn=multiply)


def add(a: float, b: float) -> float:
    """Add two numbers and returns the sum"""
    return a + b


add_tool = FunctionTool.from_defaults(fn=add)

In [4]:
from llama_index.embeddings.huggingface_openvino import OpenVINOEmbedding


embedding = OpenVINOEmbedding(folder_name='bge-small-en-v1.5', device='cpu')

Compiling the model to CPU ...


In [6]:

Settings.embed_model = embedding
Settings.llm = llm
documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

In [8]:
from llama_index.core.tools import QueryEngineTool

budget_tool = QueryEngineTool.from_defaults(
    query_engine,
    name="canadian_budget_2023",
    description="A RAG engine with some basic facts about the 2023 Canadian federal budget.",
)

In [9]:
agent = ReActAgent.from_tools([multiply_tool, add_tool, budget_tool], llm=llm, verbose=True)

In [10]:
response = agent.chat(
    "What is the total amount of the 2023 Canadian federal budget multiplied by 3? Go step by step, using a tool to do any math."
)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[1;3;38;5;200mThought: The current language of the user is English. I need to use a tool to help me answer the question.
Action: canadian_budget_2023
Action Input: {'input': '2023 Canadian federal budget'}
[0m

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[1;3;34mObservation:  The 2023 Canadian federal budget was presented to the House of Commons by Finance Minister Chrystia Freeland on March 28, 2023. The budget aimed to make life more affordable for Canadians while reducing government expenditures. It included $43B in net new spending over six years, including $20B for a new 15 per cent refundable tax credit to promote investment in green technologies. The budget also introduced a "grocery rebate" of up to $467 for eligible families and up to $234 for eligible single people with no kids. The Canada Student Grants also saw a 40% increase in funding. The budget introduced a 2% tax on stock buybacks and $15B of spending cuts achieved by defunding public services and cancelling previously announced programs. The budget received Royal assent on June 22, 2023, and became law. 7
‹ 2022 2024›2023 budget of the
Canadian federal
government
Submitted 28 March 2023
Presented 28 March 2023
Parliament 44th
Party Liberal
Finance
ministerChrystia Fr

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[1;3;38;5;200mThought: The user wants to know the total amount of the 2023 Canadian federal budget multiplied by 3. I will use the tool to extract the total revenue and then multiply it by 3.
Action: multiply
Action Input: {'a': 456.8, 'b': 3}
[0m[1;3;34mObservation: 1370.4
[0m

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[1;3;38;5;200mThought: The user wants to know the total amount of the 2023 Canadian federal budget multiplied by 3. The result is 1370.4. I can now use another tool to add this result to the total amount of the budget.
Action: add
Action Input: {'a': 1370.4, 'b': 40.1}
[0m[1;3;34mObservation: 1410.5
[0m

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[1;3;34mObservation: Error: Could not parse output. Please follow the thought-action-input format. Try again.
[0m

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[1;3;38;5;200mThought: The user wants to know the total amount of the 2023 Canadian federal budget multiplied by 3. The result is 1370.4. I will use the tool to add this result to the total deficit of the budget.
Action: add
Action Input: {'a': 1370.4, 'b': 40.1}
[0m[1;3;34mObservation: 1410.5
[0m

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[1;3;38;5;200mThought: I think there's a mistake! The user wants to add the result of multiplying the total revenue by 3 to the total deficit, not add the result to the deficit. Let me try again.

Thought: The user wants to know the total amount of the 2023 Canadian federal budget multiplied by 3. The result is 1370.4. I will use the tool to add this result to the total deficit of the budget.
Action: add
Action Input: {'a': 456.8, 'b': 40.1}
[0m[1;3;34mObservation: 496.90000000000003
[0m

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[1;3;38;5;200mThought: The user wants to know the total amount of the 2023 Canadian federal budget multiplied by 3. The result is 1370.4. I will use the tool to multiply the total revenue by 3.
Action: multiply
Action Input: {'a': 456.8, 'b': 3}
[0m[1;3;34mObservation: 1370.4
[0m[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer.
Answer: The total amount of the 2023 Canadian federal budget multiplied by 3 is $1,370.4.
[0m

In [40]:
print(response)

Thought
