In [None]:
!pip install llama-index
!pip install llama-index-llms-huggingface
!pip install llama-index-embeddings-huggingface
!pip install llama-index-embeddings-huggingface-api
!pip install bitsandbytes
!pip install accelerate    #Note Restart kernel after running this cell

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from google.colab import userdata
# Read the Hugging Face access token stored in Colab's userdata under the name
# 'HF_API_KEY' (add your own Hugging Face API key under that name first).
hf_token = userdata.get('HF_API_KEY')

### Set Up Tokenizer and Stopping IDs

In [3]:
from transformers import AutoTokenizer

# Access to Llama 3 must be requested from Meta before running this cell;
# `hf_token` is passed along as the access token for the download.
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct", token=hf_token
)

# Token ids handed to the LLM as `stopping_ids`: the tokenizer's end-of-sequence
# id followed by the id of the "<|eot_id|>" token.
_eos_token_id = tokenizer.eos_token_id
_eot_token_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
stopping_ids = [_eos_token_id, _eot_token_id]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


### Set Up the LLM Using HuggingFaceLLM

In [5]:
# System prompt handed to HuggingFaceLLM in the next cell; it frames the model
# as a Q&A assistant that answers from the provided instructions and context.
system_prompt="""
You are a Q&A assistant. Your goal is to answer questions as
accurately as possible based on the instructions and context provided.
"""

In [6]:
import torch
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import BitsAndBytesConfig

# Llama-3-8B-Instruct loaded with 8-bit quantization.
llm = HuggingFaceLLM(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    model_kwargs={
        "token": hf_token,
        "torch_dtype": torch.bfloat16,
        # 8-bit loading is requested through an explicit BitsAndBytesConfig;
        # the bare `load_in_8bit` kwarg is deprecated in transformers.
        "quantization_config": BitsAndBytesConfig(load_in_8bit=True),
    },
    system_prompt=system_prompt,
    generate_kwargs={
        "do_sample": True,
        "temperature": 0.6,
        "top_p": 0.9,
        # NOTE(review): `early_stopping` is documented as a beam-search option;
        # with sampling it likely has no effect — confirm before removing.
        "early_stopping": True,
    },
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_kwargs={"token": hf_token},
    # Generation halts on any of the ids collected in the tokenizer cell.
    stopping_ids=stopping_ids,
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


### Load Data

In [7]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Point a reader at the JioMeet FAQ PDF, then load its contents as `documents`.
_faq_reader = SimpleDirectoryReader(input_files=["/content/JioMeet_FAQ.pdf"])
documents = _faq_reader.load_data()

### Set Up the Embedding Model

In [8]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Embedding model (BAAI/bge-small-en-v1.5); it is registered as the default
# embedding model on `Settings` in a later cell.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

### Set Default LLM and Embedding Model

In [9]:
from llama_index.core import Settings

# Register the notebook-wide defaults on llama-index's global `Settings` object.
Settings.llm = llm  # Llama-3-8B-Instruct model
Settings.embed_model = embed_model  # bge embedding model

### Create Index and Query engine

In [10]:
# Build a vector store index from the loaded FAQ documents.
index = VectorStoreIndex.from_documents(
    documents,
)

# Query engine over the index; `similarity_top_k=3` presumably limits retrieval
# to the three most similar chunks per query (confirm against llama-index docs).
query_engine = index.as_query_engine(similarity_top_k=3)

In [11]:
# Ask the query engine a question about the indexed FAQ and print the answer.
response = query_engine.query("What is JioMeet?")
print(response)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


 JioMeet is a Video Conferencing Service that allows you to conduct a conference call (video or audio) anytime, anywhere. Businesses can connect with up to 100 participants in a conference using the JioMeet app on a desktop/ laptop, smartphone, or from a legacy Video Conferencing (VC) device.


In [12]:
import textwrap

# Wrap the answer text to 170 columns for display. The wrapped text gets its
# own name so that `response` keeps referring to the query result object
# instead of being overwritten with a plain string.
wrapped_response = textwrap.fill(str(response), width=170)
print(wrapped_response)

 JioMeet is a Video Conferencing Service that allows you to conduct a conference call (video or audio) anytime, anywhere. Businesses can connect with up to 100
participants in a conference using the JioMeet app on a desktop/ laptop, smartphone, or from a legacy Video Conferencing (VC) device.


### Agents And Tools

In [13]:
import json
from typing import Sequence, List

from llama_index.core.llms import ChatMessage
from llama_index.core.tools import BaseTool, FunctionTool
from llama_index.core.agent import ReActAgent

import nest_asyncio

# Apply nest_asyncio's patch — presumably so that asyncio event loops can be
# nested inside the notebook's already-running loop (TODO confirm it is needed).
nest_asyncio.apply()

#### Define Tools

In [14]:
def multiply(a: int, b: int) -> int:
    """Multiply two integers and returns the result integer"""
    # The docstring is what FunctionTool presents to the LLM as this tool's
    # description, so the verb is spelled correctly ("Multiply", not "Multiple").
    return a * b


def add(a: int, b: int) -> int:
    """Add two integers and returns the result integer"""
    # Docstring kept verbatim: it can be read at runtime as the tool description.
    total = a + b
    return total


def subtract(a: int, b: int) -> int:
    """Subtract two integers and returns the result integer"""
    # Docstring kept verbatim: it can be read at runtime as the tool description.
    difference = a - b
    return difference


def divide(a: int, b: int) -> float:
    """Divides two integers and returns the result as a float"""
    # `/` is true division, so the value returned is always a float
    # (7 / 2 == 3.5, 4 / 2 == 2.0). The annotation and docstring now state
    # that instead of promising an integer. A zero divisor still raises
    # ZeroDivisionError, exactly as before.
    return a / b


# Wrap each arithmetic function in a FunctionTool, in the same order as the
# functions are defined above.
multiply_tool, add_tool, subtract_tool, divide_tool = (
    FunctionTool.from_defaults(fn=func)
    for func in (multiply, add, subtract, divide)
)

#### ReAct Agent

In [15]:
# Build a ReAct agent over the four arithmetic tools, using the Llama 3 LLM.
# With verbose=True the recorded run below shows the agent's
# Thought / Action / Observation steps.
agent = ReActAgent.from_tools(
    [multiply_tool, add_tool, subtract_tool, divide_tool],
    llm=llm,
    verbose=True,
)

In [16]:
# Two-step arithmetic question; in the recorded run the agent calls `add`
# first and then `multiply`.
response = agent.chat("What is (121 + 2) * 5?")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[1;3;38;5;200mThought: The current language of the user is English. I need to use a tool to help me answer the question.
Action: add
Action Input: {'a': 121, 'b': 2}
[0m[1;3;34mObservation: 123
[0m

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[1;3;38;5;200mThought: The result of the addition is 123. Now I need to multiply 123 by 5.
Action: multiply
Action Input: {'a': 123, 'b': 5}
[0m[1;3;34mObservation: 615
[0m[1;3;38;5;200mThought: The result of the multiplication is 615. I can now answer the question.
Thought: I can answer without using any more tools. I'll use the user's language to answer
Answer: The result is 615.
[0m

In [17]:
# print() applies str() to its argument itself, so no explicit conversion is needed.
print(response)

The result is 615.


### ReAct Agent With RAG QueryEngine Tools

In [18]:
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)

from llama_index.core.tools import QueryEngineTool, ToolMetadata

#### Load Data

In [19]:
# Load the two FAQ PDFs, one reader per file: Jio Business Solutions first,
# then JioMeet.
jio_business_docs, jio_meet_docs = (
    SimpleDirectoryReader(input_files=[pdf_path]).load_data()
    for pdf_path in (
        "/content/JioBusiness_Solutions_FAQ.pdf",
        "/content/JioMeet_FAQ.pdf",
    )
)

#### Create Indices and Query engines

In [20]:
# One vector index per document set (business first, then meet) ...
jio_business_index, jio_meet_index = (
    VectorStoreIndex.from_documents(docs)
    for docs in (jio_business_docs, jio_meet_docs)
)

# ... and one query engine per index, each created with similarity_top_k=3.
jio_business_engine, jio_meet_engine = (
    faq_index.as_query_engine(similarity_top_k=3)
    for faq_index in (jio_business_index, jio_meet_index)
)

#### Define QueryEngine Tools

In [21]:
# (query engine, tool name, tool description) for each FAQ source. The name and
# description are the metadata attached to each tool.
_tool_specs = (
    (
        jio_business_engine,
        "jio_business",
        "Provides information about Jio business Solution. "
        "Use a detailed plain text question as input to the tool.",
    ),
    (
        jio_meet_engine,
        "jio_meet",
        "Provides information about Jio meet. "
        "Use a detailed plain text question as input to the tool.",
    ),
)

# Expose each query engine as a tool, in the order listed above.
query_engine_tools = [
    QueryEngineTool(
        query_engine=engine,
        metadata=ToolMetadata(name=tool_name, description=tool_description),
    )
    for engine, tool_name, tool_description in _tool_specs
]

#### Create ReAct Agent using RAG QueryEngine Tools

In [22]:
# Rebinds `agent`: the arithmetic agent from the earlier cell is replaced by a
# ReAct agent whose tools are the two FAQ query-engine tools.
agent = ReActAgent.from_tools(
    query_engine_tools,
    llm=llm,
    verbose=True,
)

#### Querying

In [23]:
# Question about JioMeet; in the recorded run the agent answers it through the
# `jio_meet` tool.
response = agent.chat("What is special about Jio Meet?")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[1;3;38;5;200mThought: The current language of the user is English. I need to use a tool to help me answer the question.
Action: jio_meet
Action Input: {'input': 'What is special about Jio Meet?'}
[0m

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[1;3;34mObservation:  JioMeet is a Video Conferencing Service that allows you to conduct  a conference  call (video or audio)  anytime, anywhere. Busin esses can connect with up to 100 participants in a conference using the JioMeet app on a desktop/ laptop, smartphone, or from a legacy Video Conferencing (VC) device. Additionally, JioMeet Pro allows for multiple Admin/  Host licenses and is ideally suited for business users, with features such as conducting video calls with up to 25 0 participants, having an Admin portal to manage licenses, getting User Management rights, and recording meetings.
[0m[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer.
Answer: JioMeet is a Video Conferencing Service that allows you to conduct a conference call (video or audio) anytime, anywhere. It allows businesses to connect with up to 100 participants in a conference using the JioMeet app on a desktop/laptop, smartphone, or from a legacy Video Co

In [24]:
# print() applies str() to its argument itself, so no explicit conversion is needed.
print(response)

JioMeet is a Video Conferencing Service that allows you to conduct a conference call (video or audio) anytime, anywhere. It allows businesses to connect with up to 100 participants in a conference using the JioMeet app on a desktop/laptop, smartphone, or from a legacy Video Conferencing (VC) device. Additionally, JioMeet Pro has features such as conducting video calls with up to 250 participants, having an Admin portal to manage licenses, getting User Management rights, and recording meetings.


In [None]:
# Follow-up question about Jio's business solutions, sent to the same agent
# (no output was recorded for this cell).
response = agent.chat("How Jio service Solutions can help my business?")