In [1]:
import logging
import sys
import torch
import os

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core import Settings
from llama_index.core.tools import QueryEngineTool, ToolMetadata, FunctionTool
from llama_index.core.query_engine import RouterQueryEngine

#https://docs.llamaindex.ai/en/stable/getting_started/starter_example_local/

# setup promptTemplate for wrapping the prompt
from llama_index.core import PromptTemplate

from llama_index.vector_stores.redis import RedisVectorStore
from llama_index.core import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from redis import Redis

# NOTE: This is ONLY necessary in jupyter notebook.
# Details: Jupyter runs an event-loop behind the scenes.
#          This results in nested event-loops when we start an event-loop to make async queries.
#          This is normally not allowed, we use nest_asyncio to allow it for convenience.
import nest_asyncio

nest_asyncio.apply()

  from .autonotebook import tqdm as notebook_tqdm



In [2]:
# Logging setup
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))


In [3]:
# load documents
documents_anayurt = SimpleDirectoryReader("./data/d1/").load_data()
documents_wiki = SimpleDirectoryReader("./data/d2/").load_data()

DEBUG:llama_index.core.readers.file.base:> [SimpleDirectoryReader] Total files added: 1
> [SimpleDirectoryReader] Total files added: 1
DEBUG:fsspec.local:open file: /mnt/27f0ba34-9a14-408f-ab51-9d32ffca687d/ai/rag_demo/data/d1/epstein.txt
open file: /mnt/27f0ba34-9a14-408f-ab51-9d32ffca687d/ai/rag_demo/data/d1/epstein.txt
DEBUG:llama_index.core.readers.file.base:> [SimpleDirectoryReader] Total files added: 1
> [SimpleDirectoryReader] Total files added: 1
DEBUG:fsspec.local:open file: /mnt/27f0ba34-9a14-408f-ab51-9d32ffca687d/ai/rag_demo/data/d2/bezos.txt
open file: /mnt/27f0ba34-9a14-408f-ab51-9d32ffca687d/ai/rag_demo/data/d2/bezos.txt


In [4]:


# This will wrap the default prompts that are internal to llama-index
query_wrapper_prompt = PromptTemplate(
   '''<s>[INST] {query_str} [/INST]
   '''
)


In [5]:


llm = HuggingFaceLLM(
    context_window=2048,
    max_new_tokens=256,
    generate_kwargs={ "do_sample": True, "temperature":0.1},
    
    tokenizer_name="TheBloke/Mistral-7B-Instruct-v0.2-GPTQ",
    model_name="TheBloke/Mistral-7B-Instruct-v0.2-GPTQ",
    device_map="auto",
    tokenizer_kwargs={"max_length": 2048},
    # uncomment this if using CUDA to reduce memory usage (Untested)
    #model_kwargs={"torch_dtype": torch.float16}
)


# loads BAAI/bge-small-en-v1.5
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")


Settings.chunk_size = 512
Settings.llm = llm
Settings.embed_model = embed_model

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
Starting new HTTPS connection (1): huggingface.co:443




DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /TheBloke/Mistral-7B-Instruct-v0.2-GPTQ/resolve/main/config.json HTTP/1.1" 200 0
https://huggingface.co:443 "HEAD /TheBloke/Mistral-7B-Instruct-v0.2-GPTQ/resolve/main/config.json HTTP/1.1" 200 0
INFO:datasets:PyTorch version 2.3.0 available.
PyTorch version 2.3.0 available.




INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /TheBloke/Mistral-7B-Instruct-v0.2-GPTQ/resolve/main/generation_config.json HTTP/1.1" 200 0
https://huggingface.co:443 "HEAD /TheBloke/Mistral-7B-Instruct-v0.2-GPTQ/resolve/main/generation_config.json HTTP/1.1" 200 0




DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /TheBloke/Mistral-7B-Instruct-v0.2-GPTQ/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
https://huggingface.co:443 "HEAD /TheBloke/Mistral-7B-Instruct-v0.2-GPTQ/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /BAAI/bge-small-en-v1.5/resolve/main/modules.json HTTP/1.1" 200 0
https://huggingface.co:443 "HEAD /BAAI/bge-small-en-v1.5/resolve/main/modules.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /BAAI/bge-small-en-v1.5/resolve/main/config_sentence_transformers.json HTTP/1.1" 200 0
https://huggingface.co:443 "HEAD /BAAI/bge-small-en-v1.5/resolve/main/config_sentence_transformers.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEA

In [6]:
from redisvl.schema import IndexSchema
# create a Redis client connection
redis_client = Redis.from_url("redis://localhost:6379")


d1_schema = IndexSchema.from_dict(
    {
        # customize basic index specs
        "index": {
            "name": "d1",
            "prefix": "d1",
            "key_separator": ":",
        },
        # customize fields that are indexed
        "fields": [
            # required fields for llamaindex
            {"type": "tag", "name": "id"},
            {"type": "tag", "name": "doc_id"},
            {"type": "text", "name": "text"},
            # custom metadata fields
            {"type": "numeric", "name": "updated_at"},
            {"type": "tag", "name": "file_name"},
            # custom vector field definition for cohere embeddings
            {
                "type": "vector",
                "name": "vector",
                "attrs": {
                    "dims": 384,
                    "algorithm": "hnsw",
                    "distance_metric": "cosine",
                },
            },
        ],
    }
)

d2_schema = IndexSchema.from_dict(
    {
        # customize basic index specs
        "index": {
            "name": "d2",
            "prefix": "d2",
            "key_separator": ":",
        },
        # customize fields that are indexed
        "fields": [
            # required fields for llamaindex
            {"type": "tag", "name": "id"},
            {"type": "tag", "name": "doc_id"},
            {"type": "text", "name": "text"},
            # custom metadata fields
            {"type": "numeric", "name": "updated_at"},
            {"type": "tag", "name": "file_name"},
            # custom vector field definition for cohere embeddings
            {
                "type": "vector",
                "name": "vector",
                "attrs": {
                    "dims": 384,
                    "algorithm": "hnsw",
                    "distance_metric": "cosine",
                },
            },
        ],
    }
)




vector_store_d1 = RedisVectorStore(
    schema=d1_schema,
    redis_client=redis_client,
    overwrite=True,
)

vector_store_d2 = RedisVectorStore(
    schema=d2_schema,
    redis_client=redis_client,
    overwrite=True,
)

d1_storage_context = StorageContext.from_defaults(vector_store=vector_store_d1)

d2_storage_context = StorageContext.from_defaults(vector_store=vector_store_d2)



16:54:12 redisvl.index.index INFO   Index already exists, overwriting.
INFO:redisvl.index.index:Index already exists, overwriting.
Index already exists, overwriting.
16:54:12 redisvl.index.index INFO   Index already exists, overwriting.
INFO:redisvl.index.index:Index already exists, overwriting.
Index already exists, overwriting.


In [7]:
index_d1 = VectorStoreIndex.from_documents(
    documents_anayurt, storage_context=d1_storage_context, embed_model=embed_model
)

index_d2 = VectorStoreIndex.from_documents(
    documents_wiki, storage_context=d2_storage_context, embed_model=embed_model
)

query_engine_d1 = index_d1.as_query_engine()

query_engine_d2 = index_d2.as_query_engine()

DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Main menu

WikipediaThe Free Encyclopedia
Searc...
> Adding chunk: Main menu

WikipediaThe Free Encyclopedia
Searc...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Born and raised in New York City, Epstein began...
> Adding chunk: Born and raised in New York City, Epstein began...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: [10] The medical examiner ruled that his death ...
> Adding chunk: [10] The medical examiner ruled that his death ...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: [22] Seymour worked for the New York City Depar...
> Adding chunk: [22] Seymour worked for the New York City Depar...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: [25] From September 1971, he attended the Coura...
> Adding chunk: [25] From September 1971, he attended the Coura...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Greenberg's daughter, Lynne Koeppel, p

Batches: 100%|██████████| 1/1 [00:00<00:00,  1.41it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  5.57it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  5.55it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  5.56it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  5.55it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  5.38it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  5.54it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  7.25it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  7.36it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  7.61it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  6.61it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  6.64it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  6.28it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  6.60it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  7.53it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  6.16it/s]


INFO:llama_index.vector_stores.redis.base:Added 159 documents to index d1
Added 159 documents to index d1
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Main menu

WikipediaThe Free Encyclopedia
Searc...
> Adding chunk: Main menu

WikipediaThe Free Encyclopedia
Searc...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: 1993; div. 2019)​
Partner(s)	Lauren Sánchez
(20...
> Adding chunk: 1993; div. 2019)​
Partner(s)	Lauren Sánchez
(20...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: The company began as an online bookstore and ha...
> Adding chunk: The company began as an online bookstore and ha...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: On July 5, 2021, Bezos stepped down as the CEO ...
> Adding chunk: On July 5, 2021, Bezos stepped down as the CEO ...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: [15] After his parents divorced, his mother mar...
> Adding chunk: [15] After his parents divorced, his mother m

Batches: 100%|██████████| 1/1 [00:00<00:00,  5.47it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  5.53it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  5.55it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  5.56it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  5.52it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  7.57it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  6.75it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  7.31it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  7.49it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  7.36it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  6.67it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 18.99it/s]

INFO:llama_index.vector_stores.redis.base:Added 113 documents to index d2
Added 113 documents to index d2





In [8]:

embeddings = embed_model.get_text_embedding("Hello World!")
print(len(embeddings))
print(embeddings)

Batches: 100%|██████████| 1/1 [00:00<00:00, 80.76it/s]

384
[-0.003275686874985695, -0.011690798215568066, 0.04155920445919037, -0.03814814239740372, 0.024183068424463272, 0.01364425290375948, 0.011117844842374325, 0.048119720071554184, 0.021409541368484497, 0.014174935407936573, -0.003071072045713663, -0.038435474038124084, 0.009439045563340187, 0.049217499792575836, 0.03807622194290161, 0.022404136136174202, 0.03524314612150192, -0.01304895430803299, -0.10893470048904419, -0.02066672407090664, 0.10619362443685532, 0.047313593327999115, -0.020443320274353027, -0.04986641928553581, 0.0010536344489082694, -0.015180863440036774, -0.014587006531655788, 0.03547670692205429, -0.0015566210495308042, -0.1759529560804367, -0.013892745599150658, -0.011971251107752323, 0.09682812541723251, -0.010525008663535118, 0.037675950676202774, -0.0182676799595356, -0.002510236809030175, 0.04775373265147209, -0.044650234282016754, 0.008240822702646255, 0.041858311742544174, -0.012666523456573486, -0.0014880654634907842, -0.009668886661529541, 0.0139870811253786




In [9]:
response_stream_anayurt = query_engine_d1.query("What can you tell me about the Jeff?")

# can be slower to start streaming since llama-index often involves many LLM calls
print(response_stream_anayurt)

Batches: 100%|██████████| 1/1 [00:00<00:00, 108.48it/s]

INFO:llama_index.vector_stores.redis.base:Querying index d1 with filters *
Querying index d1 with filters *
INFO:llama_index.vector_stores.redis.base:Found 2 results for query with id ['d1:677f5c6a-4cb2-4918-8ec1-dce6bbe3cb45', 'd1:7a8f6d15-e4ac-4046-b567-851a704d58fd']
Found 2 results for query with id ['d1:677f5c6a-4cb2-4918-8ec1-dce6bbe3cb45', 'd1:7a8f6d15-e4ac-4046-b567-851a704d58fd']
DEBUG:llama_index.core.indices.utils:> Top 2 nodes:
> [Node d1:677f5c6a-4cb2-4918-8ec1-dce6bbe3cb45] [Similarity score:             0.524511] "Jeffrey Epstein Dodged Questions About Sex With His Dalton Prep-School Students". The Daily Beas...
> [Node d1:7a8f6d15-e4ac-4046-b567-851a704d58fd] [Similarity score:             0.523232] Little, Brown. ISBN 9780316362450.
 Metcalf, Tom; Melby, Caleb; Alexander, Sophie (July 8, 2019)....
> Top 2 nodes:
> [Node d1:677f5c6a-4cb2-4918-8ec1-dce6bbe3cb45] [Similarity score:             0.524511] "Jeffrey Epstein Dodged Questions About Sex With His Dalton Prep-Scho


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Thomas Tom living,Tom: a	D:'PMG	:B:B:$'020log200xSXE2$3?L4,D0xgf24$30$^230D13E0020*31000'240l2200000B21w500000000212W62000220002020000000212210000062220320001111,320000000000111110000011120000000000111120000000000000000001111200111002210202721212}111112000381100115201002501


In [10]:
response_stream_wiki = query_engine_d2.query("What can you tell me about the Jeff?")

# can be slower to start streaming since llama-index often involves many LLM calls
print(response_stream_wiki)

Batches: 100%|██████████| 1/1 [00:00<00:00, 99.66it/s]

INFO:llama_index.vector_stores.redis.base:Querying index d2 with filters *
Querying index d2 with filters *
INFO:llama_index.vector_stores.redis.base:Found 2 results for query with id ['d2:af634658-f3c6-4d43-9e3c-fe46765d290c', 'd2:36b29fc0-f666-4b0d-a1e0-30f07db9918b']
Found 2 results for query with id ['d2:af634658-f3c6-4d43-9e3c-fe46765d290c', 'd2:36b29fc0-f666-4b0d-a1e0-30f07db9918b']
DEBUG:llama_index.core.indices.utils:> Top 2 nodes:
> [Node d2:af634658-f3c6-4d43-9e3c-fe46765d290c] [Similarity score:             0.558326] [15] After his parents divorced, his mother married Cuban immigrant Miguel "Mike" Bezos in April ...
> [Node d2:36b29fc0-f666-4b0d-a1e0-30f07db9918b] [Similarity score:             0.533403] Retrieved January 9, 2018. I have firsthand knowledge of many of the events. I worked for Jeff (B...
> Top 2 nodes:
> [Node d2:af634658-f3c6-4d43-9e3c-fe46765d290c] [Similarity score:             0.558326] [15] After his parents divorced, his mother married Cuban immigrant M


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.





 Marc Ferdcbp2nFip102,
D22kfl00pk042k2pow1K kap3?21k0566x230JX25656613X23D111356966-2301
10440209630994024041102209056504009200990?03020990420990900910999090999510912090909099090090900200999999300112003020000000103110000500020 .51052003003411000000000060120002.20111 |1


In [11]:
tools = [
    QueryEngineTool(
        query_engine=query_engine_d1,
        metadata=ToolMetadata(
            name="d1_query_engine_tool",
            description=(
                "Use this tool to answer questions about the Billionaire Jeff Bezos."
            ),
        ),
    ),
    QueryEngineTool(
        query_engine=query_engine_d2,
        metadata=ToolMetadata(
            name="d2_query_engine_tool",
            description=(
                "Use this tool to answer questions about the Billionaire Jeff Epstein."
            ),
        ),
    ),
]

In [12]:

from llama_index.core.selectors import LLMSingleSelector, LLMMultiSelector

router_query_engine = RouterQueryEngine.from_defaults(query_engine_tools=tools, selector=LLMSingleSelector.from_defaults())
# Query to be processed
query = "Tell me more about Bezos?"

# Execute the query and handle the response
try:
    response = router_query_engine.query(query)
    print("Raw response:", response)

    # Attempt to convert the response metadata to JSON
    try:
        selector_result = response.metadata["selector_result"]
        print("Selector Result:", selector_result)
    except ValueError as json_error:
        print("JSON conversion error:", json_error)
        # Handle the error, e.g., log the raw response for further analysis
        selector_result = {"error": str(json_error), "raw_response": str(response.metadata)}
        print("Handled Selector Result:", selector_result)

except Exception as e:
    print("An error occurred while querying:", e)
    selector_result = {"error": str(e)}

# Output the final response
print("Final Selector Result:", selector_result)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


An error occurred while querying: Failed to convert output to JSON: "\nIano\n\n\nC242\n\n'd OBNW1daK.1 SnokknowABx3 is<i_01-31.97 1-3 -1.6501 -1 reportedPK-D1.?1aS waso199911013001O1281148001z411w06499201T801w015111};%00331118011}00811}0000E150A011};1800000Net8011}1016=1}0R80M(IS0 (85(0(328((SDA2001};0,1< 2(33_01};00SD30 */»* 42 =3521};eston000(<2 *…0!(1}\\1 >1};\n302 break-2219003 312"
Final Selector Result: {'error': 'Failed to convert output to JSON: "\\nIano\\n\\n\\nC242\\n\\n\'d OBNW1daK.1 SnokknowABx3 is<i_01-31.97 1-3 -1.6501 -1 reportedPK-D1.?1aS waso199911013001O1281148001z411w06499201T801w015111};%00331118011}00811}0000E150A011};1800000Net8011}1016=1}0R80M(IS0 (85(0(328((SDA2001};0,1< 2(33_01};00SD30 */»* 42 =3521};eston000(<2 *…0!(1}\\\\1 >1};\\n302 break-2219003 312"'}


In [13]:
# Python Function to send a message to Discord using the Discord webhook URL: https://discordapp.com/api/webhooks/.....

import requests
import json

def send_discord_message(message):
    url = "https://discordapp.com/api/webhooks/...
    data = {}
    data["content"] = message
    result = requests.post(url, data=json.dumps(data), headers={"Content-Type": "application/json"})
    try:
        result.raise_for_status()
    except requests.exceptions.HTTPError as err:
        print(err)
    else:
        print("Payload delivered successfully, code {}.".format(result.status_code))


send_discord_message("Hello World!")


# Explain the above function in JSON format

explain_send_discord_message = {
    "type": "object",
    "properties": {
        "message": {
        "type": "string",
        "description": "The message to send to Discord",
        },
    },
    "required": ["message"],
    }


discordFunctionTool = FunctionTool(send_discord_message, {
  "name": "Send_Discord_Message",
  "description": "Use this function to send messages to Discord using the Discord webhook URL.",
  "parameters": explain_send_discord_message,
});

SyntaxError: unterminated string literal (detected at line 7) (3018108637.py, line 7)