In [None]:
import logging
import sys
import torch
import os

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core import Settings
from llama_index.core.tools import QueryEngineTool, ToolMetadata, FunctionTool
from llama_index.core.query_engine import RouterQueryEngine

#https://docs.llamaindex.ai/en/stable/getting_started/starter_example_local/

# setup promptTemplate for wrapping the prompt
from llama_index.core import PromptTemplate

from llama_index.vector_stores.redis import RedisVectorStore
from llama_index.core import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding


In [None]:
# Logging setup: route DEBUG-level records from all libraries to stdout so
# they appear in the notebook output.
# basicConfig() already attaches one stdout StreamHandler to the root logger,
# so no additional handler is added here -- a second stdout handler would make
# every record print twice. force=True replaces any handlers left over from a
# previous run of this cell (or installed by an imported library), which keeps
# the cell idempotent under re-execution.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True)

# Ask PyTorch's CUDA caching allocator to use expandable segments. This is
# intended to reduce fragmentation-related CUDA out-of-memory errors; it is a
# mitigation, not a guarantee.
# NOTE(review): presumably this has to be set before the first CUDA allocation
# to take effect -- confirm against the PyTorch version in use.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [None]:
# Load the two document corpora, one reader per source directory.
d1_dir, d2_dir = "./data/d1/", "./data/d2/"
documents_anayurt = SimpleDirectoryReader(input_dir=d1_dir).load_data()
documents_wiki = SimpleDirectoryReader(input_dir=d2_dir).load_data()

In [None]:


# Instruction-style wrapper template. When handed to the LLM as its
# query_wrapper_prompt it is meant to wrap llama-index's internal default
# prompts; {query_str} is the placeholder for the wrapped prompt text.
_wrapper_parts = [
    "Below is an instruction that describes a task. ",
    "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. .\n\n",
    "### Instruction:\n{query_str}\n\n### Response:",
]
query_wrapper_prompt = PromptTemplate("".join(_wrapper_parts))

In [None]:


# The same Hugging Face repo id supplies both the model weights and the tokenizer.
_llm_repo = "TheBloke/Llama-2-7B-Chat-GPTQ"

# Sampling options forwarded to the model's generate() call.
_sampling_options = {
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.95,
    "top_k": 40,
    "repetition_penalty": 1.1,
}

llm = HuggingFaceLLM(
    model_name=_llm_repo,
    tokenizer_name=_llm_repo,
    context_window=2048,
    max_new_tokens=256,
    generate_kwargs=_sampling_options,
    tokenizer_kwargs={"max_length": 2048},
    device_map="auto",
    # Optional, currently disabled: wrap prompts with the template defined earlier.
    # query_wrapper_prompt=query_wrapper_prompt,
    # Optional, untested: half precision to reduce memory usage on CUDA.
    # model_kwargs={"torch_dtype": torch.float16},
)

# Embedding model used to vectorize both documents and queries.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Register global defaults so later llama-index components pick them up.
Settings.chunk_size = 512
Settings.llm = llm
Settings.embed_model = embed_model

In [None]:

def _redis_store(name):
    """Create a RedisVectorStore whose index name and key prefix are both ``name``.

    The store targets the local Redis instance and is created with
    overwrite=True, exactly as each corpus was configured before.
    """
    return RedisVectorStore(
        index_name=name,
        index_prefix=name,
        redis_url="redis://localhost:6379",
        overwrite=True,
    )


# One Redis-backed vector store per corpus ...
vector_store_d1 = _redis_store("d1")
vector_store_d2 = _redis_store("d2")

# ... and one storage context wrapping each store.
d1_storage_context = StorageContext.from_defaults(vector_store=vector_store_d1)
d2_storage_context = StorageContext.from_defaults(vector_store=vector_store_d2)



In [None]:
def _build_index(docs, storage_context):
    """Index ``docs`` into ``storage_context`` using the shared embedding model."""
    return VectorStoreIndex.from_documents(
        docs,
        storage_context=storage_context,
        embed_model=embed_model,
    )


# Build one vector index per corpus, each backed by its own storage context.
index_d1 = _build_index(documents_anayurt, d1_storage_context)
index_d2 = _build_index(documents_wiki, d2_storage_context)

# Default query engines over each index.
query_engine_d1 = index_d1.as_query_engine()
query_engine_d2 = index_d2.as_query_engine()

In [None]:

# Sanity check of the embedding model: embed a short string, then show the
# vector's length followed by the vector itself (one per line).
embeddings = embed_model.get_text_embedding("Hello World!")
print(len(embeddings), embeddings, sep="\n")

In [None]:
# Query the d1 index directly (no routing) and print the answer.
# NOTE(review): despite the variable name, the engine is created without any
# streaming option in this notebook, so this is presumably a complete
# response rather than a stream -- confirm. A single query may still be slow
# because llama-index can issue several LLM calls.
response_stream_anayurt = query_engine_d1.query(
    "What can you tell me about the Jeff?"
)
print(str(response_stream_anayurt))

In [None]:
# Query the d2 index directly (no routing) and print the answer.
# NOTE(review): despite the variable name, the engine is created without any
# streaming option in this notebook, so this is presumably a complete
# response rather than a stream -- confirm. A single query may still be slow
# because llama-index can issue several LLM calls.
response_stream_wiki = query_engine_d2.query(
    "What can you tell me about the Jeff?"
)
print(str(response_stream_wiki))

In [None]:
# Python Function to send a message to Discord using the Discord webhook URL: https://discordapp.com/api/webhooks/.....

import requests
import json

def send_discord_message(message: str) -> None:
    """Post ``message`` to a Discord channel through a webhook.

    The webhook URL is read from the ``DISCORD_WEBHOOK_URL`` environment
    variable rather than being hardcoded: a webhook URL embeds a secret token
    and must not be committed with the notebook. The URL is deliberately NOT
    a function parameter, so that when this function is exposed as an LLM
    tool the model can only choose the message text, never the destination.

    Args:
        message: Text to send as the message content.

    Raises:
        ValueError: If ``DISCORD_WEBHOOK_URL`` is unset or empty.

    Delivery problems (HTTP error status, connection failure, timeout) are
    reported on stdout instead of being raised, so a failed notification does
    not abort the caller.
    """
    url = os.environ.get("DISCORD_WEBHOOK_URL", "").strip()
    if not url:
        raise ValueError(
            "DISCORD_WEBHOOK_URL is not set; export the Discord webhook URL "
            "before calling send_discord_message()."
        )

    try:
        # json= serializes the payload and sets the JSON Content-Type header;
        # the timeout keeps a stalled connection from hanging the kernel.
        result = requests.post(url, json={"content": message}, timeout=10)
        result.raise_for_status()
    except requests.exceptions.RequestException as err:
        # Covers HTTP error statuses as well as connection errors and timeouts.
        print(err)
    else:
        print("Payload delivered successfully, code {}.".format(result.status_code))


# Smoke test. Note that this sends a real message each time the cell is run.
send_discord_message("Hello World!")


# JSON-schema style description of the function's single argument.
# Kept for reference only: llama-index's tool metadata takes a pydantic model
# class as its argument schema, not a plain dict, so this dict is not passed
# to the tool below.
explain_send_discord_message = {
    "type": "object",
    "properties": {
        "message": {
            "type": "string",
            "description": "The message to send to Discord",
        },
    },
    "required": ["message"],
}


# FunctionTool's second constructor argument is a ToolMetadata object, so an
# OpenAI-style {"name", "description", "parameters"} dict does not work there.
# from_defaults() builds the metadata from the name/description given here and
# derives the argument schema from the function's signature.
discordFunctionTool = FunctionTool.from_defaults(
    fn=send_discord_message,
    name="Send_Discord_Message",
    description="Use this function to send messages to Discord using the Discord webhook URL.",
)

In [None]:
# (query engine, tool name, description shown to the selector) for each corpus.
_tool_specs = [
    (
        query_engine_d1,
        "d1_query_engine_tool",
        "Use this tool to answer questions about the Billionaire Jeff Bezos.",
    ),
    (
        query_engine_d2,
        "d2_query_engine_tool",
        "Use this tool to answer questions about the Billionaire Jeff Epstein.",
    ),
]

# Wrap every query engine as a tool carrying its name and description.
tools = [
    QueryEngineTool(
        query_engine=engine,
        metadata=ToolMetadata(name=tool_name, description=tool_description),
    )
    for engine, tool_name, tool_description in _tool_specs
]

In [None]:

from llama_index.core.selectors import LLMSingleSelector, LLMMultiSelector

# Build a router over the query-engine tools using a single-choice LLM
# selector, then run one question through it and print the answer.
_selector = LLMSingleSelector.from_defaults()
query_engine = RouterQueryEngine.from_defaults(
    selector=_selector,
    query_engine_tools=tools,
)

response = query_engine.query("When was Jeff Bezos born?")
print(response)

