In [None]:
import textwrap
wrapper = textwrap.TextWrapper(width=140)

In [1]:
# model_name = 'mistralai/Mixtral-8x7B-Instruct-v0.1'
model_name = 'mistralai/Mistral-7B-Instruct-v0.2'

In [2]:
# from llama_index.core.llms import ChatMessage, MessageRole
# from llama_index.core import ChatPromptTemplate

# # Text QA Prompt
# chat_text_qa_msgs = [
#     ChatMessage(
#         role=MessageRole.SYSTEM,
#         content=(
#             "Always answer the question, even if the context isn't helpful."
#         ),
#     ),
#     ChatMessage(
#         role=MessageRole.USER,
#         content=(
#             "Context information is below.\n"
#             "---------------------\n"
#             "{context_str}\n"
#             "---------------------\n"
#             "Given the context information and not prior knowledge, "
#             "answer the question: {query_str}\n"
#         ),
#     ),
# ]
# text_qa_template = ChatPromptTemplate(chat_text_qa_msgs)

# # Refine Prompt
# chat_refine_msgs = [
#     ChatMessage(
#         role=MessageRole.SYSTEM,
#         content=(
#             "Always answer the question, even if the context isn't helpful."
#         ),
#     ),
#     ChatMessage(
#         role=MessageRole.USER,
#         content=(
#             "We have the opportunity to refine the original answer "
#             "(only if needed) with some more context below.\n"
#             "------------\n"
#             "{context_msg}\n"
#             "------------\n"
#             "Given the new context, refine the original answer to better "
#             "answer the question: {query_str}. "
#             "If the context isn't useful, output the original answer again.\n"
#             "Original Answer: {existing_answer}"
#         ),
#     ),
# ]
# refine_template = ChatPromptTemplate(chat_refine_msgs)

In [3]:
import torch
# from llama_index.prompts.prompts import SimpleInputPrompt
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import BitsAndBytesConfig
from llama_index.core import PromptTemplate

# Context Window specifies how many tokens to use as context for the LLM
context_window = 2048
# Max New Tokens specifies how many new tokens to generate for the LLM
max_new_tokens = 256
# Device specifies which device to use for the LLM
device = "cuda"

# This is the prompt that will be used to instruct the model behavior
system_prompt = """
I. Introduction:
    Identify yourself: "I am an AI chatbot designed to answer your questions about E2E Networks."
    Explain your function: "I can answer your questions based on the information provided to me. If the information isn't enough, I'll guide you to the E2E Networks Support team for further assistance."

II. Input:
    Context: "You will be provided with a context (information) related to E2E Networks."
    Question: "You will be asked a question relevant to the provided context."

III. Output:
    Answer: "I will answer your question directly based on the information in the context."
    External resources: "If the context is insufficient, I will not answer based on general knowledge. Instead, I will direct you to the E2E Networks Support team and provide any relevant links found in the context, such as the official website: E2E Networks Official Website: https://www.e2enetworks.com/."
    First-person perspective: "I will always use the first-person perspective (e.g., I, us, our) to answer your questions, making the interaction feel natural and human-like."

IV. Limitations:
    Knowledge limitations: "My knowledge is confined to the information provided and should not be considered comprehensive."
    Independent reasoning: "I cannot use independent reasoning or personal opinions in my responses."
"""

# This will wrap the default prompts that are internal to llama-index
query_wrapper_prompt = PromptTemplate("<|USER|>{query_str}<|ASSISTANT|>")

# Create the LLM using the HuggingFaceLLM class
llm = HuggingFaceLLM(
    context_window=context_window,
    max_new_tokens=max_new_tokens,
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=model_name,
    model_name=model_name,
    device_map=device,
    # uncomment this if using CUDA to reduce memory usage
    # model_kwargs={
    #     # "torch_dtype": torch.float16
    #     'quantization_config':quantization_config
    # }
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
embedding_model_name = "BAAI/bge-large-en-v1.5"

In [5]:
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding

# Create the embedding model using the HuggingFaceBgeEmbeddings class
embed_model = LangchainEmbedding(
  HuggingFaceBgeEmbeddings(model_name=embedding_model_name)
)

# Get the embedding dimension of the model by doing a forward pass with a dummy input
embed_dim = len(embed_model.get_text_embedding("Hello world")) # 1024

# database

In [6]:
connection_string = "postgresql://postgres:test123@localhost:5432"
db_name = "chatbotdb"
table_name = 'companyDocEmbeddings'

In [7]:
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter

Settings.llm = llm
Settings.embed_model = embed_model

Settings.chunk_size = 2048
Settings.chunk_overlap = 256

from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Settings

Settings.transformations = [SentenceSplitter(chunk_size=1024)]

In [8]:
from sqlalchemy import make_url
from llama_index.vector_stores.postgres import PGVectorStore

# Creates a URL object from the connection string
url = make_url(connection_string)

# Create the vector store
vector_store = PGVectorStore.from_params(
    database=db_name,
    host=url.host,
    password=url.password,
    port=url.port,
    user=url.username,
    table_name=table_name,
    embed_dim=embed_dim,
)

In [9]:
from llama_index.core import VectorStoreIndex

# Load the index from the vector store of the database
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

# Engine

In [66]:
from llama_index.core.query_engine.retriever_query_engine import RetrieverQueryEngine
from llama_index.core.indices.vector_store.retrievers.retriever import VectorIndexRetriever
from llama_index.core import get_response_synthesizer

# Create the retriever that manages the index and the number of results to return
retriever = VectorIndexRetriever(
      index=index,
      similarity_top_k=5,
)

# Create the response synthesizer that will be used to synthesize the response
response_synthesizer = get_response_synthesizer(
      response_mode='accumulate',
)

# Create the query engine that will be used to query the retriever and synthesize the response
query_engine = RetrieverQueryEngine(
      retriever=retriever,
      response_synthesizer=response_synthesizer,
)

In [67]:
response = query_engine.query('Do you know about e2e networks? What is e2e networks? And what all services do they provide?')

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [68]:
print(wrapper.fill(response.response))

Response 1: I am an AI chatbot designed to answer your questions about E2E Networks. Based on the context provided, E2E Networks Limited is
India's fastest growing accelerated cloud computing player. The context does not explicitly mention the services they provide, but it can be
inferred that they specialize in cloud computing. For more detailed information about their services, I would recommend visiting their
official website or contacting their support team. Here's the link to their website: https://www.e2enetworks.com/ ---------------------
Response 2: I am an AI chatbot designed to answer your questions about E2E Networks. Based on the context provided, E2E Networks is a
company that offers various cloud services. Specifically, they provide Webuzo Linux Cloud plans, which include full root access, licensing
at no extra cost, and one-click install for 450+ apps using Softaculous. The plans come with different vCPU, CPU frequency, dedicated RAM,
disk space, and prices. For more det

In [78]:
response = query_engine.query('What is the price of 8xH100?')

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [79]:
print(wrapper.fill(response.response))

Response 1: I'm unable to directly answer that question based on the context provided. The context only mentions the prices for 2xA100,
4xA100, and no information about the price for 8xH100 is given. I recommend checking the official E2E Networks website or contacting their
sales team for the most accurate and up-to-date information. Here's the link to their website: https://www.e2enetworks.com/ and their
contact information: [+91-11-4084-4965](http://callto:+91-11-4084-4965) and [sales@e2enetworks.com](mailto:sales@e2enetworks.com).
--------------------- Response 2: Based on the context provided, I cannot directly answer your question as the price for an 8xH100
configuration is not listed. However, I can guide you to the E2E Networks Support team for further assistance. They can be reached at
[+91-11-4084-4965](http://callto:+91-11-4084-4965) or [sales@e2enetworks.com](mailto:sales@e2enetworks.com). Additionally, you may find more
information on their official website: https://www.e2e

In [80]:
response.source_nodes

[NodeWithScore(node=TextNode(id_='87c8faf8-d9fd-43c5-a8b1-a4ec9aae3560', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='https://www.e2enetworks.com/pricing', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='09c838495f7a657ec983a961e9cf7dbd397e95c46a386923d20904bb330d32f0'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='f383a04e-6f91-448d-967e-e195b6e474b7', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='3be95528f2a53b2c974e25fc0be74761ee77aac3ca11f13e6cfadaba6e514ded'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='2607b591-1f01-4838-97c8-36065308ad39', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='6cbdc136a4aa54f0ece0c44707b27f91b72666813451bbe5db26f0b585ac3533')}, text='04,654\n\n[Try for Free](https://myaccount.e2enetworks.com/products/create-node)\n\n2xA100\n\n2 x 80 GB\n\n32 vCPUs\n\n230 GB\n\n3000 GB SSD\n\nâ\x82¹463/hr\n