In [1]:
import textwrap
# Shared wrapper: answers printed further down are folded at 140 columns.
wrapper = textwrap.TextWrapper(
    width=140,
)

# LLM

In [2]:
# Hugging Face model id used for both the LLM weights and its tokenizer.
model_name = 'mistralai/Mixtral-8x7B-Instruct-v0.1'
# Other candidate models, currently disabled:
# model_name = 'mistralai/Mistral-7B-Instruct-v0.2'
# model_name = 'mistralai/Mistral-7B-v0.1'

In [3]:
import torch
# from llama_index.prompts.prompts import SimpleInputPrompt
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import BitsAndBytesConfig
from llama_index.core import PromptTemplate

# Generation budget and placement for the LLM:
#   context_window - how many tokens the LLM may use as context
#   max_new_tokens - how many new tokens the LLM may generate
#   device         - which device the LLM is placed on
context_window, max_new_tokens = 4096, 256
device = "cuda"

# System prompt that instructs the model how to behave: answer E2E Networks
# questions strictly from the supplied context, speak in first person, give links
# as plain URLs, and skip "Based on the context..." style preambles. It ends with a
# worked example of an unacceptable and an acceptable answer.
# The text is sent to the model verbatim, so its wording and whitespace are part of
# the program's behavior.
system_prompt = """
    You are an AI chatbot that is designed to answer questions related to E2E Networks. 
    You are provided with a context and a question. You need to answer the question based on the context provided. 
    If the context is not helpful, do not answer based on prior knowledge, instead, redirect the user to the E2E Networks Support team. 
    You should also provide links that you got from context that are relevant to the answer. 
    You are allowed to answer in first person only, like I/Us/Our; It should feel like a human is answering the question. 
    You should only provide the links and not like [E2E Networks Official Website](https://www.e2enetworks.com/)
    You're not allowed to say something like "Based on the context, I think the answer is...", instead, you should directly answer the question.
    When in confusion, you can ask for more information from the user.

    Here is an example of how you should answer:

    Question: What is the pricing for E2E Networks?
    Context: E2E Networks is a cloud computing company that provides cloud infrastructure and cloud services to businesses and startups.
    Unacceptable Answer: Based on the context, I think the pricing for E2E Networks is...
    Acceptable Answer: The pricing for E2E Networks is...
"""

# Wraps every prompt llama-index sends to the model (its internal default prompts
# included) in the instruction format of the Mistral/Mixtral *Instruct* models
# selected via `model_name`: the user turn goes between [INST] and [/INST].
# The previous "<|USER|>...<|ASSISTANT|>" markers are not part of that format
# (NOTE(review): they look like StableLM-style chat markers - confirm), so the
# model would have treated them as ordinary text.
# No explicit "<s>" is written here on the assumption that the tokenizer inserts
# the BOS token itself - TODO confirm for the tokenizer in use.
query_wrapper_prompt = PromptTemplate("[INST] {query_str} [/INST]")

# Instantiate the Hugging Face backed LLM. The same model id is used for the
# weights and for the tokenizer.
llm = HuggingFaceLLM(
    model_name=model_name,
    tokenizer_name=model_name,
    device_map=device,
    context_window=context_window,
    max_new_tokens=max_new_tokens,
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    # Sampling is enabled, with a low temperature and tight top-k / top-p.
    generate_kwargs=dict(temperature=0.2, top_k=5, top_p=0.95, do_sample=True),
    # Optional and currently disabled: pass model_kwargs to reduce GPU memory usage.
    # NOTE(review): `quantization_config` is not defined in any visible cell; build
    # one (BitsAndBytesConfig is imported above) before enabling that line, or use
    # the torch_dtype variant instead.
    # model_kwargs={
    #     # "torch_dtype": torch.float16
    #     'quantization_config': quantization_config
    # }
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

# Embedding Model

In [4]:
# Hugging Face model id of the embedding model used for text embeddings.
embedding_model_name = 'BAAI/bge-large-en-v1.5'

In [5]:
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding

# Build the LangChain BGE embedder, then wrap it in llama-index's LangChain adapter.
_bge_embeddings = HuggingFaceBgeEmbeddings(model_name=embedding_model_name)
embed_model = LangchainEmbedding(_bge_embeddings)

# Embed a throwaway string once and measure the vector length; that length is the
# embedding dimension (1024 according to the original note).
_probe_vector = embed_model.get_text_embedding("Hello world")
embed_dim = len(_probe_vector)

# Settings

In [6]:
from llama_index.core import Settings

# Install the LLM and the embedding model on llama-index's global Settings object;
# the later cells never pass an llm or embed_model explicitly.
Settings.llm = llm
Settings.embed_model = embed_model

# Chunking parameters: chunks of 1024 with an overlap of 512 (half a chunk).
# NOTE(review): units are presumably tokens - confirm against llama-index docs.
Settings.chunk_size = 1024
Settings.chunk_overlap = 512

# Database

In [7]:
import os

# Database location and credentials.
# The connection string embeds the database password, so it is taken from the
# CHATBOT_DB_URL environment variable when that is set and non-empty. The value
# below is only the local-development fallback that keeps the notebook runnable
# as before; do not put real credentials in the notebook.
connection_string = (
    os.environ.get("CHATBOT_DB_URL")
    or "postgresql://postgres:test123@localhost:5432"
)
# Name of the database that holds the embeddings table.
db_name = "chatbotdb"
# Name of the table the vector store reads from.
table_name = 'companyDocEmbeddings'

In [8]:
from sqlalchemy import make_url
from llama_index.vector_stores.postgres import PGVectorStore

# Parse the connection string once and collect the individual connection fields
# that PGVectorStore.from_params takes; the database name comes from `db_name`.
url = make_url(connection_string)
_pg_params = dict(
    host=url.host,
    port=url.port,
    user=url.username,
    password=url.password,
    database=db_name,
)

# Vector store over the embeddings table, sized to the embedding dimension.
vector_store = PGVectorStore.from_params(
    table_name=table_name,
    embed_dim=embed_dim,
    **_pg_params,
)

In [9]:
from llama_index.core import VectorStoreIndex

# Build the index on top of the existing database-backed vector store.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
)

# Engine

In [10]:
from llama_index.core.query_engine.retriever_query_engine import RetrieverQueryEngine
from llama_index.core.indices.vector_store.retrievers.retriever import VectorIndexRetriever
from llama_index.core import get_response_synthesizer

# Response synthesizer: turns the retrieved chunks into the final answer.
# NOTE(review): 'simple_summarize' is documented to truncate the chunks so they fit
# one prompt; with similarity_top_k=16, chunk_size=1024 and a 4096 context window
# much of the retrieved text may be dropped - confirm this is intended.
response_synthesizer = get_response_synthesizer(response_mode='simple_summarize')

# Retriever: fetches the 16 most similar entries from the index for each query.
retriever = VectorIndexRetriever(index=index, similarity_top_k=16)

# Query engine: retrieval followed by synthesis.
engine = RetrieverQueryEngine(
    response_synthesizer=response_synthesizer,
    retriever=retriever,
)

In [11]:
# Ask what E2E Networks does and print the answer wrapped with the shared wrapper.
response = engine.query(
    'what is the business of e2e networks?'
)
print("\n".join(wrapper.wrap(response.response)))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


E2E Networks is a cloud computing company that provides cloud infrastructure and cloud services to businesses and startups. They offer
various solutions like API & CLI Access, CDN, Reserved IP, and Cloud Computing solutions. Their platform focuses on affordability,
assistance, accessibility, accommodative, and AtmanirbharBharat (self-reliant India) principles. They have a reputation for being a trusted
and reliable partner for Higher Education and Research Institutions, Enterprises, and AI/ML startups in India and globally.


In [12]:
# Ask which countries E2E operates in and print the wrapped answer.
response = engine.query(
    'In what all countries does E2E Operate?'
)
print("\n".join(wrapper.wrap(response.response)))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


E2E operates in the following countries: United States of America (USA), United Kingdom (UK), Germany, France, Canada, Switzerland,
Australia, Netherlands, Italy, Spain, Israel, Sweden, Belgium, Austria, Singapore, Norway, Denmark, Finland, Portugal, Brazil, UAE, and
South Korea.


In [13]:
# Ask about the GPU-powered machines on offer and print the wrapped answer.
response = engine.query(
    'What all GPU powered machines does E2E provide?'
)
print("\n".join(wrapper.wrap(response.response)))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


E2E provides various GPU powered machines including NVIDIA T4, A100, and H100 GPUs.


In [19]:
# Ask about machines for high-end ML/DL workloads and print the wrapped answer.
response = engine.query(
    'What all machines does E2E provide for high-end machine/deep learning tasks?'
)
print("\n".join(wrapper.wrap(response.response)))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


E2E provides GPU instances for high-end machine learning and deep learning tasks, starting as low as ₹ 30/hr for NVIDIA T4 GPUs. They also
offer NVIDIA A100/H100 GPUs, which are the cloud's flagship machine learning platform, Tir. For more information, please visit the E2E
Networks website. [Link](https://www.e2enetworks.com/products#cloud-GPUs)


In [14]:
# Ask for the HGX 8xH100 price and print the wrapped answer.
response = engine.query(
    'What is the price of HGX 8xH100?'
)
print("\n".join(wrapper.wrap(response.response)))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


The price of HGX 8xH100 on E2E Cloud starts as low as ₹ 800/hr.


In [15]:
# Ask about the refund policy and print the wrapped answer.
response = engine.query(
    'What is the refund policy of E2E Networks?'
)
print("\n".join(wrapper.wrap(response.response)))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


E2E Networks has a Refund Policy that is published on their website. The policy covers various scenarios such as invoice discrepancies,
minimum billing amounts, promotional code benefits, and software licenses. The policy also mentions that if you find any discrepancies in
the invoices, you need to email them within 7 days of receiving the invoice. The policy also states that if you do not inform them of any
discrepancy within 7 days, they will not entertain any requests to modify bills or offer refunds in any other form. For more details, you
can refer to the Refund Policy published on the E2E Networks website. [Refund Policy](https://www.e2enetworks.com/policies/refund-policy)


In [16]:
# Ask about data privacy and print the wrapped answer.
response = engine.query(
    'What about privacy of my data?'
)
print("\n".join(wrapper.wrap(response.response)))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


We take privacy seriously and implement reasonable and appropriate measures designed to help secure your data against accidental or unlawful
loss, access or disclosure. However, you are responsible for properly configuring and using the Services and maintaining the security and
confidentiality of your accounts and access to the hosted systems, including encrypting any Personal Information you store on or transmit
to/from our hosted system. We do not promise to retain any preservations or backups of your Customer Data. For more information, please
refer to the Privacy Policy of the relevant customer to which you submitted your Personal Information.
