In [1]:
# !conda install psycopg2
# !pip install sqlalchemy
# !pip install langchain
# !pip install llama-index
# !pip install llama-index-llms-huggingface
# !pip install torch
# !pip install llama-index-embeddings-langchain
# !pip install bitsandbytes
# !pip install sentence_transformers
# %pip install llama-index-readers-web
# %pip install llama-index-vector-stores-postgres


# LLM

In [2]:
# Hugging Face model id of the instruction-tuned LLM used for generation.
# Larger alternative: "mistralai/Mixtral-8x7B-Instruct-v0.1"
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

In [3]:
import torch
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import BitsAndBytesConfig

# Generation limits and device placement for the locally hosted model
context_window = 2048  # number of tokens used as context for the LLM
max_new_tokens = 256   # maximum number of new tokens generated per call
device = "auto"        # forwarded to HuggingFaceLLM as device_map

# 8-bit quantisation settings; currently unused unless the model_kwargs
# entry below is uncommented
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

# Gather the constructor arguments first, then build the LLM in one call
llm_kwargs = dict(
    model_name=model_name,
    tokenizer_name=model_name,
    context_window=context_window,
    max_new_tokens=max_new_tokens,
    device_map=device,
    # uncomment this if using CUDA to reduce memory usage
    # model_kwargs={
    #     # "torch_dtype": torch.float16
    #     'quantization_config':quantization_config
    # },
)

llm = HuggingFaceLLM(**llm_kwargs)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



In [4]:
# Hugging Face model id of the embedding model; passed as `model_name` to
# HuggingFaceBgeEmbeddings in the next cell
embedding_model_name = "BAAI/bge-large-en-v1.5"

In [5]:
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding

# Load the BGE sentence-embedding model through LangChain ...
bge_embeddings = HuggingFaceBgeEmbeddings(model_name=embedding_model_name)

# ... and wrap it in the llama-index adapter so it can be used as an embed model
embed_model = LangchainEmbedding(bge_embeddings)

# Probe the vector size by embedding a throwaway string; the value is needed
# later when the vector store is created (noted as 1024 for this model)
probe_vector = embed_model.get_text_embedding("Hello world")
embed_dim = len(probe_vector)

# Database

In [6]:
import os

# PostgreSQL server DSN (no database component; the database is created below).
# Credentials should not live in the notebook: set PG_CONNECTION_STRING in the
# environment to override. The fallback is the original local-development
# default and must not be used against a shared or production server.
connection_string = os.environ.get(
    "PG_CONNECTION_STRING",
    "postgresql://postgres:test123@localhost:5432",
)

# Name of the database that holds the embeddings (dropped and recreated below)
db_name = "chatbotdb"
# Table name handed to the vector store for the document embeddings
table_name = 'companyDocEmbeddings'

In [7]:
import psycopg2

from psycopg2 import sql

# Connect to the PostgreSQL server using the DSN configured above.
# The connection is kept open in `conn`; it is closed in the final cell.
conn = psycopg2.connect(connection_string)
# DROP/CREATE DATABASE cannot run inside a transaction block, so the
# connection has to be in autocommit mode for the statements below to work.
conn.autocommit = True

# Compose the database name as a properly quoted SQL identifier instead of
# splicing it into the statement with an f-string. Quoting keeps the name's
# exact spelling (case, special characters), so the database that is created
# is the same one the vector store later connects to by `db_name`.
db_identifier = sql.Identifier(db_name)

# WARNING: destructive. Any existing database with this name is dropped
# (including previously stored embeddings) and a fresh, empty one is created.
with conn.cursor() as cursor:
    cursor.execute(sql.SQL("DROP DATABASE IF EXISTS {}").format(db_identifier))
    cursor.execute(sql.SQL("CREATE DATABASE {}").format(db_identifier))

# Knowledge

In [8]:
from llama_index.readers.web import SimpleWebPageReader

# Pages that make up the knowledge base. Only the uncommented entries are
# fetched; uncomment additional URLs to widen the corpus.
page_urls = [
    # "https://www.e2enetworks.com/",
    # "https://www.e2enetworks.com/products",
    # "https://www.e2enetworks.com/product/cpu-intensive-computing-c2-series",
    # "https://www.e2enetworks.com/product/high-memory-series",
    # "https://www.e2enetworks.com/product/smart-dedicated-compute",
    # "https://www.e2enetworks.com/product/cpanel-linux-cloud",
    # "https://www.e2enetworks.com/product/windows-on-e2e-cloud",
    # "https://www.e2enetworks.com/product/windows-ms-sql-on-e2e-cloud",
    # "https://www.e2enetworks.com/product/plesk-integrated-cloud-servers",
    # "https://www.e2enetworks.com/product/nvidia-tesla-v100",
    # "https://www.e2enetworks.com/product/load-balancer",
    "https://www.e2enetworks.com/pricing",
    # "https://www.e2enetworks.com/become-a-partner",
    # "https://www.e2enetworks.com/careers",
    # "https://www.e2enetworks.com/about-us",
    # "https://www.e2enetworks.com/investors",
    # "https://www.e2enetworks.com/contact-us",
    # "https://www.e2enetworks.com/team",
    # "https://www.e2enetworks.com/testimonials",
    # "https://www.e2enetworks.com/contact-sales",
    # "https://www.e2enetworks.com/escalation-matrix",
    # "https://www.e2enetworks.com/policies/service-level-agreement",
    # "https://www.e2enetworks.com/policies/terms-of-service",
    # "https://www.e2enetworks.com/policies/privacy-policy",
    # "https://www.e2enetworks.com/policies/refund-policy",
    # "https://www.e2enetworks.com/policy-faq",
    # "https://www.e2enetworks.com/countries-served",
]

# Download the listed pages, with the reader's html_to_text option enabled
web_reader = SimpleWebPageReader(html_to_text=True)
documents = web_reader.load_data(page_urls)


# Index the knowledge

In [9]:
# from llama_index.core.indices.service_context import ServiceContext
# from llama_index.core import set_global_service_context

# # Set the chunk size and overlap that controls how the documents are chunked
# chunk_size = 1024
# chunk_overlap = 32

# # Create the service context
# service_context = ServiceContext.from_defaults(
#     embed_model=embed_model,
#     llm=llm,
#     chunk_size=chunk_size,
#     chunk_overlap=chunk_overlap,
# )

# # Set the global service context
# set_global_service_context(service_context)

In [10]:
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter

In [11]:
# Register the locally loaded LLM and embedding model on the llama-index
# Settings object so later indexing/query calls pick them up
Settings.llm = llm
Settings.embed_model = embed_model

# Chunking parameters set on Settings.
# NOTE(review): the following cell assigns Settings.transformations with its
# own SentenceSplitter(chunk_size=1024); that explicit splitter presumably takes
# precedence during indexing, which would make these two values ineffective.
# Confirm which chunk size/overlap is actually intended.
Settings.chunk_size = 2024
Settings.chunk_overlap = 256

In [12]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Settings

# Sentence-aware splitter used as the only document transformation
sentence_splitter = SentenceSplitter(chunk_size=1024)
Settings.transformations = [sentence_splitter]

In [13]:
# # open-source
# from transformers import AutoTokenizer

# Settings.tokenizer = AutoTokenizer.from_pretrained(
#     model_name
# )

In [14]:
from sqlalchemy import make_url
from llama_index.vector_stores.postgres import PGVectorStore

# Parse the DSN so its individual parts can be handed to the vector store
url = make_url(connection_string)

# Server coordinates and credentials taken from the parsed DSN; the target
# database is the one created earlier in the notebook
pg_params = dict(
    host=url.host,
    port=url.port,
    user=url.username,
    password=url.password,
    database=db_name,
)

# Vector store backed by PostgreSQL, sized to the embedding dimension
vector_store = PGVectorStore.from_params(
    table_name=table_name,
    embed_dim=embed_dim,
    **pg_params,
)

In [15]:
from llama_index.core.storage.storage_context import StorageContext
from llama_index.core import VectorStoreIndex


# Storage context that routes vectors into the Postgres-backed vector store
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index from the loaded documents, showing progress bars
index = VectorStoreIndex.from_documents(
    documents=documents,
    storage_context=storage_context,
    show_progress=True,
)

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/44 [00:00<?, ?it/s]

In [None]:
# from llama_index.core import SummaryIndex

# summary_index = SummaryIndex.from_documents(documents, storage_context=storage_context, show_progress=True)

In [None]:
# Release the psycopg2 connection that was opened earlier to create the database
conn.close()