# LlamaIndex and Postgres Vector Store

> In this notebook we show how to use PostgreSQL and pgvector to perform vector searches in LlamaIndex.

### Install dependencies

In [None]:
%pip install llama-index-vector-stores-postgres
!pip install llama-index
%pip install pymupdf

### Create a new database
> I have a Postgres Docker image running on my machine, so I just created a new database for our lab:

```psql
postgres=# CREATE DATABASE lab_db;
CREATE DATABASE
```

**Below I import all the dependencies I need.**

In [2]:
import logging
import sys

# Send DEBUG-level logs to stdout. `force=True` replaces any handlers already attached to
# the root logger, so every record is printed once and re-running this cell does not
# stack additional handlers. Comment the call out (or raise the level) for quieter output.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True)

from sqlalchemy import make_url
from llama_index.core import SimpleDirectoryReader, StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.postgres import PGVectorStore
import textwrap
import openai


### Setup OpenAI
> The first step is to configure the OpenAI key. It will be used to create embeddings for the documents loaded into the index.

In [3]:

import os
from getpass import getpass

# Keep a key that is already exported in the environment; only prompt when none is set.
# Prompting (instead of assigning a literal) keeps the secret out of the saved notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Mirror the key onto the openai module so both configuration paths agree.
openai.api_key = os.environ["OPENAI_API_KEY"]

### Loading documents
> Load the documents stored in the `../file` directory using the SimpleDirectoryReader.

In [None]:

# Read the contents of ../file into LlamaIndex documents, then print the ID of the
# first document as a quick check that loading produced something.
reader = SimpleDirectoryReader("../file")
documents = reader.load_data()
print("Document ID:", documents[0].doc_id)


### Create the Database
> Connect to the existing Postgres server that hosts the `lab_db` database used in the rest of the notebook.

In [5]:
import os

import psycopg2
from psycopg2 import sql

# Server-level connection URL (no database name). Export PG_CONNECTION_STRING to point the
# notebook at another server or account instead of editing credentials into this cell;
# the literal below is only the fallback.
connection_string = os.environ.get(
    "PG_CONNECTION_STRING",
    "postgresql://postgres:minhasenha@192.168.15.134:5432",
)
db_name = "lab_db"

conn = psycopg2.connect(connection_string)
# CREATE DATABASE cannot run inside a transaction block, so statements must autocommit.
conn.autocommit = True
try:
    with conn.cursor() as cursor:
        # Create the lab database only when it is missing, so the cell is safe to re-run
        # and also works when the database was already created by hand.
        cursor.execute("SELECT 1 FROM pg_database WHERE datname = %s", (db_name,))
        if cursor.fetchone() is None:
            cursor.execute(
                sql.SQL("CREATE DATABASE {}").format(sql.Identifier(db_name))
            )
finally:
    # Later cells connect through PGVectorStore using `connection_string`/`db_name`,
    # so this administrative connection is released here instead of being left open.
    conn.close()

### Create the index
> Here we create an index backed by Postgres using the documents loaded previously. PGVectorStore takes a few arguments.

In [None]:
from sqlalchemy import make_url

# Break the connection URL into the individual fields PGVectorStore.from_params takes.
url = make_url(connection_string)
pg_params = dict(
    database=db_name,
    host=url.host,
    port=url.port,
    user=url.username,
    password=url.password,
    table_name="cleancode",
    embed_dim=1536,  # openai embedding dimension
)
vector_store = PGVectorStore.from_params(**pg_params)

# Build the index from the loaded documents, using the Postgres store as its backend.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    show_progress=True,
)

query_engine = index.as_query_engine()

### Query the index
> We can now ask questions using our index.

In [None]:
question = "Para Christopher Alexander qual era o objetivo do arquiteto?"
response = query_engine.query(question)

# Wrap the answer at 100 columns so it stays readable in the cell output.
print(textwrap.fill(str(response), width=100))

### Hybrid Search
> To enable hybrid search, you need to:

1. pass in hybrid_search=True when constructing the PGVectorStore (and optionally configure text_search_config with the desired language)
2. pass in vector_store_query_mode="hybrid" when constructing the query engine (this config is passed to the retriever under the hood). You can also optionally set the sparse_top_k to configure how many results we should obtain from sparse text search (default is using the same value as similarity_top_k).

In [None]:
# Same connection fields as the plain vector store, but with hybrid (dense + full-text)
# search switched on and a separate table.
url = make_url(connection_string)
hybrid_params = dict(
    database=db_name,
    host=url.host,
    port=url.port,
    user=url.username,
    password=url.password,
    table_name="cleancode__hybrid_search",
    embed_dim=1536,  # openai embedding dimension
    hybrid_search=True,
    # NOTE(review): the queries in this notebook are written in Portuguese; a
    # "portuguese" text search config may suit the data better. If changed, it must be
    # changed in every cell that opens this table - confirm before switching.
    text_search_config="english",
)
hybrid_vector_store = PGVectorStore.from_params(**hybrid_params)

# Build a second index from the same documents on top of the hybrid-enabled store.
storage_context = StorageContext.from_defaults(vector_store=hybrid_vector_store)
hybrid_index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

In [None]:
# Query engine that asks the store for hybrid results, taking 2 hits from sparse text search.
hybrid_query_engine = hybrid_index.as_query_engine(
    sparse_top_k=2,
    vector_store_query_mode="hybrid",
)

hybrid_question = "Jeniffer Kohnke e Angela Brooks são responsáveis por?"
hybrid_response = hybrid_query_engine.query(hybrid_question)
print(hybrid_response)

### Improving hybrid search with QueryFusionRetriever
> Since the scores for text search and vector search are calculated differently, the nodes that were found only by text search will have a much lower score. You can often improve hybrid search performance by using QueryFusionRetriever, which makes better use of the mutual information to rank the nodes.

In [51]:
from llama_index.core.response_synthesizers import CompactAndRefine
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

# One retriever per search mode over the same hybrid index.
text_retriever = hybrid_index.as_retriever(
    similarity_top_k=5,  # interchangeable with sparse_top_k in this context
    vector_store_query_mode="sparse",
)
vector_retriever = hybrid_index.as_retriever(
    similarity_top_k=5,
    vector_store_query_mode="default",
)

# Fuse both result lists into a single ranking of 5 nodes.
retriever = QueryFusionRetriever(
    retrievers=[vector_retriever, text_retriever],
    mode="relative_score",
    similarity_top_k=5,
    num_queries=1,  # set this to 1 to disable query generation
    use_async=False,
)

# Note: this rebinds `query_engine`, replacing the plain vector query engine defined earlier.
response_synthesizer = CompactAndRefine()
query_engine = RetrieverQueryEngine(
    response_synthesizer=response_synthesizer,
    retriever=retriever,
)

In [None]:
# Ask the fusion-backed query engine and show its answer.
chapter_question = (
    "Especifique os principais temas abordados no Capítulo 2: Nomes Significativos."
)
response = query_engine.query(chapter_question)
print(response)

In [None]:
# Point a store at the same hybrid table again, using the same settings as when it was filled.
url = make_url(connection_string)
hybrid_params = dict(
    database=db_name,
    host=url.host,
    port=url.port,
    user=url.username,
    password=url.password,
    table_name="cleancode__hybrid_search",
    embed_dim=1536,  # openai embedding dimension
    hybrid_search=True,
    text_search_config="english",
)
hybrid_vector_store = PGVectorStore.from_params(**hybrid_params)

# Build the index object directly from the store (no documents are passed in here).
hybrid_index = VectorStoreIndex.from_vector_store(vector_store=hybrid_vector_store)

In [8]:
from llama_index.core.response_synthesizers import CompactAndRefine
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

# Rebuild the retrievers so they use the `hybrid_index` that was just created from the store.
text_retriever = hybrid_index.as_retriever(
    similarity_top_k=5,  # interchangeable with sparse_top_k in this context
    vector_store_query_mode="sparse",
)
vector_retriever = hybrid_index.as_retriever(
    similarity_top_k=5,
    vector_store_query_mode="default",
)

# Combine dense and sparse results into one ranked list of 5 nodes.
retriever = QueryFusionRetriever(
    retrievers=[vector_retriever, text_retriever],
    mode="relative_score",
    similarity_top_k=5,
    num_queries=1,  # set this to 1 to disable query generation
    use_async=False,
)

response_synthesizer = CompactAndRefine()
query_engine = RetrieverQueryEngine(
    response_synthesizer=response_synthesizer,
    retriever=retriever,
)

In [None]:
# Run one more question through the rebuilt fusion query engine.
notation_question = "O que é a A Notação Húngara? Está descrito no Capítulo 2."
response = query_engine.query(notation_question)
print(response)