In [None]:
# Install the LlamaIndex integrations this notebook relies on into the kernel's environment:
# the entity-extractor package and the Postgres vector-store package.
# NOTE(review): versions are unpinned — consider pinning for reproducible runs.
%pip install llama-index-extractors-entity
%pip install llama-index-vector-stores-postgres

**Installing the pgvector extension**

```sql
CREATE EXTENSION vector;
CREATE TABLE documents (
    id serial PRIMARY KEY,
    content text,
    embedding vector(1536)  -- if using OpenAI
);
```


In [None]:
from llama_index.llms.openai import OpenAI
from llama_index.core.schema import MetadataMode
from dotenv import load_dotenv
import os
import nest_asyncio
import os
import pandas as pd
from llama_index.core import Settings, Document, VectorStoreIndex
from llama_index.core.extractors.metadata_extractors import SummaryExtractor
from llama_index.core import get_response_synthesizer
from llama_index.vector_stores.postgres import PGVectorStore
from llama_index.core import StorageContext

# Patch asyncio with nest_asyncio before any async work is started from this notebook.
nest_asyncio.apply()

# Load variables from the .env file into the process environment, then read the API key.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# OpenAI LLM client: gpt-4o-mini with a low sampling temperature (0.1).
llm = OpenAI(
    model="gpt-4o-mini",
    temperature=0.1,
    api_key=OPENAI_API_KEY,
)

In [None]:
from urllib.parse import quote

from sqlalchemy import make_url
from dotenv import load_dotenv
import os
import pandas as pd
from sqlalchemy import create_engine
from llama_index.core import StorageContext

load_dotenv()

# Database connection details, read from the environment (.env file).
# The fallbacks reproduce the values that used to be hard-coded in the
# connection string, so an environment that does not define them behaves as before.
DB_USER = os.getenv("DB_USER") or "postgres"
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_HOST = os.getenv("DB_HOST") or "1.2.3.4"  # placeholder address; set DB_HOST in .env
DB_PORT = os.getenv("DB_PORT") or "5432"  # Default PostgreSQL port
DB_NAME = os.getenv("DB_NAME") or "postgres"
TABLE_NAME = "hts_data"

# Percent-encode the credential and database parts so that characters such as
# '@', ':' or '/' in a password cannot corrupt the URL when it is parsed later
# with make_url(). A missing password becomes an empty string rather than the
# literal text "None".
connection_string = (
    f"postgresql://{quote(DB_USER, safe='')}:{quote(DB_PASSWORD or '', safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{quote(DB_NAME, safe='')}"
)


In [12]:
from llama_index.readers.file.docs import PDFReader
from pathlib import Path
# Read the source PDF from the local data directory into `documents` via PDFReader.
pdf_path = Path("./data/Cristiano_Ronaldo.pdf")
loader = PDFReader()
documents = loader.load_data(file=pdf_path)

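**Notes**

The `PGVectorStore` configuration further below tells the system to:

* Store vectors of size 1536 (`embed_dim=1536`).

* Use cosine similarity to compare them (`"hnsw_dist_method": "vector_cosine_ops"`).

* Use HNSW indexing with these performance/accuracy trade-offs (`hnsw_m`, `hnsw_ef_construction`, `hnsw_ef_search`).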

In [None]:
# Show a compact summary instead of dumping every Document's full text,
# which floods the notebook output for a multi-page PDF.
PREVIEW_DOCS = 3  # how many documents to preview
PREVIEW_CHARS = 200  # how many characters of each document's text to show

print(f"Loaded {len(documents)} document(s)")
for doc in documents[:PREVIEW_DOCS]:
    print(doc.metadata, repr(doc.text[:PREVIEW_CHARS]))

In [18]:
# Split the connection string into host / port / user / password components.
url = make_url(connection_string)

# HNSW index settings, passed through to the vector store as-is.
hnsw_settings = {
    "hnsw_m": 16,
    "hnsw_ef_construction": 64,
    "hnsw_ef_search": 40,
    "hnsw_dist_method": "vector_cosine_ops",
}

# Note: the database name comes from DB_NAME, not from the parsed URL.
vector_store = PGVectorStore.from_params(
    host=url.host,
    port=url.port,
    user=url.username,
    password=url.password,
    database=DB_NAME,
    table_name="ronaldopdf_vector",
    embed_dim=1536,  # openai embedding dimension text-embedding-ada-002
    hnsw_kwargs=hnsw_settings,
)

# Back the index with the Postgres vector store and build it from the loaded documents.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True,
    storage_context=storage_context,
)


Parsing nodes: 100%|██████████| 39/39 [00:00<00:00, 364.19it/s]
Generating embeddings: 100%|██████████| 98/98 [00:06<00:00, 14.85it/s]
