In [2]:
# --------------------------------------------------------------
# import libraries
# --------------------------------------------------------------

import os
import time

from dotenv import load_dotenv
from langchain_community.document_loaders import TextLoader
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_postgres.vectorstores import PGVector
from langchain_community.vectorstores.pgvector import PGVector
from pyprojroot import here
from langchain_postgres import PGVector

# Populate os.environ from a .env file, if one is found (python-dotenv).
load_dotenv()

True

In [None]:
# --------------------------------------------------------------
# connection
# --------------------------------------------------------------

# Database credentials must not live in the notebook. The SQLAlchemy URL is
# read from the environment; load_dotenv() in the import cell also picks it up
# from a local, git-ignored .env file, e.g.:
#
#   PGVECTOR_CONNECTION=postgresql+psycopg://<user>:<password>@localhost:5432/test_db
#
# Characters that are special in a URL (such as "@", ":" or "/") must be
# percent-encoded when they occur in the password.
#
# The fallback carries no credentials, so the driver relies on the libpq
# defaults (PGUSER / PGPASSWORD / ~/.pgpass) instead of an embedded secret.
# NOTE(review): the password that used to be committed here should be rotated.
DEFAULT_CONNECTION = "postgresql+psycopg://localhost:5432/test_db"

connection = os.getenv("PGVECTOR_CONNECTION", DEFAULT_CONNECTION)

In [None]:
# --------------------------------------------------------------
# load document from data source
# --------------------------------------------------------------

# Locate the book through pyprojroot's `here()` rather than a path relative to
# the notebook's current working directory.
file_path = here("data/txt/The Project Gutenberg eBook of A Christmas Carol in Prose; Being a Ghost Story of Christmas.txt")

# Decode explicitly: without `encoding`, the file is opened with the platform's
# default encoding, which garbles or rejects the text on non-UTF-8 locales.
# NOTE(review): assumes the Gutenberg .txt export is UTF-8 — confirm.
loader = TextLoader(str(file_path), encoding="utf-8")

documents = loader.load()

# chunk_size is a target, not a hard cap: CharacterTextSplitter only cuts on
# its separator, so a long paragraph can still produce an oversized chunk.
text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Embedding model served by a local Ollama instance.
embeddings = OllamaEmbeddings(model="bge-m3:latest")

# Probe text used by the similarity-search benchmark further down.
query = "The Project Gutenberg eBook of A Christmas Carol in Prose; Being a Ghost Story of Christmas"


In [None]:
# --------------------------------------------------------------
# create pgvector store
# --------------------------------------------------------------
# Prerequisites:
# - Download PostgreSQL to run locally:
#   https://www.postgresql.org/download/
# - Install the pgvector extension:
#   https://github.com/pgvector/pgvector
# - Fix common installation issues:
#   https://github.com/pgvector/pgvector?tab=readme-ov-file#installation-notes

collection_name = "The Project Gutenberg eBook of A Christmas Carol in Prose"

# create store
vector_store = PGVector(
    embeddings=embeddings,
    collection_name=collection_name,
    connection=connection,
    use_jsonb=True,
)

# Index the chunks. Constructing the store alone leaves the collection empty,
# so on a fresh database the query cell would have nothing to return.
# Position-based ids keep the cell idempotent: re-running it overwrites the
# rows with the same ids instead of inserting duplicates.
if docs:
    vector_store.add_documents(
        docs,
        ids=[f"{collection_name}:{position}" for position in range(len(docs))],
    )

# load store: a second handle on the same collection, showing how to
# reconnect to an already-populated store without re-indexing.
pgvector_docsearch = PGVector(
    embeddings=embeddings,
    collection_name=collection_name,
    connection=connection,
    use_jsonb=True,
)


In [None]:
# --------------------------------------------------------------
# query the index with pgvector 
# --------------------------------------------------------------
def run_query_pgvector(docsearch, query, k=4):
    """Return the text of the best match for ``query``.

    Args:
        docsearch: Vector store exposing ``similarity_search(query, k=...)``.
        query: Text to search for.
        k: Number of neighbours requested from the store. Defaults to 4, the
            value that used to be hard-coded; only the first hit is returned.

    Returns:
        The ``page_content`` of the first document returned by the search.

    Raises:
        IndexError: If the search returns no documents, for example because
            the collection has not been populated yet.
    """
    matches = docsearch.similarity_search(query, k=k)
    if not matches:
        # Same exception type as the bare `docs[0]` lookup, but with a message
        # that points at the likely cause.
        raise IndexError(
            f"similarity_search returned no documents for query {query!r}; "
            "is the collection populated?"
        )
    return matches[0].page_content


def calculate_average_execution_time(func, *args, **kwargs):
    """Run ``func`` ten times, then print its last result and mean run time.

    Args:
        func: Callable to benchmark.
        *args: Positional arguments forwarded to ``func`` on every run.
        **kwargs: Keyword arguments forwarded to ``func`` on every run.

    Returns:
        None. The result of the final run and the average duration (seconds,
        rounded to two decimals) are printed instead, so a notebook cell that
        ends with this call shows no extra ``Out[...]`` value.
    """
    num_runs = 10
    total_execution_time = 0.0
    for _ in range(num_runs):
        # perf_counter() is monotonic and high-resolution; time.time() follows
        # the wall clock, which can jump and skew (or even negate) a duration.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        total_execution_time += time.perf_counter() - start_time
    average_execution_time = round(total_execution_time / num_runs, 2)
    print(result)
    print(
        f"\nThe function took an average of {average_execution_time} seconds to execute."
    )

# Time the top-hit lookup against the Christmas Carol collection.
calculate_average_execution_time(run_query_pgvector, pgvector_docsearch, query)


"I am the Ghost of Christmas Past."

"Long Past?" inquired Scrooge: observant of its dwarfish
stature.

"No. Your past."

Perhaps, Scrooge could not have told anybody why, if
anybody could have asked him; but he had a special desire
to see the Spirit in his cap; and begged him to be covered.

"What!" exclaimed the Ghost, "would you so soon put out,
with worldly hands, the light I give? Is it not enough
that you are one of those whose passions made this cap, and
force me through whole trains of years to wear it low upon
my brow!"

Scrooge reverently disclaimed all intention to offend
or any knowledge of having wilfully "bonneted" the Spirit at
any period of his life. He then made bold to inquire what
business brought him there.

"Your welfare!" said the Ghost.

Scrooge expressed himself much obliged, but could not
help thinking that a night of unbroken rest would have been
more conducive to that end. The Spirit must have heard
him thinking, for it said immediately:

"Your reclamation, t

In [None]:
# --------------------------------------------------------------
# add another collection to pgvector
# --------------------------------------------------------------

file_path = here("data/txt/The Project Gutenberg eBook of Romeo and Juliet.txt")

# Decode explicitly: without `encoding`, the file is opened with the platform's
# default encoding, which garbles or rejects the text on non-UTF-8 locales.
# NOTE(review): assumes the Gutenberg .txt export is UTF-8 — confirm.
loader = TextLoader(str(file_path), encoding="utf-8")
documents = loader.load()

# chunk_size is a target, not a hard cap: the splitter only cuts on its
# separator, so longer passages are kept whole and reported with
# "Created a chunk of size ..." warnings.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
new_docs = text_splitter.split_documents(documents)


collection_name_2 = "The Project Gutenberg eBook of Romeo and Juliet"

# NOTE: this rebinds `vector_store` to the Romeo and Juliet collection.
vector_store = PGVector(
    embeddings=embeddings,
    collection_name=collection_name_2,
    connection=connection,
    use_jsonb=True,
)

# Write the chunks into the new collection; constructing the store alone
# leaves it empty. Position-based ids keep the cell idempotent: re-running it
# overwrites the rows with the same ids instead of inserting duplicates.
if new_docs:
    vector_store.add_documents(
        new_docs,
        ids=[f"{collection_name_2}:{position}" for position in range(len(new_docs))],
    )

Created a chunk of size 1105, which is longer than the specified 1000
Created a chunk of size 1437, which is longer than the specified 1000
Created a chunk of size 1871, which is longer than the specified 1000
Created a chunk of size 1015, which is longer than the specified 1000
Created a chunk of size 1006, which is longer than the specified 1000
Created a chunk of size 1054, which is longer than the specified 1000
Created a chunk of size 1432, which is longer than the specified 1000
Created a chunk of size 1367, which is longer than the specified 1000
Created a chunk of size 2178, which is longer than the specified 1000
Created a chunk of size 1390, which is longer than the specified 1000
Created a chunk of size 1502, which is longer than the specified 1000
Created a chunk of size 1410, which is longer than the specified 1000
Created a chunk of size 1741, which is longer than the specified 1000
Created a chunk of size 1184, which is longer than the specified 1000
Created a chunk of s

In [None]:
# --------------------------------------------------------------
# delete collection
# --------------------------------------------------------------
# `PGVector` is the langchain_postgres class here (it is the last PGVector
# import in the import cell, so it wins). Its constructor takes `embeddings=`
# and `connection=`; the legacy `embedding_function=` / `connection_string=`
# keywords belong to the deprecated langchain_community class and raise
# TypeError with this one.
pgvector = PGVector(
    embeddings=embeddings,
    collection_name=collection_name,
    connection=connection,
)

# Removes the Christmas Carol collection that `collection_name` refers to.
pgvector.delete_collection()
print("✅ Koleksi berhasil dihapus dari database.")

✅ Koleksi berhasil dihapus dari database.


  pgvector = PGVector(
