In [41]:
import asyncio
import os
from time import time

from datasets import load_dataset
from dotenv import load_dotenv
from pgml import Database
from rich.console import Console
from rich.pretty import pprint

In [10]:
# Load settings from a local .env file (if one exists) so that os.environ
# lookups, such as PGML_CONNECTION in the next cell, can see them.
load_dotenv()

In [5]:
# Fallback connection string: the pgml_development database on localhost:5433.
local_pgml = "postgres://postgres@127.0.0.1:5433/pgml_development"
# PGML_CONNECTION, when present in the environment, overrides the fallback.
conninfo = os.getenv("PGML_CONNECTION", local_pgml)

In [6]:
# Prepare Data
# Load the Quora train split and flatten the "text" entries of every item in
# its "questions" column into one list of question strings.
dataset = load_dataset("quora", split="train")
questions = []

for record in dataset["questions"]:
    questions.extend(record["text"])

# Remove duplicates and empty values. dict.fromkeys keeps first-seen order, so
# `documents` (and any slice of it) is the same on every kernel restart;
# iterating a set of strings would instead follow the per-process hash seed and
# make a slice such as documents[:200] pick different questions on each run.
documents = [{"text": question} for question in dict.fromkeys(questions) if question]

Found cached dataset quora (/Users/santis/.cache/huggingface/datasets/quora/default/0.0.0/36ba4cd42107f051a158016f1bea6ae3f4685c5df843529108a54e42d86c1e04)


In [7]:
db = Database(conninfo)
collection_name = "quora_collection"
collection = await db.create_or_get_collection(collection_name)

In [11]:
# Upsert documents, chunk text, and generate embeddings
await collection.upsert_documents(documents[:200])
await collection.generate_chunks()
await collection.generate_embeddings()

In [43]:
start = time()
query = "What is a good mobile os?"
result = await collection.vector_search(query)
_end = time()
pprint(result)
pprint("Query time = %0.3f" % (_end - start))

In [18]:
start = time()
console.print("Query using query builder ..")
query = "What is a good mobile os?"
result = await collection.query().vector_recall(query).limit(5).run()
_end = time()

console.print("\nResults for '%s'" % (query), style="bold")
console.print(result)
console.print("Query time = %0.3f" % (_end - start))

In [19]:
# Generate 'english' text-search vectors for the collection; presumably needed
# before filter_full_text() in the following query — confirm against pgml docs.
await collection.generate_tsvectors('english')

In [35]:
result = await collection.query().vector_recall(query).filter_full_text("web").limit(10).run()
result

[(0.8406398640938563,
  '"What are some examples of good UI/UX design for a b2b web app?"',
  {}),
 (0.8217760413996846,
  '"What do you use as a note taking, brainstorming, web clip-keeping application on the Mac?"',
  {}),
 (0.8120549943123971, '"How does WhatsApp web work?"', {})]