In [4]:
! pip install -q ragstack-ai pypdf playwright apify-client

Configure the Environment Variables

In [1]:
import os
from getpass import getpass

# Enter your settings for Astra DB and OpenAI:
os.environ["ASTRA_DB_API_ENDPOINT"] = input("Enter your Astra DB API Endpoint: ")
os.environ["ASTRA_DB_APPLICATION_TOKEN"] = getpass("Enter your Astra DB Token: ")
os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API Key: ")

In [7]:
from langchain_openai import OpenAIEmbeddings
from langchain_astradb import AstraDBVectorStore
import os

# Configure your embedding model and vector store
embedding = OpenAIEmbeddings(model="text-embedding-3-large", dimensions=1024)
vstore = AstraDBVectorStore(
    collection_name="acb_chatbot_new",
    embedding=embedding,
    token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
    api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"),
)
print("Astra vector store configured")

Astra vector store configured


Scrape webiste by calling the actor from the Python code

In [None]:
from langchain.document_loaders import ApifyDatasetLoader
from langchain.utilities import ApifyWrapper
from langchain_core.document_loaders.base import Document

os.environ["APIFY_API_TOKEN"] = input("Enter your Apify Token: ")

apify = ApifyWrapper()
loader = apify.call_actor(
   actor_id="apify/website-content-crawler",
   run_input={"startUrls": [{"url": "https://acb.com.vn/en"}]},
   dataset_mapping_function=lambda item: Document(
       page_content=item["text"] or "", metadata={"source": item["url"]}
   ),
)

docs = loader.load()

Load Dataset when the scraped content is already available by passing the Dataset ID

In [5]:
from langchain_community.document_loaders import ApifyDatasetLoader
from langchain_core.documents import Document

os.environ["APIFY_API_TOKEN"] = input("Enter your Apify Token: ")

loader = ApifyDatasetLoader(
    dataset_id="OjtIQh7bWcK3cDxi3",
    dataset_mapping_function=lambda dataset_item: Document(
        page_content=dataset_item["text"], metadata={"source": dataset_item["url"]}
    ),
)
documents = loader.load()

Load the dataset in AstraDB

In [8]:
# Create embeddings by inserting your documents into the vector store.
inserted_ids = vstore.add_documents(documents)
print(f"\nInserted {len(inserted_ids)} documents.")


Inserted 214 documents.


In [None]:
#for Debugging, search the DB on the query
res=print(vstore.similarity_search_with_score("What are the benefits of home mortgage?", k=5))
for res, score in results:
    print(f"* [SIM={score:5f}] [{res.metadata}]")

[(Document(page_content='Loans\nHome\n/Personal - Loans\nFinancial stability for a trouble-free life\nAccommodating a variety of loan purposes with competitive interest rates\nChoose a type of loans\nOnline loan\nStep 1\nFirst, you must present your I.D. card or passport to ACB’s office.\nStep 2\nYou make a request for online loan. Then ACB’s staff will ask for your name and I.D. card or passport.\nStep 3\nYou are instructed to download ACB Mobile Banking application and provided with username and sent a password to your mobile number.\nLoan interest calculator\nMonthly principal payment\n0 VNĐ\nAmount of interest paid monthly\n0 VNĐ\nAmount to pay first month\n0 VNĐ\nRegistration for consultation\n© 2023 Asia Commercial Joint Stock Bank\n© 2023 Asia Commercial Joint Stock Bank\nWe use cookies to give you the best online experience, measure your visits to our site and to enable marketing activities. For details, see the ACB Cookie Notice.', metadata={'source': 'https://acb.com.vn/en/pe

In [None]:
#for Debugging
results_filtered = vstore.similarity_search("",
    k=3,
    filter={"source": "https://acb.com.vn/en/personal-borrow/home-mortgage"},
)
for res in results_filtered:
    print(f"* {res.page_content} [{res.metadata}]")

* We use cookies to give you the best online experience, measure your visits to our site and to enable marketing activities. For details, see the ACB Cookie Notice. [{'source': 'https://acb.com.vn/en/personal-borrow/home-mortgage'}]


In [None]:
#for Debugging, search the DB on the query
res=print(vstore.max_marginal_relevance_search("What are the benefits of home mortgage?", k=5))
for res, score in results:
    print(f"* [SIM={score:5f}] [{res.metadata}]")

[Document(page_content='Loans\nHome\n/Personal - Loans\nFinancial stability for a trouble-free life\nAccommodating a variety of loan purposes with competitive interest rates\nChoose a type of loans\nOnline loan\nStep 1\nFirst, you must present your I.D. card or passport to ACB’s office.\nStep 2\nYou make a request for online loan. Then ACB’s staff will ask for your name and I.D. card or passport.\nStep 3\nYou are instructed to download ACB Mobile Banking application and provided with username and sent a password to your mobile number.\nLoan interest calculator\nMonthly principal payment\n0 VNĐ\nAmount of interest paid monthly\n0 VNĐ\nAmount to pay first month\n0 VNĐ\nRegistration for consultation\n© 2023 Asia Commercial Joint Stock Bank\n© 2023 Asia Commercial Joint Stock Bank\nWe use cookies to give you the best online experience, measure your visits to our site and to enable marketing activities. For details, see the ACB Cookie Notice.', metadata={'source': 'https://acb.com.vn/en/per