## Load RAG client

In [17]:
from gai.rag.client.rag_client_async import RagClientAsync
# Client for the locally running RAG service: `url` is the HTTP endpoint and
# `ws_url` the websocket endpoint under .../index-file/ws.
# NOTE(review): the scraper cell further down builds the same client with the
# key "client_type" instead of "type" — confirm which key the client expects.
rag = RagClientAsync(
    dict(
        type="rag",
        url="http://localhost:12036/gen/v1/rag",
        ws_url="ws://localhost:12036/gen/v1/rag/index-file/ws",
    )
)

import os
# Locate the sample speech next to the notebook.
# `os.path.dirname(__name__)` was not a real directory lookup: `__name__` is a
# module name ("__main__" in a notebook), so dirname() always returned "" and
# the path only resolved because it was implicitly relative to the working
# directory. Anchor it to the working directory explicitly instead, which
# resolves to the same file but yields an absolute, unambiguous path.
here = os.getcwd()
file_path = os.path.join(here, "pm_long_speech_2023.txt")

## Single-step Indexing

In [18]:
async def listener(status):
    """Echo one status update from the indexing call to the cell output."""
    line = f"Status: {status}"
    print(line)
result = await rag.index_document_async(
    collection_name="demo",
    file_path=file_path,
    title="2023 National Day Rally Speech",
    source="https://www.pmo.gov.sg/Newsroom/national-day-rally-2023",
    async_callback=listener,
)

Status: {"message": "Request received."}
Status: {"message": "Breaking down document into chunks ..."}
Status: {"message": "Start indexing..."}
Status: {"progress": 1}
Status: {"progress": 3}
Status: {"progress": 4}
Status: {"progress": 6}
Status: {"progress": 7}
Status: {"progress": 9}
Status: {"progress": 10}
Status: {"progress": 12}
Status: {"progress": 13}
Status: {"progress": 15}
Status: {"progress": 16}
Status: {"progress": 18}
Status: {"progress": 19}
Status: {"progress": 21}
Status: {"progress": 22}
Status: {"progress": 24}
Status: {"progress": 25}
Status: {"progress": 27}
Status: {"progress": 28}
Status: {"progress": 30}
Status: {"progress": 31}
Status: {"progress": 33}
Status: {"progress": 34}
Status: {"progress": 36}
Status: {"progress": 37}
Status: {"progress": 39}
Status: {"progress": 40}
Status: {"progress": 42}
Status: {"progress": 43}
Status: {"progress": 45}
Status: {"progress": 46}
Status: {"progress": 48}
Status: {"progress": 50}
Status: {"progress": 51}
Status: {"pr

## Multi-step Indexing

In [19]:
# Locate the sample speech next to the notebook.
# `os.path.dirname(__name__)` always evaluated to "" because `__name__` is a
# module name ("__main__" in a notebook), not a file path; the join therefore
# only worked as an implicit working-directory-relative path. Use the working
# directory explicitly so the result is absolute and unambiguous.
here = os.getcwd()
file_path = os.path.join(here, "pm_long_speech_2023.txt")

# step 1: create header
print(f"Creating document header...")
result = await rag.step_header_async(
    collection_name="demo",
    file_path=file_path,
    title="2023 National Day Rally Speech",
    source="https://www.pmo.gov.sg/Newsroom/national-day-rally-2023"
)
print(f"Document header created.")

# step 2: split file
print(f"Begin splitting document chunks...")
collection_name="demo"
document_id="PwR6VmXqAfwjn84ZM6dePsLWTldPv8cNS5dESYlsY2U"
result = await rag.step_split_async(
    collection_name=collection_name,
    document_id=document_id,
    chunk_size=1000,
    chunk_overlap=100
)
chunkgroup_id=result.Id
print(f"Document splitted into chunks.")

# step 3: index chunks
print(f"Indexing document chunks...")
async def listener(status):
    print(f"Status: {status}")
result = await rag.step_index_async(
    collection_name=collection_name,
    document_id=document_id,
    chunkgroup_id=chunkgroup_id,
    async_callback=listener)
print(f"Indexing is completed.")


Creating document header
Document header created
Begin splitting document chunks.
Document splitted into chunks.
Indexing document chunks.
Status: {"progress": 1}
Status: {"progress": 3}
Status: {"progress": 4}
Status: {"progress": 6}
Status: {"progress": 7}
Status: {"progress": 9}
Status: {"progress": 10}
Status: {"progress": 12}
Status: {"progress": 13}
Status: {"progress": 15}
Status: {"progress": 16}
Status: {"progress": 18}
Status: {"progress": 19}
Status: {"progress": 21}
Status: {"progress": 22}
Status: {"progress": 24}
Status: {"progress": 25}
Status: {"progress": 27}
Status: {"progress": 28}
Status: {"progress": 30}
Status: {"progress": 31}
Status: {"progress": 33}
Status: {"progress": 34}
Status: {"progress": 36}
Status: {"progress": 37}
Status: {"progress": 39}
Status: {"progress": 40}
Status: {"progress": 42}
Status: {"progress": 43}
Status: {"progress": 45}
Status: {"progress": 46}
Status: {"progress": 48}
Status: {"progress": 50}
Status: {"progress": 51}
Status: {"progres

## Retrieve chunks by query

In [14]:
data = {
    "collection_name": "demo",
    "query_texts": "Who are the young seniors?",
}
result = await rag.retrieve_async(**data)
print(result)


[{'documents': 'Especially for those in their 50s and early 60s. Let us call them the “Young Seniors”. "Young”, because you are younger than the Pioneer Generation and the Merdeka Generation; “Seniors”, because you will soon retire, or maybe you have recently retired.', 'metadatas': {'Abstract': '', 'ChunkGroupId': 'fc57ef2d-ee4b-4db7-b57b-3efb8b8525e2', 'DocumentId': 'PwR6VmXqAfwjn84ZM6dePsLWTldPv8cNS5dESYlsY2U', 'Keywords': '', 'PublishedDate': '', 'Source': 'https://www.pmo.gov.sg/Newsroom/national-day-rally-2023', 'Title': '2023 National Day Rally Speech'}, 'distances': 0.09020859003067017, 'ids': '38984cbc-8f96-4a61-9c00-11160fa30ab8'}, {'documents': 'Young Seniors are in a unique position today. Compared to the Pioneer and Merdeka Generations, you have benefited more from Singapore’s growth, and generally done better in life. But compared to workers younger than you, in their 30s and 40s today, you have generally earned less over your lifetimes. You have also had less time to ben

## List Docs

In [27]:
import json
result = await rag.list_collections_async()
print("COLLECTIONS:")
print(json.dumps(result))

result = await rag.list_documents_async()
print("DOCUMENTS:")
print(result)

result = await rag.list_documents_async("demo")
print("DOCUMENTS BY COLLECTION:")
print(result)

result = await rag.get_document_header_async("demo","PwR6VmXqAfwjn84ZM6dePsLWTldPv8cNS5dESYlsY2U")
print("DOCUMENT:")
print(result)

result = await rag.list_document_chunks_async("demo","56bf2645-5fe6-44c8-a4b5-f11299ec72dc")
print("CHUNKS:")
print(result)

chunk_id = result[-1]
result = await rag.get_document_chunk_async("demo",chunk_id)
print("CHUNK:")
print(result)



COLLECTIONS:
{"collections": ["demo"]}
DOCUMENTS:
[IndexedDocPydantic(Id='PwR6VmXqAfwjn84ZM6dePsLWTldPv8cNS5dESYlsY2U', CollectionName='demo', ByteSize=43352, FileName='pm_long_speech_2023.txt', FileType='', File=None, Source='https://www.pmo.gov.sg/Newsroom/national-day-rally-2023', Abstract=None, Authors='', Title='2023 National Day Rally Speech', Publisher='', PublishedDate=None, Comments='', Keywords='', CreatedAt=datetime.datetime(2024, 10, 28, 7, 34, 9, 827516), UpdatedAt=datetime.datetime(2024, 10, 28, 7, 46, 51, 900674), IsActive=True, ChunkGroups=[IndexedDocChunkGroupPydantic(Id='56bf2645-5fe6-44c8-a4b5-f11299ec72dc', DocumentId='PwR6VmXqAfwjn84ZM6dePsLWTldPv8cNS5dESYlsY2U', SplitAlgo='recursive_split', ChunkCount=66, ChunkSize=1000, Overlap=100, IsActive=True, ChunksDir='/tmp/chunks/9a65a59855e44829ae1f9ec03df19b3f')])]
DOCUMENTS BY COLLECTION:
[IndexedDocPydantic(Id='PwR6VmXqAfwjn84ZM6dePsLWTldPv8cNS5dESYlsY2U', CollectionName='demo', ByteSize=43352, FileName='pm_long_speech

[41m[30mERROR   [0m [31mhttp_utils.http_get_async: 500: {'code': 'unknown', 'message': 'Internal Server Error'}[0m


ApiException: 500: {'code': 'unknown', 'message': 'Internal Server Error'}

In [23]:
result = await rag.list_documents_async()
print("DOCUMENTS:")
print(result)


DOCUMENTS:
[IndexedDocPydantic(Id='PwR6VmXqAfwjn84ZM6dePsLWTldPv8cNS5dESYlsY2U', CollectionName='demo', ByteSize=43352, FileName='pm_long_speech_2023.txt', FileType='', File=None, Source='https://www.pmo.gov.sg/Newsroom/national-day-rally-2023', Abstract=None, Authors='', Title='2023 National Day Rally Speech', Publisher='', PublishedDate=None, Comments='', Keywords='', CreatedAt=datetime.datetime(2024, 10, 28, 7, 34, 9, 827516), UpdatedAt=datetime.datetime(2024, 10, 28, 7, 46, 51, 900674), IsActive=True, ChunkGroups=[IndexedDocChunkGroupPydantic(Id='56bf2645-5fe6-44c8-a4b5-f11299ec72dc', DocumentId='PwR6VmXqAfwjn84ZM6dePsLWTldPv8cNS5dESYlsY2U', SplitAlgo='recursive_split', ChunkCount=66, ChunkSize=1000, Overlap=100, IsActive=True, ChunksDir='/tmp/chunks/9a65a59855e44829ae1f9ec03df19b3f')])]


---
### Scraper Client

In [1]:
from gai.rag.client.rag_client_async import RagClientAsync
rag = RagClientAsync({
    "client_type":"rag",
    "url":"http://localhost:12036/gen/v1/rag",
})
result=await rag.web_search_async("Singapore current time")

In [2]:
# Bare expression so the notebook renders the search hits; the saved output
# shows a list of dicts with 'index', 'link_title', 'link' and 'chunk' keys.
result

[{'index': 0,
  'link_title': 'https://www.timeanddate.com/worldclock/singapore',
  'link': 'https://www.timeanddate.com/worldclock/singapore',
  'chunk': 'Business Date (exclude holidays) Weekday Calculator Week Number Calculator Roman Numeral Converter Alternative Age Calculator Date Pattern Calculator Distance Calculator My Account My Account My Location My Units My Events My World Clock My Privacy Paid Services Sign in Register Home Time Zones World Clock Singapore Current Local Time in Singapore Time/General Weather Time Zone DST Changes Sun & Moon Eclipses 12 3 6 9 1 2 4 5 7 8 10 11 14:40:52 SGT Tuesday, 18 February 2025 Fullscreen Country: Singapore Long Name: Republic of Singapore Abbreviations: SG, SGP Capital: Singapore Time Zones: 1 Dial Code: +65 Time Zone in Singapore 14:40 Singapore SGT UTC +8 See all Time Zones in Singapore See Holidays in Singapore Create a Calendar for Singapore Current Local Time in Locations in Singapore with Links for More Information (1 Location) S