# Documents API

### GET /api/v1/persona/{persona_id}/documents

List public documents or private documents owned by the persona.

In [1]:
import httpx

# Request the document list for the all-zero test persona and print the JSON
# body exactly as returned (either a document list or an error detail).
with httpx.Client() as client:
    response = client.get(
        'http://localhost:12033/api/v1/persona/00000000-0000-0000-0000-000000000000/documents'
    )
    print(response.json())


{'detail': {'code': 'unknown_error', 'message': 'An unexpected error occurred. Please try again later.', 'id': 'f44bf83b-790b-4008-8f6b-b45ed480d693'}}


### POST /api/v1/persona/{persona_id}/document/step/upload
Step 1 - Upload the agent document to a temporary directory on the API server

In [7]:
import os
from gai.lib.common.utils import this_dir
import httpx

# Step 1: upload the sample text file that sits next to this notebook.
# The response's "filename" field names the copy stored by the API server;
# later cells read it from `uploaded_file_path`.
path = os.getcwd()
file_path = os.path.join(path, 'where_to_find_the_best_chicken_rice_in_singapore.txt')
with open(file_path, 'rb') as f:
    # Send only the base name as the multipart filename so the absolute local
    # path is not disclosed to the server.
    files = {'file': (os.path.basename(file_path), f)}
    with httpx.Client() as client:
        response = client.post('http://localhost:12033/api/v1/persona/00000000-0000-0000-0000-000000000000/document/step/upload', 
                    files=files,
                    timeout=3000  # seconds
                    )
        # Parse the body once; print it first so an error payload stays visible.
        upload_result = response.json()
        print(upload_result)
        # Fail with the HTTP status instead of a KeyError on "filename" below.
        response.raise_for_status()
        uploaded_file_path = upload_result["filename"]

# check: the stored copy is looked up on the local filesystem, so this is only
# meaningful when the notebook runs on the same machine as the API server.
os.path.exists(f"/tmp/00000000-0000-0000-0000-000000000000/{uploaded_file_path}")

{'filename': '7020a45f-2811-404f-b782-32cc35ac63d2.txt'}


True

### POST /api/v1/persona/{persona_id}/document/step/header

Step 2 - Upload the file to the RAG server and create the document header

In [8]:
import httpx
from gai.persona.docs.pydantic.FlattenedAgentDocumentPydantic import FlattenedAgentDocumentPydantic

# Step 2: describe the file uploaded in the previous step (owning persona,
# server-side path, title, source URL) and post that header to the API.
# Requires `uploaded_file_path` from the upload cell.
data = FlattenedAgentDocumentPydantic(
    AgentId="00000000-0000-0000-0000-000000000000",
    FileName=f"/tmp/00000000-0000-0000-0000-000000000000/{uploaded_file_path}",
    Title="8 MICHELIN-Listed Spots For Chicken Rice",
    Source="https://guide.michelin.com/sg/en/article/dining-out/8-michelin-listed-spots-for-chicken-rice",
)

with httpx.Client() as client:
    # NOTE(review): .dict() is the pydantic v1 spelling; under pydantic v2 it is
    # deprecated in favour of .model_dump() — confirm the installed version.
    response = client.post('http://localhost:12033/api/v1/persona/00000000-0000-0000-0000-000000000000/document/step/header', 
                json=data.dict(),
                timeout=3000  # seconds
                )
    doc=response.json()
    if response.status_code >= 400:
        # Show the server's error detail before failing.
        print(doc)
    # Fail with the HTTP status instead of a KeyError on "Id" below.
    response.raise_for_status()
    # The document id is needed by the split and index steps.
    doc_id=doc["Id"]

# check: the new document should now appear in the persona's document list
with httpx.Client() as client:
    response = client.get("http://localhost:12033/api/v1/persona/00000000-0000-0000-0000-000000000000/documents")
    print(response.json())


{'documents': [{'Id': 'PPa4WO6HZIWHS4cFAWTeiAtpyA1QYHRLTNtLQyzCBHA', 'AgentId': '00000000-0000-0000-0000-000000000000', 'FileName': 'd0923ed2-c4f3-473a-8181-19f68521c760.txt', 'FileType': '.txt', 'Source': 'https://guide.michelin.com/sg/en/article/dining-out/8-michelin-listed-spots-for-chicken-rice', 'ByteSize': 13991, 'Title': '8 MICHELIN-Listed Spots For Chicken Rice', 'Abstract': None, 'Authors': None, 'Publisher': None, 'PublishedDate': None, 'Comments': None, 'Keywords': None, 'ChunkGroupId': 'ffd23115-d890-4913-81ce-feff92789b13', 'ChunkSize': 1000, 'ChunkOverlap': 100, 'ChunkCount': 23}]}


### POST /api/v1/persona/{persona_id}/document/step/split

Split the file into chunks and save the chunks on the RAG server.

In [9]:
import httpx

# Ask the API to split the document registered in the header step.
# Requires `doc_id` from that cell; the "Id" of the response is kept in
# `group_id` for the index step.
with httpx.Client() as client:
    data = {
        "DocumentId":doc_id,
        "ChunkSize":1000,
        "ChunkOverlap":100,
    }
    response = client.post('http://localhost:12033/api/v1/persona/00000000-0000-0000-0000-000000000000/document/step/split', 
                json=data,
                timeout=3000  # seconds
                )
    # Parse once and print before reading "Id", so an error body is shown
    # even when the request failed.
    split_result = response.json()
    print(split_result)
    # Fail with the HTTP status instead of a KeyError on "Id" below.
    response.raise_for_status()
    group_id = split_result["Id"]

{'Id': '53b11f12-2a16-415a-9158-a37c2f9b278a', 'DocumentId': 'PPa4WO6HZIWHS4cFAWTeiAtpyA1QYHRLTNtLQyzCBHA', 'SplitAlgo': 'recursive_split', 'ChunkCount': 23, 'ChunkSize': 1000, 'Overlap': 100, 'IsActive': True, 'ChunksDir': '/tmp/chunks/2ad491ef31c54b0d97a41adef3d05c96'}


### Test websocket connection once

This step is very important and often neglected. If the websocket connection does not work, fix it before moving forward.

In [10]:
import asyncio
from gai.lib.common.StatusListener import StatusListener

# Point the listener at the indexing-status websocket; it is not connected yet.
ws_url = "ws://localhost:12033/api/v1/persona/00000000-0000-0000-0000-000000000000/document/step/index/ws"
listener = StatusListener(ws_url)


async def async_callback(status):
    """Print each status message handed over by the listener."""
    print(status)


# Spin off a separate task that connects to the websocket and listens.
# asyncio.create_task needs an already-running event loop.
listen_task = asyncio.create_task(listener.listen(async_callback))

### POST /api/v1/persona/{persona_id}/document/step/index

In [11]:
import asyncio
import ssl
from gai.lib.common.StatusListener import StatusListener
import httpx
import websockets
import json


from gai.lib.common.StatusListener import StatusListener
import asyncio

# Point to websocket but not connected yet.
ws_url = "ws://localhost:12033/api/v1/persona/00000000-0000-0000-0000-000000000000/document/step/index/ws"
listener = StatusListener(ws_url)

# spin off a seperate task and connect to websocket
async def async_callback(text):
    jsoned = json.loads(text)
    print(f"async_callback: {jsoned}")
listen_task=asyncio.create_task(listener.listen(async_callback))

async with httpx.AsyncClient() as client:
    data = {
        "document_id":doc_id,
        "chunkgroup_id":group_id
    }
    try:
        response = await client.post('http://localhost:12033/api/v1/persona/00000000-0000-0000-0000-000000000000/document/step/index', 
                    json=data,
                    timeout=3000
                    )
        print(response.json())
    except Exception as e:
        print(e)



async_callback: {"progress": 4}
async_callback: {"progress": 8}
async_callback: {"progress": 13}
async_callback: {"progress": 17}
async_callback: {"progress": 21}
async_callback: {"progress": 26}
async_callback: {"progress": 30}
async_callback: {"progress": 34}
async_callback: {"progress": 39}
async_callback: {"progress": 43}
async_callback: {"progress": 47}
async_callback: {"progress": 52}
async_callback: {"progress": 56}
async_callback: {"progress": 60}
async_callback: {"progress": 65}
async_callback: {"progress": 69}
async_callback: {"progress": 73}
async_callback: {"progress": 78}
async_callback: {"progress": 82}
async_callback: {"progress": 86}
async_callback: {"progress": 91}
async_callback: {"progress": 95}
async_callback: {"progress": 100}
{'chunk_ids': ['d42c0487-f314-487f-bfdd-9d2a4d0fed1b', '27ab8769-4da0-4503-9c8b-836853de510f', '4e47dda5-cd3c-44d6-b40b-8a9b0a8d6c20', '47208a37-6726-45b6-8c02-73d872986ae0', '485d4f60-65eb-4582-98e9-2d93c4ad7ca4', '628e5e3f-1e7b-4b4a-900f-e0