# One Knowledge Base to Rule Them All Workflow

## 0. Setup

In [24]:
import requests

def get_auth_headers(email: str, password: str) -> dict[str, str]:
    """Log in with email/password and return the resulting Authorization header.

    Posts the credentials to the Supabase password-grant token endpoint and
    wraps the ``access_token`` from the JSON reply in a ``Bearer`` header.

    Args:
        email: Account email sent in the login payload.
        password: Account password sent in the login payload.

    Returns:
        A dict holding a single ``Authorization`` entry.

    Raises:
        requests.HTTPError: If the token endpoint replies with an error status.
        KeyError: If the JSON reply has no ``access_token`` field.
    """
    anon_key = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImZic3VhZGZxaGtseG9rbWxodHNkIiwicm9sZSI6ImFub24iLCJpYXQiOjE2NzM0NTg5ODAsImV4cCI6MTk4OTAzNDk4MH0.Xjry9m7oc42_MsLRc1bZhTTzip3srDjJ6fJMkwhXQ9s"
    token_endpoint = "https://sb.stack-ai.com/auth/v1/token?grant_type=password"

    credentials = {"email": email, "password": password, "gotrue_meta_security": {}}
    request_headers = {"Content-Type": "application/json", "Apikey": anon_key}

    login = requests.post(
        token_endpoint,
        json=credentials,
        headers=request_headers,
        timeout=10,
    )
    # Fail here on a rejected login instead of on the token lookup below.
    login.raise_for_status()

    return {"Authorization": f"Bearer {login.json()['access_token']}"}

### Login to your account to get your auth headers

In [25]:
from getpass import getpass

email = "stackaitest@gmail.com"
# getpass() does not echo the typed password, so it never ends up in the
# cell output that gets saved (and shared) with the notebook.
password = getpass(f"Introduce the password for {email}: ")

# Exchange the credentials for the Authorization header used by every API call.
auth_headers = get_auth_headers(email, password)

### Create a request session

In [26]:
# One shared HTTP session for the whole notebook.
session = requests.Session()

# Attach the auth headers once so every request made through `session` sends them.
session.headers.update(auth_headers)

### Set the correct url for the backend you want to use

In [27]:
# Base URL of the backend API; later cells build their endpoint URLs from it.
backend_url = "https://api.stack-ai.com"

In [28]:
# Look up the organization of the logged-in user; the ID is needed later to
# build the knowledge base sync URL.
org_response = session.get(f"{backend_url}/organizations/me/current")
# Like the other requests in this notebook, fail loudly on an HTTP error
# instead of hitting an opaque KeyError / JSON decoding error below.
org_response.raise_for_status()
org_id = org_response.json()["org_id"]

## 1. Connections

### 1.1 Create a Google Drive connection in the Stack AI Workflow builder

1. Go to the Stack AI Workflow builder
2. On the left sidebar, click on Knowledge Bases
3. Drop the Google Drive node on the canvas
4. Click on connect to Google Drive on the node and follow the authorization steps.

### 1.2 List all the connections for the selected user

Your newly created connection will be listed here

In [29]:
# List (up to 5 of) the Google Drive connections of the logged-in user.
connection_list_url = f"{backend_url}/connections?connection_provider=gdrive&limit=5"
response = session.get(connection_list_url)
print(response)

response.raise_for_status()

connections = response.json()
if not connections:
    # Same exception type as indexing an empty list, but with an actionable message.
    raise IndexError(
        "No Google Drive connections found for this user. "
        "Create one in the Workflow builder (section 1.1) and re-run this cell."
    )

# Use the first connection returned by the API.
connection = connections[0]


<Response [200]>


In [30]:
# Summarize the non-sensitive fields of the selected connection.
print("Connection information:")
print("----------------------")
for label, key in (
    ("Connection ID", "connection_id"),
    ("Connection name", "name"),
    ("Created at", "created_at"),
    ("Updated at", "updated_at"),
):
    print(f"{label}: {connection[key]}")

# Commented to avoid leaking sensitive information
# print(f"Connection provider: {connection['connection_provider_data']}")

Connection information:
----------------------
Connection ID: 6a023d47-efe4-415e-8220-22dc80c6f2ea
Connection name: test connection
Created at: 2024-12-03T17:12:05.702552+00:00
Updated at: 2025-01-25T04:52:10.098166+00:00


### 1.3 List available resources under the connection

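1.   List the resources at the root of the connection
2.   Get the information about a specific resource
3.   List the children of a directory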



In [31]:
# Hard-coded ID of the connection used for the rest of the walkthrough.
# NOTE(review): this is not taken from the `connection` dict fetched earlier;
# replace it with the ID of one of your own connections.
connection_id = "e171b021-8c00-4c3f-8a93-396095414f57"

# Both resource endpoints share the same per-connection base URL.
connection_resources_url = f"{backend_url}/connections/{connection_id}/resources"
children_resources_url = connection_resources_url + "/children"
print(children_resources_url)

https://api.stack-ai.com/connections/e171b021-8c00-4c3f-8a93-396095414f57/resources/children


**Root resources**

Let's start with the root resources. To list them, we do not specify any resource, so the API returns the resources at the root of the connection.

In [32]:
# Query the children endpoint without any resource_id to list the connection root.
print("Pinging: ", children_resources_url)
root_resources_response = session.get(children_resources_url)
print(root_resources_response)
root_resources_response.raise_for_status()

# The resource dicts are nested under the 'data' key of the JSON payload.
response_data = root_resources_response.json()
root_resources = response_data['data']
print(f"Found {len(root_resources)} resources")

# One line per resource: folder/file icon, path padded to 30 chars, and its ID.
for resource in root_resources:
    if resource["inode_type"] == "directory":
        emoji = "📁"
    else:
        emoji = "📄"
    print(f"{emoji} {resource['inode_path']['path']:30} (resource_id: {resource['resource_id']})")

Pinging:  https://api.stack-ai.com/connections/e171b021-8c00-4c3f-8a93-396095414f57/resources/children
<Response [200]>
Found 12 resources
📁 Cache test                     (resource_id: 1xEP2dHUjHVX89-6GPozX3RCpL33pNOWG)
📁 classes                        (resource_id: 1GrHAPg2LVnx78y7diTMC_6AVQV1sehk2)
📁 papers                         (resource_id: 1YeS8H92ZmTZ3r2tLn1m43GG58gRzvYiM)
📁 shared_files_manu              (resource_id: 1Quj3aI6wI6rPhw1b4KaPaceX1YPKB3G2)
📁 TaskFixedNotebook              (resource_id: 1qheAMB-HJZg_GEWoiddbENJDzsYy3iHY)
📄 aaaa.txt                       (resource_id: 1gph8d5jKDw6Fn7z7ZSFthYOHQ2ls-SfZjCfZtfKRMVc)
📄 J1 Application Letter of Recommendation Template [BBVA].txt (resource_id: 1rqai8wJsUmpDTbVhP7IVO6iyf06y6cxfAbI-q1ToVrg)
📄 J1 Application Letter of Recommendation Template [Zapiens].txt (resource_id: 1RpcDo1CvbKoA_phXvqNHKJUcrJ2eQsnvB5LFuygBjjg)
📄 Kapture 2024-11-11 at 03.46.51.mp4 (resource_id: 1pIJFT6Vgv6PEUbXDc3-QbgGmlOylcPp3)
📄 KOREAZO.xlsx           

**Let's take a look at the raw response from the API**

In [33]:
# Dump each root-level resource dict as-is, one per line, to show the raw API fields.
for resource in root_resources:
    print(resource)

{'knowledge_base_id': '00000000-0000-0000-0000-000000000000', 'created_at': '2024-11-02T00:30:41.611000Z', 'modified_at': '2024-11-02T00:30:41.611000Z', 'indexed_at': None, 'inode_type': 'directory', 'resource_id': '1xEP2dHUjHVX89-6GPozX3RCpL33pNOWG', 'inode_path': {'path': 'Cache test'}, 'dataloader_metadata': {}, 'user_metadata': {}, 'inode_id': None}
{'knowledge_base_id': '00000000-0000-0000-0000-000000000000', 'created_at': '2024-08-29T18:53:23.652000Z', 'modified_at': '2024-08-29T18:53:23.652000Z', 'indexed_at': None, 'inode_type': 'directory', 'resource_id': '1GrHAPg2LVnx78y7diTMC_6AVQV1sehk2', 'inode_path': {'path': 'classes'}, 'dataloader_metadata': {}, 'user_metadata': {}, 'inode_id': None}
{'knowledge_base_id': '00000000-0000-0000-0000-000000000000', 'created_at': '2024-07-02T17:48:07.590000Z', 'modified_at': '2024-07-02T17:55:15.747000Z', 'indexed_at': None, 'inode_type': 'directory', 'resource_id': '1YeS8H92ZmTZ3r2tLn1m43GG58gRzvYiM', 'inode_path': {'path': 'papers'}, 'data

**Get the information about a specific file, such as `aaaa.txt`**

> Add blockquote



In [34]:
# Fetch a single resource by ID through the (non-"children") resources endpoint.
from urllib.parse import urlencode

# This ID is the one printed next to `aaaa.txt` in the root listing output.
data = {"resource_id": "1gph8d5jKDw6Fn7z7ZSFthYOHQ2ls-SfZjCfZtfKRMVc"}

# Build the query string by hand so the exact URL can be printed before calling it.
encoded_query_params = urlencode(data, doseq=True)
url = connection_resources_url + "?" + encoded_query_params

print("Pinging: ", url)
resource_response = session.get(url)
resource_response.raise_for_status()

print("\n\nRaw response:", resource_response.text, sep="\n")

Pinging:  https://api.stack-ai.com/connections/e171b021-8c00-4c3f-8a93-396095414f57/resources?resource_id=1gph8d5jKDw6Fn7z7ZSFthYOHQ2ls-SfZjCfZtfKRMVc


Raw response:
{}


**Get the resources in a directory, such as `classes`**


In [35]:
# List the direct children of the "classes" folder of the connection.
from urllib.parse import urlencode

# resource_id of the "classes" directory, as printed in the root listing output.
data = {"resource_id": "1GrHAPg2LVnx78y7diTMC_6AVQV1sehk2"}
encoded_query_params = urlencode(data, doseq=True)
url = children_resources_url + "?" + encoded_query_params

print("Pinging: ", url)
response = session.get(url)
response.raise_for_status()

# Unwrap the 'data' key when it is present; otherwise use the payload unchanged.
response_data = response.json()
resources = response_data['data'] if 'data' in response_data else response_data

print(f"Found {len(resources)} resources in classes directory:")
for resource in resources:
    if resource["inode_type"] == "directory":
        emoji = "📁"
    else:
        emoji = "📄"
    print(f"{emoji} {resource['inode_path']['path']:30} (resource_id: {resource['resource_id']})")

# Show which top-level keys the payload carries.
print("\n\nRaw response structure:")
if isinstance(response_data, dict):
    print(f"Response keys: {response_data.keys()}")
else:
    print("Response keys: Not a dict")


Pinging:  https://api.stack-ai.com/connections/e171b021-8c00-4c3f-8a93-396095414f57/resources/children?resource_id=1GrHAPg2LVnx78y7diTMC_6AVQV1sehk2
Found 4 resources in classes directory:
📁 classes/algebra                (resource_id: 10_70p7qhQb_a5I_nXqI-AG9y-ecfSeXU)
📁 classes/calculus               (resource_id: 1mdINabiuab4guo2yXspW-fKqwdUdzE71)
📁 classes/exams                  (resource_id: 1HPF28wtRZaJpsj9M_BapdBhr3tRa8aUJ)
📄 classes/CPP Cheatsheet Hackr.pdf (resource_id: 1EEPQoVcmZjh0skut_GyfRoibZ70L-qzy)


Raw response structure:
Response keys: dict_keys(['data', 'next_cursor', 'current_cursor'])


**Get the resources in another directory, such as `papers` (nested browsing)**



In [36]:
from urllib.parse import urlencode

# List the direct children of the "papers" folder. The ID below is the one
# printed next to "papers" in the root listing output; swap in any other
# directory resource_id to browse a different folder.
data = {"resource_id": "1YeS8H92ZmTZ3r2tLn1m43GG58gRzvYiM"}  # papers directory
encoded_query_params = urlencode(data, doseq=True)
url = children_resources_url + "?" + encoded_query_params

print("Pinging: ", url)
response = session.get(url)
response.raise_for_status()

# Unwrap the 'data' key when it is present; otherwise use the payload unchanged.
response_data = response.json()
if 'data' not in response_data:
    resources = response_data
else:
    resources = response_data['data']

print(f"Found {len(resources)} resources in papers directory:")
for resource in resources:
    emoji = "📄" if resource["inode_type"] != "directory" else "📁"
    print(f"{emoji} {resource['inode_path']['path']:30} (resource_id: {resource['resource_id']})")

# Show which top-level keys the payload carries.
print("\n\nRaw response structure:")
if isinstance(response_data, dict):
    print(f"Response keys: {response_data.keys()}")
else:
    print("Response keys: Not a dict")

Pinging:  https://api.stack-ai.com/connections/e171b021-8c00-4c3f-8a93-396095414f57/resources/children?resource_id=1YeS8H92ZmTZ3r2tLn1m43GG58gRzvYiM
Found 3 resources in papers directory:
📁 papers/another folder          (resource_id: 1Qxd08BxFKH0vWdTRnbrA2ncSvUR7CKzH)
📄 papers/react_paper.pdf         (resource_id: 1SIDiEnXfUp0E3Pt-yHxyyJT6aJ53WvrG)
📄 papers/self_rag.pdf            (resource_id: 1Y9etHfbu-i3oDA9rF_gCjx4GMmwsC-YS)


Raw response structure:
Response keys: dict_keys(['data', 'next_cursor', 'current_cursor'])


# 2. Knowledge Bases

Once the user has decided which resources they want to index, they can create a knowledge base. A knowledge base is a collection of resources that are indexed in our vector database.


In this example, we will suppose that the user has decided to index the following resources:
- 📁 papers                         (resource_id: 1YeS8H92ZmTZ3r2tLn1m43GG58gRzvYiM)
- 📄 Very Important notes.txt       (resource_id: 1GYpHUOiSYXGz_9GeUGgQkwQUJqCAxibGd9szwMJQSIg)


This means that the `papers` folder and all of its subfolders will be indexed, as well as the `Very Important notes.txt` file.

It is important that the frontend contains logic to avoid passing both a resource and its children in the list of resources to be indexed. For example, if the frontend passes both
- 📁 test_folder                    (resource_id: 1cGeHFazvfHDSOfDJ_SRZEzkm5q1-Zn41)
- 📄 test_folder/Contrato_pagos_inmediatos.pdf (resource_id: 18nr8ZUE0QQZgNITw1JeEV1ZaobMDxUNC)

the backend will still work fine and index everything under test_folder, but there will be duplicate work to get the metadata of the Contrato_pagos_inmediatos.pdf file both as a child of test_folder and as an independent resource.

## 2.1 Creating a knowledge base
Let's create a knowledge base that will be synced to the selected resources.

In [37]:
import json

# Resources the knowledge base will be synced to: one folder and one standalone file.
connection_source_ids = [
    "1YeS8H92ZmTZ3r2tLn1m43GG58gRzvYiM",  # The papers folder
    "1GYpHUOiSYXGz_9GeUGgQkwQUJqCAxibGd9szwMJQSIg",  # Very Important Notes.txt file
]

create_kb_url = f"{backend_url}/knowledge_bases"

# Request body. `connection_id` must be the connection the source IDs belong to.
# NOTE(review): the creation response echoes "chunker_type" and "api" where this
# payload sends "chunker" and "api_key" — confirm the expected field names.
data = dict(
    connection_id=connection_id,
    connection_source_ids=connection_source_ids,
    name="Test Knowledge Base",
    description="This is a test knowledge base",
    indexing_params={
        "ocr": False,
        "unstructured": True,
        "embedding_params": {"embedding_model": "text-embedding-ada-002", "api_key": None},
        "chunker_params": {"chunk_size": 1500, "chunk_overlap": 500, "chunker": "sentence"},
    },
    org_level_role=None,
    cron_job_id=None,
)

print("Pinging: ", create_kb_url)
print(f"Creating KB with connection_id: {connection_id}")

kb_create_response = session.post(create_kb_url, json=data)
kb_create_response.raise_for_status()

new_kb_json = kb_create_response.json()
print("KB Creation Response:")
print(json.dumps(new_kb_json, indent=2))

# Keep the new knowledge base's ID; the following cells build their URLs from it.
knowledge_base_id = new_kb_json["knowledge_base_id"]
print(f"\n✅ Knowledge Base Created! ID: {knowledge_base_id}")

Pinging:  https://api.stack-ai.com/knowledge_bases
Creating KB with connection_id: e171b021-8c00-4c3f-8a93-396095414f57
KB Creation Response:
{
  "knowledge_base_id": "e48bd995-f8ea-40be-982d-edfc99fcacaf",
  "connection_id": "e171b021-8c00-4c3f-8a93-396095414f57",
  "created_at": "2025-05-29T04:54:44.857292Z",
  "updated_at": "2025-05-29T04:54:44.857297Z",
  "connection_source_ids": [
    "1YeS8H92ZmTZ3r2tLn1m43GG58gRzvYiM",
    "1GYpHUOiSYXGz_9GeUGgQkwQUJqCAxibGd9szwMJQSIg"
  ],
  "website_sources": [],
  "connection_provider_type": "gdrive",
  "is_empty": true,
  "total_size": 0,
  "name": "Test Knowledge Base",
  "description": "This is a test knowledge base",
  "indexing_params": {
    "ocr": false,
    "unstructured": true,
    "embedding_params": {
      "api": null,
      "base_url": null,
      "embedding_model": "text-embedding-ada-002",
      "batch_size": 300,
      "track_usage": true,
      "timeout": 5
    },
    "chunker_params": {
      "chunk_size": 1500,
      "chunk

In [38]:
# Print the creation response as a plain Python dict (unformatted, single line).
print(new_kb_json)

# Repeats the assignment already made at the end of the creation cell (same dict, same key).
knowledge_base_id = new_kb_json["knowledge_base_id"]
print(f"\n✅ Knowledge Base Created! ID: {knowledge_base_id}")

{'knowledge_base_id': 'e48bd995-f8ea-40be-982d-edfc99fcacaf', 'connection_id': 'e171b021-8c00-4c3f-8a93-396095414f57', 'created_at': '2025-05-29T04:54:44.857292Z', 'updated_at': '2025-05-29T04:54:44.857297Z', 'connection_source_ids': ['1YeS8H92ZmTZ3r2tLn1m43GG58gRzvYiM', '1GYpHUOiSYXGz_9GeUGgQkwQUJqCAxibGd9szwMJQSIg'], 'website_sources': [], 'connection_provider_type': 'gdrive', 'is_empty': True, 'total_size': 0, 'name': 'Test Knowledge Base', 'description': 'This is a test knowledge base', 'indexing_params': {'ocr': False, 'unstructured': True, 'embedding_params': {'api': None, 'base_url': None, 'embedding_model': 'text-embedding-ada-002', 'batch_size': 300, 'track_usage': True, 'timeout': 5}, 'chunker_params': {'chunk_size': 1500, 'chunk_overlap': 500, 'chunker_type': 'sentence'}}, 'cron_job_id': None, 'org_id': '0d582f36-52dd-403f-a38a-ccf4dfa06180', 'org_level_role': None, 'user_metadata_schema': None, 'dataloader_metadata_schema': None}

✅ Knowledge Base Created! ID: e48bd995-f8ea

## 2.2 Sync Knowledge Base

To load the resources from the connection into the knowledge base, we need to call the `sync` endpoint of the knowledge base. The syncing is done in a background task, so we need to wait for the task to finish before we can access the resources.

In [39]:
# The sync trigger URL embeds both the knowledge base ID and the organization ID.
kb_sync_url = f"{backend_url}/knowledge_bases/sync/trigger/{knowledge_base_id}/{org_id}"
print(f"\nPinging: {kb_sync_url}")

sync_response = session.get(kb_sync_url)
print(f"Sync response status: {sync_response.status_code}")
print(f"Sync response text: {sync_response.text}")

# Only a 200 reply is reported as success; anything else is printed as a failure
# (no exception is raised, so later cells still run).
if sync_response.status_code != 200:
    print(f"❌ Sync failed with status: {sync_response.status_code}")
else:
    print("✅ Sync initiated successfully!")


Pinging: https://api.stack-ai.com/knowledge_bases/sync/trigger/e48bd995-f8ea-40be-982d-edfc99fcacaf/0d582f36-52dd-403f-a38a-ccf4dfa06180
Sync response status: 200
Sync response text: null
✅ Sync initiated successfully!


## 2.3 Get the list of files in the knowledge base

At first, the files will be in the pending state as their indexing is not yet complete. If you wait for about a minute, you should see the files in the indexed state.


In [40]:
import time

# Fixed 5-second pause before listing the knowledge base contents. Indexing may
# still be running afterwards, in which case files are reported as "pending".
time.sleep(5)

In [41]:
from urllib.parse import urlencode

# List what the knowledge base holds at its root path ("/").
kb_children_resources_url = f"{backend_url}/knowledge_bases/{knowledge_base_id}/resources/children"

data = {"resource_path": "/"}
encoded_query_params = urlencode(data)
url = kb_children_resources_url + "?" + encoded_query_params
print(f"\nPinging: {url}")

kb_resources_response = session.get(url)
kb_resources_response.raise_for_status()

# Accept both a {'data': [...]} envelope and a bare payload.
response_data = kb_resources_response.json()
kb_resources = (
    response_data['data']
    if isinstance(response_data, dict) and 'data' in response_data
    else response_data
)

print(f"\n📚 Knowledge Base Root Contents ({len(kb_resources)} items):")
print("=" * 60)

for resource in kb_resources:
    if resource["inode_type"] == "directory":
        emoji = "📁"
    else:
        emoji = "📄"
    # Entries without a 'status' field are shown as 'unknown'.
    status = resource.get('status', 'unknown')
    # 'inode_path' may be a {'path': ...} dict or already a plain value.
    path = resource['inode_path']
    if isinstance(path, dict):
        path = path['path']

    print(f"{emoji} {path:30} (ID: {resource['resource_id']}) status: {status}")


Pinging: https://api.stack-ai.com/knowledge_bases/e48bd995-f8ea-40be-982d-edfc99fcacaf/resources/children?resource_path=%2F

📚 Knowledge Base Root Contents (2 items):
📄 Very important notes.txt       (ID: 1GYpHUOiSYXGz_9GeUGgQkwQUJqCAxibGd9szwMJQSIg) status: indexed
📁 papers                         (ID: STACK_VFS_VIRTUAL_DIRECTORY) status: unknown


In [42]:
# List the contents of the "/papers" directory inside the knowledge base.
kb_children_resources_url = f"{backend_url}/knowledge_bases/{knowledge_base_id}/resources/children"

data = {"resource_path": "/papers"}
encoded_query_params = urlencode(data)
url = kb_children_resources_url + "?" + encoded_query_params
print(f"\nPinging: {url}")

kb_resources_response = session.get(url)
kb_resources_response.raise_for_status()

# Accept both a {'data': [...]} envelope and a bare payload.
response_data = kb_resources_response.json()
has_envelope = isinstance(response_data, dict) and 'data' in response_data
kb_resources = response_data['data'] if has_envelope else response_data

print(f"\n📁 Papers Directory Contents ({len(kb_resources)} items):")
print("=" * 60)

for resource in kb_resources:
    emoji = "📄" if resource["inode_type"] != "directory" else "📁"
    # Entries without a 'status' field are shown as 'unknown'.
    status = resource.get('status', 'unknown')
    # 'inode_path' may be a {'path': ...} dict or already a plain value.
    path = resource['inode_path']
    if isinstance(path, dict):
        path = path['path']

    print(f"{emoji} {path:30} (ID: {resource['resource_id']}) status: {status}")



Pinging: https://api.stack-ai.com/knowledge_bases/e48bd995-f8ea-40be-982d-edfc99fcacaf/resources/children?resource_path=%2Fpapers

📁 Papers Directory Contents (3 items):
📄 papers/self_rag.pdf            (ID: 1Y9etHfbu-i3oDA9rF_gCjx4GMmwsC-YS) status: pending
📄 papers/react_paper.pdf         (ID: 1SIDiEnXfUp0E3Pt-yHxyyJT6aJ53WvrG) status: pending
📁 papers/another folder          (ID: STACK_VFS_VIRTUAL_DIRECTORY) status: unknown


## 2.4 Manually manipulate the knowledge base

### Delete a file
For now, only files can be deleted.

In [43]:
from urllib.parse import urlencode
import json

kb_resources_url = f"{backend_url}/knowledge_bases/{knowledge_base_id}/resources"

# Path of the file to delete, relative to the knowledge base root.
# Replace it with a path that exists in your KB (list the papers directory
# first to see the available files). It is defined once so that the request
# and the log message below can never disagree.
resource_path = "papers/react_paper.pdf"

data = {
    "resource_path": resource_path,
}
encoded_query_params = urlencode(data)
delete_url = f"{kb_resources_url}?{encoded_query_params}"

print(f"🗑️  Attempting to delete file: {resource_path}")
print(f"DELETE URL: {delete_url}")

response = session.delete(delete_url)

print(f"Delete response status: {response.status_code}")
print(f"Delete response text: {response.text}")

# 200/204 are reported as success; 404 gets its own hint; anything else is a
# generic failure. No exception is raised, so later cells still run.
if response.status_code in [200, 204]:
    print("✅ File deletion request sent successfully!")
elif response.status_code == 404:
    print("❌ File not found - make sure the file path exists in your KB")
else:
    print(f"❌ Delete failed with status: {response.status_code}")


🗑️  Attempting to delete file: papers/react_paper.pdf
DELETE URL: https://api.stack-ai.com/knowledge_bases/e48bd995-f8ea-40be-982d-edfc99fcacaf/resources?resource_path=papers%2Freact_paper.pdf
Delete response status: 204
Delete response text: 
✅ File deletion request sent successfully!


In [44]:
# Verify the deletion: list the papers directory again after a short pause.
import time

print("\n⏳ Waiting 5 seconds for deletion to process...")
time.sleep(5)

# Same children endpoint as before; the deleted document should no longer be listed.
kb_children_resources_url = f"{backend_url}/knowledge_bases/{knowledge_base_id}/resources/children"

data = {"resource_path": "papers/"}
encoded_query_params = urlencode(data)
url = kb_children_resources_url + "?" + encoded_query_params

print("\n📂 Listing papers directory after deletion:")
print(f"GET URL: {url}")

kb_resources_response = session.get(url)
kb_resources_response.raise_for_status()

# Accept both a {'data': [...]} envelope and a bare payload.
response_data = kb_resources_response.json()
kb_resources = (
    response_data['data']
    if isinstance(response_data, dict) and 'data' in response_data
    else response_data
)

print(f"\n📁 Papers Directory Contents After Deletion ({len(kb_resources)} items):")
print("=" * 70)

for resource in kb_resources:
    if resource["inode_type"] == "directory":
        emoji = "📁"
    else:
        emoji = "📄"
    # Entries without a 'status' field are shown as 'unknown'.
    status = resource.get('status', 'unknown')
    # 'inode_path' may be a {'path': ...} dict or already a plain value.
    path = resource['inode_path']
    if isinstance(path, dict):
        path = path['path']

    print(f"{emoji} {path:35} (ID: {resource['resource_id']}) status: {status}")



⏳ Waiting 5 seconds for deletion to process...

📂 Listing papers directory after deletion:
GET URL: https://api.stack-ai.com/knowledge_bases/e48bd995-f8ea-40be-982d-edfc99fcacaf/resources/children?resource_path=papers%2F

📁 Papers Directory Contents After Deletion (2 items):
📄 papers/self_rag.pdf                 (ID: 1Y9etHfbu-i3oDA9rF_gCjx4GMmwsC-YS) status: indexed
📁 papers/another folder               (ID: STACK_VFS_VIRTUAL_DIRECTORY) status: unknown


### Create a file
For now, only files can be created.

In [45]:
import requests

print("\n📝 Creating a new file in the knowledge base...")

# Raw bytes of the new file and the multipart "file" part that carries them
# (filename, content, MIME type).
file_content = b"This is test file content created via API"
files = {"file": ("demo_file3.txt", file_content, "text/plain")}

# Form fields sent alongside the file: what to create and where inside the KB.
create_request_metadata = {
    "resource_type": "file",
    "resource_path": "papers/demo_file3.txt",
}

kb_create_file_url = f"{backend_url}/knowledge_bases/{knowledge_base_id}/resources"
print(f"POST URL: {kb_create_file_url}")

# `files=` plus `data=` (not `json=`) makes requests send multipart form-data.
response = session.post(kb_create_file_url, files=files, data=create_request_metadata)

print(f"Create file response status: {response.status_code}")
print(f"Create file response text: {response.text}")

# 200, 201 and 202 are all reported as success; no exception is raised otherwise.
if response.status_code not in (200, 201, 202):
    print(f"❌ File creation failed with status: {response.status_code}")
else:
    print("✅ File creation request sent successfully!")



📝 Creating a new file in the knowledge base...
POST URL: https://api.stack-ai.com/knowledge_bases/e48bd995-f8ea-40be-982d-edfc99fcacaf/resources
Create file response status: 202
Create file response text: null
✅ File creation request sent successfully!


In [46]:
import time

print("\n⏳ Waiting 5 seconds for file creation to process...")
time.sleep(5)

# Same children endpoint as before; the newly created document should now be listed.
kb_children_resources_url = f"{backend_url}/knowledge_bases/{knowledge_base_id}/resources/children"

data = {"resource_path": "papers/"}
encoded_query_params = urlencode(data)
url = kb_children_resources_url + "?" + encoded_query_params

print("\n📂 Listing papers directory after file creation:")
print(f"GET URL: {url}")

kb_resources_response = session.get(url)
kb_resources_response.raise_for_status()

# Accept both a {'data': [...]} envelope and a bare payload.
response_data = kb_resources_response.json()
has_envelope = isinstance(response_data, dict) and 'data' in response_data
kb_resources = response_data['data'] if has_envelope else response_data

print(f"\n📁 Papers Directory Contents After File Creation ({len(kb_resources)} items):")
print("=" * 70)

for resource in kb_resources:
    emoji = "📄" if resource["inode_type"] != "directory" else "📁"
    # Entries without a 'status' field are shown as 'unknown'.
    status = resource.get('status', 'unknown')
    # 'inode_path' may be a {'path': ...} dict or already a plain value.
    path = resource['inode_path']
    if isinstance(path, dict):
        path = path['path']

    print(f"{emoji} {path:30} (resource_id: {resource['resource_id']}) status: {status}")


⏳ Waiting 5 seconds for file creation to process...

📂 Listing papers directory after file creation:
GET URL: https://api.stack-ai.com/knowledge_bases/e48bd995-f8ea-40be-982d-edfc99fcacaf/resources/children?resource_path=papers%2F

📁 Papers Directory Contents After File Creation (3 items):
📄 papers/demo_file3.txt          (resource_id: d8dc13ae-891b-49ce-baa6-db9ea1fa475f) status: indexed
📄 papers/self_rag.pdf            (resource_id: 1Y9etHfbu-i3oDA9rF_gCjx4GMmwsC-YS) status: indexed
📁 papers/another folder          (resource_id: STACK_VFS_VIRTUAL_DIRECTORY) status: unknown
