# Step 1: Test Basic ArangoDB Connection

In [3]:
import sys
sys.path.append("../")
from src.services.graph_db import get_graph_db_service, Node, Edge
from src.config import settings
from arango import ArangoClient
from arango.exceptions import ArangoError

# Test basic connection
async def test_connection():
    graph_db = get_graph_db_service()
    is_connected = await graph_db.connect()
    print(f"Connection status: {is_connected}")
    print(f"Database object: {graph_db.db}")
    return graph_db

graph_db = await test_connection()

Connection status: True
Database object: <StandardDatabase research>


# Step 2: Test Direct ArangoDB Connection

<src.services.graph_db.arangodb.ArangoDBService at 0x7f1f0cdebe50>

In [11]:
graph_db.db.create_collection("mycollection2")

<StandardCollection mycollection2>

In [14]:
# Test with a simple document
async def test_simple_insertion():
    # Create a simple node with a safe key
    test_key = "test123"
    test_node = Node(
        id=f"mycollection2/{test_key}",
        #key=test_key,
        label="Test Node",
        properties={"message": "Hello ArangoDB"},
        type="test"
    )
    
    print(f"Attempting to insert node with key: {test_key}")
    success = await graph_db.upsert_node(test_node)
    print(f"Insertion success: {success}")
    return success

result = await test_simple_insertion()

Failed to INSERT/UPDATE document mycollection2/test123: [HTTP 400][ERR 1221] illegal document key


Attempting to insert node with key: test123
Insertion success: False


In [20]:
# Let's check what's happening with the key validation
def debug_key_validation():
    test_key = "test123"
    print(f"Testing key: '{test_key}'")
    print(f"Key length: {len(test_key)}")
    print(f"Key type: {type(test_key)}")
    
    # Check each character
    for i, char in enumerate(test_key):
        print(f"Char {i}: '{char}' (ASCII: {ord(char)})")
    
    # Test the regex pattern
    import re
    pattern = r'^[a-zA-Z0-9_-]+$'
    matches = re.match(pattern, test_key)
    print(f"Regex match: {matches is not None}")
    
    # Check if it starts with underscore or digit
    print(f"Starts with digit: {test_key[0].isdigit()}")
    print(f"Starts with underscore: {test_key.startswith('_')}")
    
    # Check length constraints
    print(f"Key length valid (1-254): {1 <= len(test_key) <= 254}")

debug_key_validation()

Testing key: 'test123'
Key length: 7
Key type: <class 'str'>
Char 0: 't' (ASCII: 116)
Char 1: 'e' (ASCII: 101)
Char 2: 's' (ASCII: 115)
Char 3: 't' (ASCII: 116)
Char 4: '1' (ASCII: 49)
Char 5: '2' (ASCII: 50)
Char 6: '3' (ASCII: 51)
Regex match: True
Starts with digit: False
Starts with underscore: False
Key length valid (1-254): True


In [15]:
# Test inserting directly with the ArangoDB client
async def test_direct_arango_insert():
    try:
        # Get the database connection from our service
        if graph_db.db is None:
            await graph_db.connect()
        
        # Create a test collection if it doesn't exist
        collection_name = "testcollection"
        if not graph_db.db.has_collection(collection_name):
            graph_db.db.create_collection(collection_name)
            print(f"Created collection: {collection_name}")
        
        # Test with the exact same key
        test_key = "test123"
        collection = graph_db.db.collection(collection_name)
        
        print(f"Attempting to insert document with _key: '{test_key}'")
        
        # Try a simple document insert
        document = {
            "_key": test_key,
            "test_field": "This is a test",
            "number_field": 123
        }
        
        result = collection.insert(document)
        print(f"Direct insertion successful: {result}")
        return True
        
    except Exception as e:
        print(f"Direct insertion failed: {e}")
        # Print the full error details
        import traceback
        traceback.print_exc()
        return False

result = await test_direct_arango_insert()

Created collection: testcollection
Attempting to insert document with _key: 'test123'
Direct insertion successful: {'_id': 'testcollection/test123', '_key': 'test123', '_rev': '_kWorU7S---'}


In [22]:
# Let's examine what the graph_db.upsert_node method is actually doing
def examine_upsert_method():
    # Check if the method exists and its source
    if hasattr(graph_db, 'upsert_node'):
        print("upsert_node method exists")
        import inspect
        source = inspect.getsource(graph_db.upsert_node)
        print("Method source:")
        print("=" * 50)
        print(source)
        print("=" * 50)
    else:
        print("upsert_node method not found")

examine_upsert_method()

upsert_node method exists
Method source:
    async def upsert_node(self, node: Node) -> bool:
        """Insert or update a node in ArangoDB"""
        try:
            collection_name = f"nodes_{node.type}"

            # Ensure the collection exists
            # print('+-' * 25)
            # print(collection_name)
            # print('+-' * 25)
            if not self.db.has_collection(collection_name):
                self.db.create_collection(collection_name)

            collection = self.db.collection(collection_name)

            # Prepare document
            document = {
                "_key": node.id,
                "label": node.label,
                **node.properties, "created_at": datetime.utcnow().isoformat(),
                "updated_at": datetime.utcnow().isoformat()
            }

            # Upsert the document
            try:
                # collection.upsert({"_key": node.id}, document, return_new=False)
                collection.insert(document, overwrit

In [16]:
# Test various key patterns to see what works
async def test_various_keys():
    test_keys = [
        "test123",           # Original failing key
        "simplekey",         # Only letters
        "12345",             # Only numbers (might fail if cannot start with digit)
        "key_with_underscore",
        "key-with-hyphen",
        "a",                 # Single character
        "x" * 100,          # Long key
        "test_123",         # Mixed
    ]
    
    collection_name = "keytestcollection"
    
    if graph_db.db and not graph_db.db.has_collection(collection_name):
        graph_db.db.create_collection(collection_name)
    
    for key in test_keys:
        try:
            if graph_db.db:
                collection = graph_db.db.collection(collection_name)
                document = {"_key": key, "test": "value"}
                collection.insert(document)
                print(f"✓ Key '{key}' - SUCCESS")
                # Clean up for next test
                collection.delete(key)
            else:
                print(f"✗ Key '{key}' - No DB connection")
        except Exception as e:
            print(f"✗ Key '{key}' - FAILED: {e}")

await test_various_keys()

✓ Key 'test123' - SUCCESS
✓ Key 'simplekey' - SUCCESS
✓ Key '12345' - SUCCESS
✓ Key 'key_with_underscore' - SUCCESS
✓ Key 'key-with-hyphen' - SUCCESS
✓ Key 'a' - SUCCESS
✓ Key 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' - SUCCESS
✓ Key 'test_123' - SUCCESS


In [24]:
# Check ArangoDB server version and configuration
def check_arango_server():
    try:
        if graph_db.db:
            version = graph_db.db.version()
            print(f"ArangoDB Server Version: {version}")
            
            # Check server details
            server_details = graph_db.db.server_details()
            print("Server Details:")
            for key, value in server_details.items():
                print(f"  {key}: {value}")
                
            # Check database details
            db_details = graph_db.db.database_details()
            print("Database Details:")
            for key, value in db_details.items():
                print(f"  {key}: {value}")
                
        else:
            print("No database connection available")
    except Exception as e:
        print(f"Error checking server: {e}")

check_arango_server()

ArangoDB Server Version: 3.12.4-3
Error checking server: 'StandardDatabase' object has no attribute 'server_details'


In [25]:
graph_db.db.

<StandardDatabase research>

In [17]:
# Check how the Node object is being constructed
def test_node_construction():
    test_key = "test123"
    node_id = f"testcollection/{test_key}"
    
    node = Node(
        id=node_id,
        label="Test Node",
        properties={"test": "value"},
        type="test"
    )
    
    print("Node object details:")
    print(f"  id: {node.id}")
    print(f"  label: {node.label}")
    print(f"  properties: {node.properties}")
    print(f"  type: {node.type}")
    
    # Check if the ID splitting works correctly
    if '/' in node.id:
        collection, key = node.id.split('/', 1)
        print(f"  Collection: {collection}")
        print(f"  Key: {key}")
        print(f"  Key == test123: {key == test_key}")

test_node_construction()

Node object details:
  id: testcollection/test123
  label: Test Node
  properties: {'test': 'value'}
  type: test
  Collection: testcollection
  Key: test123
  Key == test123: True


In [18]:
# Let's try the absolute simplest possible case
async def minimal_test():
    try:
        if graph_db.db is None:
            await graph_db.connect()
            
        # Use the default collection
        collection_name = "_graphs"  # System collection that should exist
        if graph_db.db and not graph_db.db.has_collection(collection_name):
            graph_db.db.create_collection(collection_name)
            
        if graph_db.db.has_collection(collection_name):
            collection = graph_db.db.collection(collection_name)
            print(f"Using collection: {collection_name}")
            
            # Try to list documents (read operation)
            count = collection.count()
            print(f"Collection count: {count}")
            
            return True
        else:
            print(f"Collection {collection_name} does not exist")
            return False
            
    except Exception as e:
        print(f"Minimal test failed: {e}")
        return False

await minimal_test()

Using collection: _graphs
Collection count: 0


True

In [29]:
# Test inserting directly with the ArangoDB client
async def test_direct_arango_insert():
    try:
        # Get the database connection from our service
        if graph_db.db is None:
            await graph_db.connect()
        
        # Create a test collection if it doesn't exist
        collection_name = "testcollection"
        if not graph_db.db.has_collection(collection_name):
            graph_db.db.create_collection(collection_name)
            print(f"Created collection: {collection_name}")
        
        # Test with the exact same key
        test_key = "test1234"
        collection = graph_db.db.collection(collection_name)
        
        print(f"Attempting to insert document with _key: '{test_key}'")
        
        # Try a simple document insert
        document = {
            "_key": test_key,
            "test_field": "This is a test",
            "number_field": 1234
        }
        
        result = collection.insert(document)
        print(f"Direct insertion successful: {result}")
        return True
        
    except Exception as e:
        print(f"Direct insertion failed: {e}")
        # Print the full error details
        import traceback
        traceback.print_exc()
        return False

result = await test_direct_arango_insert()

Attempting to insert document with _key: 'test1234'
Direct insertion successful: {'_id': 'testcollection/test1234', '_key': 'test1234', '_rev': '_kWaRm-u---'}


# Step 3c: Examine the GraphDB Service Implementation

In [30]:
# Let's look at the actual upsert_node method implementation
def examine_graphdb_service():
    print("GraphDB Service Analysis:")
    print(f"Service type: {type(graph_db).__name__}")
    print(f"Is mock: {getattr(graph_db, 'is_mock', 'Not set')}")
    print(f"DB object: {getattr(graph_db, 'db', 'Not set')}")
    
    # Check if upsert_node exists and examine it
    if hasattr(graph_db, 'upsert_node'):
        print("\nupsert_node method exists")
        # Let's see what parameters it takes
        import inspect
        sig = inspect.signature(graph_db.upsert_node)
        print(f"Method signature: {sig}")
    else:
        print("upsert_node method not found")

examine_graphdb_service()

GraphDB Service Analysis:
Service type: ArangoDBService
Is mock: Not set
DB object: <StandardDatabase research>

upsert_node method exists
Method signature: (node: src.services.graph_db.base.Node) -> bool


In [2]:
# Let's test what happens when we call upsert_node with the same parameters
async def debug_upsert_node():
    test_key = "test123"
    node_id = f"testcollection/{test_key}"
    
    # Create the same node as in our failing test
    test_node = Node(
        id=node_id,
        label="Test Node",
        properties={"message": "Hello ArangoDB"},
        type="test"
    )
    
    print(f"Calling upsert_node with:")
    print(f"  Node ID: {test_node.id}")
    print(f"  Label: {test_node.label}")
    print(f"  Properties: {test_node.properties}")
    print(f"  Type: {test_node.type}")
    
    # Now call the actual method
    try:
        result = await graph_db.upsert_node(test_node)
        print(f"upsert_node result: {result}")
        return result
    except Exception as e:
        print(f"upsert_node exception: {e}")
        import traceback
        traceback.print_exc()
        return False

result = await debug_upsert_node()

Calling upsert_node with:
  Node ID: testcollection/test123
  Label: Test Node
  Properties: {'message': 'Hello ArangoDB'}
  Type: test
upsert_node result: True


In [37]:
    test_key = "test123765"
    node_id = f"testcollection/{test_key}"

    test_node = Node(
        id=node_id,
        label="Test Node",
        properties={"message": "Hello ArangoDB"},
        type="test"
    )

In [39]:

res = graph_db.upsert_node(test_node)

In [40]:
 print(f"upsert_node result: {result}")

upsert_node result: False


In [4]:
# Verify nodes and edges are actually stored
async def verify_data_storage():
    if graph_db.db is None:
        await graph_db.connect()
    
    print("VERIFYING DATA STORAGE")
    print("=" * 60)
    
    # Check paper nodes
    if graph_db.db.has_collection('nodes_paper'):
        papers = graph_db.db.collection('nodes_paper')
        paper_count = papers.count()
        print(f"Paper nodes: {paper_count}")
        
        # Show first few papers
        cursor = papers.all(limit=3)
        for paper in cursor:
            print(f"  - Paper: {paper.get('_key', 'N/A')} - {paper.get('title', 'No title')}")
    
    # Check entity nodes
    if graph_db.db.has_collection('nodes_entities'):
        entities = graph_db.db.collection('nodes_entities')
        entity_count = entities.count()
        print(f"Entity nodes: {entity_count}")
        
        # Show first few entities
        cursor = entities.all(limit=5)
        for entity in cursor:
            print(f"  - Entity: {entity.get('_key', 'N/A')} - {entity.get('label', 'No label')} ({entity.get('type', 'No type')})")
    
    # Check edges
    if graph_db.db.has_collection('edges_contains'):
        edges = graph_db.db.collection('edges_contains')
        edge_count = edges.count()
        print(f"Edges: {edge_count}")
        
        # Show first few edges
        cursor = edges.all(limit=5)
        for edge in cursor:
            print(f"  - Edge: {edge.get('_from', 'N/A')} -> {edge.get('_to', 'N/A')} ({edge.get('label', 'No label')})")

await verify_data_storage()

VERIFYING DATA STORAGE
Paper nodes: 0
Edges: 0


In [2]:
papers = graph_db.db.collection('nodes_paper')
papers.count()

0