# S3 Vectors Basic Demo

Simple demonstration of core S3 Vectors functionality with a small dataset.

This notebook shows:
- Setting up the S3 Vectors client
- Creating embeddings with LM Studio
- Basic vector operations (insert/search)
- Semantic similarity search with 5 sample documents
- Metadata filtering of query results

In [4]:
# Setup S3 Vectors client using boto3
import os
import time

import boto3

# Connect to S3 Vectors using the official boto3 client.
# Endpoint, credentials and region default to the values this demo has always
# used (a server on localhost:8000), but each one can be overridden through an
# environment variable so that real secrets never need to be written into the
# notebook. Dedicated S3VECTORS_* names are used on purpose: picking up the
# standard AWS_* variables could silently replace the local demo credentials.
s3vectors_client = boto3.client(
    's3vectors',
    endpoint_url=os.environ.get('S3VECTORS_ENDPOINT_URL', 'http://localhost:8000'),
    aws_access_key_id=os.environ.get('S3VECTORS_ACCESS_KEY_ID', 'minioadmin'),
    aws_secret_access_key=os.environ.get('S3VECTORS_SECRET_ACCESS_KEY', 'minioadmin123'),
    region_name=os.environ.get('S3VECTORS_REGION', 'us-east-1'),
)

print("‚úÖ S3 Vectors client ready (using official boto3 S3 Vectors service)")

‚úÖ S3 Vectors client ready (using official boto3 S3 Vectors service)


In [5]:
# Embedding function (with fallback)
import requests
import numpy as np

def get_text_embedding(
    text,
    url="http://localhost:1234/v1/embeddings",
    model="text-embedding-nomic-embed-text-v1.5",
    fallback_dim=768,
    timeout=30,
):
    """Generate a text embedding using the LM Studio local server.

    Args:
        text: Text to embed; sent as the ``input`` field of the request.
        url: Embeddings endpoint that receives the POST request.
        model: Model name sent in the request payload.
        fallback_dim: Length of the random vector returned when the request
            does not succeed.
        timeout: Request timeout in seconds.

    Returns:
        ``response.json()['data'][0]['embedding']`` when the server answers
        with HTTP 200; otherwise a list of ``fallback_dim`` random floats
        drawn from ``np.random.rand``.

    Note:
        The fallback vector is random, so search results computed from it are
        not semantically meaningful. It only keeps the notebook runnable when
        the embedding server is unavailable; a warning is printed whenever it
        is used.
    """
    try:
        response = requests.post(
            url,
            headers={"Content-Type": "application/json"},
            json={"input": text, "model": model},
            timeout=timeout,
        )

        if response.status_code == 200:
            return response.json()['data'][0]['embedding']

        print(f"‚ö†Ô∏è Warning: LM Studio request failed with status {response.status_code}, using fallback")

    except Exception as e:
        # Deliberately broad: any failure (connection error, malformed JSON,
        # unexpected payload shape) degrades to the fallback instead of
        # aborting the demo.
        print(f"‚ö†Ô∏è Warning: LM Studio connection failed ({e}), using fallback")

    # Single fallback path shared by the non-200 and the exception case.
    return np.random.rand(fallback_dim).tolist()

# Smoke-test the embedding helper and report the length of the vector it returns
test_embedding = get_text_embedding("test query")
embedding_dim = len(test_embedding)
print(f"‚úÖ Embedding function working, dimension: {embedding_dim}")

‚úÖ Embedding function working, dimension: 768


In [7]:
# Create bucket and index
bucket_name = f"basic-demo-{int(time.time())}"
index_name = "demo-index"


def _create_or_reuse(kind, name, reuse_icon, create_call, **params):
    """Run one create_* call, treating an "already exists" error as reuse.

    Args:
        kind: Resource noun used in the progress messages ("bucket", "index").
        name: Resource name shown in the progress messages.
        reuse_icon: Prefix printed in front of the "Using existing ..." message.
        create_call: Client method to invoke.
        **params: Keyword arguments forwarded to ``create_call``.

    Raises:
        Exception: Re-raises whatever ``create_call`` raised when the error
            text does not contain "already exists".
    """
    print(f"üèóÔ∏è Creating {kind}: {name}")
    try:
        create_call(**params)
        print(f"‚úÖ Created {kind}: {name}")
    except Exception as e:
        # The check is done on the error text, so it does not depend on a
        # particular exception class being raised by the client.
        if "already exists" in str(e).lower():
            print(f"{reuse_icon} Using existing {kind}: {name}")
        else:
            print(f"‚ùå Error creating {kind}: {e}")
            raise


_create_or_reuse(
    "bucket",
    bucket_name,
    "üì¶",
    s3vectors_client.create_vector_bucket,
    vectorBucketName=bucket_name,
)

_create_or_reuse(
    "index",
    index_name,
    "üìä",
    s3vectors_client.create_index,
    vectorBucketName=bucket_name,
    indexName=index_name,
    dimension=768,
    dataType="float32",
    distanceMetric="cosine",
)

print("‚úÖ Setup complete")

üèóÔ∏è Creating bucket: basic-demo-1755921004
‚úÖ Created bucket: basic-demo-1755921004
üèóÔ∏è Creating index: demo-index
‚úÖ Created index: demo-index
‚úÖ Setup complete
‚úÖ Created index: demo-index
‚úÖ Setup complete


In [8]:
# Insert sample documents (small scale)
# Each row is (key, text, category, topic); the rows are expanded into the
# document dictionaries used by the rest of the notebook.
_sample_rows = [
    ("doc1", "Python is a high-level programming language with dynamic semantics.",
     "programming", "python"),
    ("doc2", "Machine learning is a subset of artificial intelligence.",
     "AI", "machine_learning"),
    ("doc3", "Natural language processing enables computers to understand human language.",
     "AI", "nlp"),
    ("doc4", "Vector databases store and search high-dimensional data efficiently.",
     "database", "vectors"),
    ("doc5", "Deep learning uses neural networks with multiple layers.",
     "AI", "deep_learning"),
]

documents = [
    {
        "key": row_key,
        "text": row_text,
        "metadata": {"category": row_category, "topic": row_topic},
    }
    for row_key, row_text, row_category, row_topic in _sample_rows
]

print("üì¶ Inserting sample documents...")

# One vector record per document: the key, its embedding and its metadata.
vectors = [
    {
        "key": entry['key'],
        "data": {"float32": get_text_embedding(entry['text'])},
        "metadata": entry['metadata'],
    }
    for entry in documents
]

response = s3vectors_client.put_vectors(
    vectorBucketName=bucket_name,
    indexName=index_name,
    vectors=vectors,
)
print(f"‚úÖ Inserted {len(vectors)} documents")

üì¶ Inserting sample documents...
‚úÖ Inserted 5 documents
‚úÖ Inserted 5 documents


In [9]:
# Test semantic search
test_queries = [
    "What is artificial intelligence?",
    "How do neural networks work?",
    "Python programming language features",
    "Vector search and similarity"
]

print("üîç Testing semantic search...\n")

# Count failures so the closing message does not claim success after errors.
failed_queries = 0

for i, query_text in enumerate(test_queries, 1):
    print(f"Query {i}: {query_text}")

    query_embedding = get_text_embedding(query_text)

    try:
        results = s3vectors_client.query_vectors(
            vectorBucketName=bucket_name,
            indexName=index_name,
            queryVector={"float32": query_embedding},
            topK=3,
            returnMetadata=True,
            # Distances are only part of the documented response when they are
            # requested; without this flag every result would fall back to the
            # default below and be reported with a bogus similarity.
            returnDistance=True,
        )

        matches = results.get('vectors', [])
        print(f"  Found {len(matches)} results:")

        for j, result in enumerate(matches, 1):
            key = result.get('key', 'Unknown')
            metadata = result.get('metadata', {})
            distance = result.get('distance')
            # Convert distance to similarity; show "n/a" rather than a fake
            # 1.000 when the response carries no distance.
            similarity = "n/a" if distance is None else f"{1 - distance:.3f}"
            category = metadata.get('category', 'N/A')
            topic = metadata.get('topic', 'N/A')
            print(f"    {j}. {key} (similarity: {similarity}, category: {category}, topic: {topic})")
        print()
    except Exception as e:
        failed_queries += 1
        print(f"  ‚ùå Error in search: {e}")
        print()

if failed_queries:
    print(f"‚ùå Basic demo finished: {failed_queries} of {len(test_queries)} queries failed")
else:
    print("‚úÖ Basic demo completed successfully!")
    print("Note: Semantic search is working correctly - documents are ranked by similarity!")

üîç Testing semantic search...

Query 1: What is artificial intelligence?
  Found 3 results:
    1. doc2 (similarity: 0.822, category: AI, topic: machine_learning)
    2. doc3 (similarity: 0.601, category: AI, topic: nlp)
    3. doc5 (similarity: 0.583, category: AI, topic: deep_learning)

Query 2: How do neural networks work?
  Found 3 results:
    1. doc2 (similarity: 0.822, category: AI, topic: machine_learning)
    2. doc3 (similarity: 0.601, category: AI, topic: nlp)
    3. doc5 (similarity: 0.583, category: AI, topic: deep_learning)

Query 2: How do neural networks work?
  Found 3 results:
    1. doc5 (similarity: 0.745, category: AI, topic: deep_learning)
    2. doc2 (similarity: 0.637, category: AI, topic: machine_learning)
    3. doc3 (similarity: 0.607, category: AI, topic: nlp)

Query 3: Python programming language features
  Found 3 results:
    1. doc5 (similarity: 0.745, category: AI, topic: deep_learning)
    2. doc2 (similarity: 0.637, category: AI, topic: machine_lear

In [26]:
# Test metadata filtering
print("üîç Testing metadata filtering functionality...\n")

# Test query for AI-related content
test_query = "artificial intelligence and machine learning"
query_embedding = get_text_embedding(test_query)

print(f"Query: {test_query}")
print("=" * 60)


def _run_filter_test(title, result_label, error_label, query_vector, metadata_filter=None):
    """Run one query (optionally filtered) and print its ranked results.

    Args:
        title: Heading printed before the query runs.
        result_label: Words inserted into the "Found N ... results:" line.
        error_label: Words inserted into the error line if the query fails.
        query_vector: Embedding used as the query vector.
        metadata_filter: Filter document passed as ``filter``; the argument is
            omitted from the request entirely when this is None.
    """
    print(title)
    query_kwargs = {
        "vectorBucketName": bucket_name,
        "indexName": index_name,
        "queryVector": {"float32": query_vector},
        "topK": 5,
        "returnMetadata": True,
        # Request distances explicitly so the similarity below is real.
        "returnDistance": True,
    }
    if metadata_filter is not None:
        query_kwargs["filter"] = metadata_filter

    try:
        results = s3vectors_client.query_vectors(**query_kwargs)

        matches = results.get('vectors', [])
        print(f"  Found {len(matches)} {result_label} results:")
        for j, result in enumerate(matches, 1):
            key = result.get('key', 'Unknown')
            metadata = result.get('metadata', {})
            distance = result.get('distance')
            similarity = "n/a" if distance is None else f"{1 - distance:.3f}"
            category = metadata.get('category', 'N/A')
            topic = metadata.get('topic', 'N/A')
            print(f"    {j}. {key} (similarity: {similarity}, category: {category}, topic: {topic})")
    except Exception as e:
        print(f"  ‚ùå Error in {error_label}: {e}")


# Filters use the S3 Vectors filter-document syntax: {key: {operator: value}}.
# NOTE(review): the previous {"operator", "metadata_key", "value"} shape is not
# part of the documented API - confirm the local server accepts $eq / $in.
filter_tests = [
    # Test 1: Filter by category = "AI"
    ("üìä Test 1: Filter by category = 'AI'",
     "AI-related", "AI filter", {"category": {"$eq": "AI"}}),
    # Test 2: Filter by category = "programming"
    ("üìä Test 2: Filter by category = 'programming'",
     "programming-related", "programming filter", {"category": {"$eq": "programming"}}),
    # Test 3: Filter by topic = "nlp"
    ("üìä Test 3: Filter by topic = 'nlp'",
     "NLP-related", "NLP filter", {"topic": {"$eq": "nlp"}}),
    # Test 4: Test "in" operator with multiple categories
    ("üìä Test 4: Filter by category in ['AI', 'database']",
     "AI or database", "'in' filter", {"category": {"$in": ["AI", "database"]}}),
    # Test 5: No filter (should return all results)
    ("üìä Test 5: No filter (baseline comparison)",
     "total", "no-filter query", None),
]

for position, (title, result_label, error_label, metadata_filter) in enumerate(filter_tests):
    if position:
        # Blank line between consecutive tests.
        print()
    _run_filter_test(title, result_label, error_label, query_embedding, metadata_filter)

print("\n‚úÖ Metadata filtering tests completed!")

üîç Testing metadata filtering functionality...

Query: artificial intelligence and machine learning
üìä Test 1: Filter by category = 'AI'
  Found 5 AI-related results:
    1. doc2 (similarity: 0.894, category: N/A, topic: N/A)
    2. doc2 (similarity: 0.894, category: N/A, topic: N/A)
    3. doc5 (similarity: 0.689, category: N/A, topic: N/A)
    4. doc5 (similarity: 0.689, category: N/A, topic: N/A)
    5. doc3 (similarity: 0.631, category: N/A, topic: N/A)

üìä Test 2: Filter by category = 'programming'
  Found 5 programming-related results:
    1. doc2 (similarity: 0.894, category: N/A, topic: N/A)
    2. doc2 (similarity: 0.894, category: N/A, topic: N/A)
    3. doc5 (similarity: 0.689, category: N/A, topic: N/A)
    4. doc5 (similarity: 0.689, category: N/A, topic: N/A)
    5. doc3 (similarity: 0.631, category: N/A, topic: N/A)

üìä Test 3: Filter by topic = 'nlp'
  Found 5 NLP-related results:
    1. doc2 (similarity: 0.894, category: N/A, topic: N/A)
    2. doc2 (similarity

In [None]:
# Test metadata functionality with fresh bucket after fix
print("? Testing metadata functionality after fixing storage issue...")
print("Creating fresh bucket to test the metadata fix...")

# Create a new bucket and index for testing metadata
test_bucket_name = f"metadata-test-{int(time.time())}"
test_index_name = "metadata-index"

print(f"\nüèóÔ∏è Creating test bucket: {test_bucket_name}")
try:
    s3vectors_client.create_vector_bucket(
        vectorBucketName=test_bucket_name
    )
    print(f"‚úÖ Created test bucket: {test_bucket_name}")
except Exception as e:
    print(f"‚ùå Error creating test bucket: {e}")
    raise

print(f"üèóÔ∏è Creating test index: {test_index_name}")
try:
    s3vectors_client.create_index(
        vectorBucketName=test_bucket_name,
        indexName=test_index_name,
        dimension=768,
        dataType="float32",
        distanceMetric="cosine"
    )
    print(f"‚úÖ Created test index: {test_index_name}")
except Exception as e:
    print(f"‚ùå Error creating test index: {e}")
    raise

# Insert test documents with explicit metadata
test_documents = [
    {
        "key": "ai_doc1",
        "text": "Machine learning algorithms for artificial intelligence applications.",
        "metadata": {"category": "AI", "topic": "machine_learning", "year": 2024}
    },
    {
        "key": "prog_doc1",
        "text": "Python programming language fundamentals and syntax.",
        "metadata": {"category": "programming", "topic": "python", "level": "beginner"}
    },
    {
        "key": "ai_doc2",
        "text": "Natural language processing and text understanding systems.",
        "metadata": {"category": "AI", "topic": "nlp", "year": 2024}
    },
    {
        "key": "db_doc1",
        "text": "Vector databases for high-dimensional similarity search.",
        "metadata": {"category": "database", "topic": "vectors", "type": "nosql"}
    }
]

print(f"\nüì¶ Inserting test documents with metadata...")
test_vectors = [
    {
        "key": doc['key'],
        "data": {"float32": get_text_embedding(doc['text'])},
        "metadata": doc['metadata']
    }
    for doc in test_documents
]

s3vectors_client.put_vectors(
    vectorBucketName=test_bucket_name,
    indexName=test_index_name,
    vectors=test_vectors
)
print(f"‚úÖ Inserted {len(test_vectors)} test documents with metadata")

# Test query with metadata
print(f"\nüîç Testing search with metadata return...")
query_embedding = get_text_embedding("artificial intelligence")
try:
    results = s3vectors_client.query_vectors(
        vectorBucketName=test_bucket_name,
        indexName=test_index_name,
        queryVector={"float32": query_embedding},
        topK=4,
        returnMetadata=True,
        # Ask for distances explicitly; otherwise the similarity printed below
        # would be derived from a default value instead of the real distance.
        returnDistance=True
    )

    matches = results.get('vectors', [])
    print(f"Found {len(matches)} results with metadata:")
    for j, result in enumerate(matches, 1):
        key = result.get('key', 'Unknown')
        metadata = result.get('metadata', {})
        distance = result.get('distance')
        similarity = "n/a" if distance is None else f"{1 - distance:.3f}"
        print(f"  {j}. {key} (similarity: {similarity})")
        print(f"     metadata: {metadata}")
        print()

    # Test metadata filtering
    print("üîç Testing metadata filtering...")

    # Filter by category = "AI", written in the S3 Vectors filter-document
    # syntax ({key: {operator: value}}).
    # NOTE(review): confirm the local server accepts the $eq operator.
    ai_results = s3vectors_client.query_vectors(
        vectorBucketName=test_bucket_name,
        indexName=test_index_name,
        queryVector={"float32": query_embedding},
        topK=4,
        returnMetadata=True,
        filter={"category": {"$eq": "AI"}}
    )

    ai_matches = ai_results.get('vectors', [])
    print(f"AI category filter - Found {len(ai_matches)} results:")
    for result in ai_matches:
        key = result.get('key', 'Unknown')
        metadata = result.get('metadata', {})
        print(f"  - {key}: {metadata}")

    # A filter that is silently ignored still returns results, so verify that
    # everything that came back really carries category == "AI".
    unexpected_keys = [
        result.get('key', 'Unknown')
        for result in ai_matches
        if (result.get('metadata') or {}).get('category') != "AI"
    ]
    if unexpected_keys:
        print(f"‚ö†Ô∏è Filter returned results without category 'AI': {unexpected_keys}")

except Exception as e:
    print(f"‚ùå Error in metadata test: {e}")
    import traceback
    traceback.print_exc()

print("\n‚úÖ Metadata functionality test completed!")

In [None]:
# Quick test: Check if metadata is now working with existing data
print("üîç Quick metadata test with existing bucket...")

# Test if metadata is working with the current bucket
try:
    query_embedding = get_text_embedding("artificial intelligence")
    results = s3vectors_client.query_vectors(
        vectorBucketName=bucket_name,
        indexName=index_name,
        queryVector={"float32": query_embedding},
        topK=2,
        returnMetadata=True,
        # Distances must be requested explicitly to be part of the response.
        returnDistance=True
    )

    print("Results after metadata fix:")
    for i, result in enumerate(results.get('vectors', []), 1):
        key = result.get('key', 'Unknown')
        metadata = result.get('metadata', 'None')
        distance = result.get('distance')
        # Show "n/a" instead of a made-up 0.000 when no distance came back.
        distance_text = "n/a" if distance is None else f"{distance:.3f}"
        print(f"  {i}. {key}: metadata = {metadata}, distance = {distance_text}")

    # Test a simple filter, written in the S3 Vectors filter-document syntax
    # ({key: {operator: value}}).
    # NOTE(review): confirm the local server accepts the $eq operator.
    print("\nTesting simple filter...")
    filtered_results = s3vectors_client.query_vectors(
        vectorBucketName=bucket_name,
        indexName=index_name,
        queryVector={"float32": query_embedding},
        topK=5,
        returnMetadata=True,
        filter={"category": {"$eq": "AI"}}
    )

    filtered_matches = filtered_results.get('vectors', [])
    print(f"Filtered results (category=AI): {len(filtered_matches)} found")
    for result in filtered_matches:
        print(f"  - {result.get('key')}: {result.get('metadata', 'None')}")

except Exception as e:
    print(f"‚ùå Error: {e}")
    import traceback
    traceback.print_exc()

In [None]:
# Simple debug test for metadata functionality
print("üîß Debug: Testing metadata functionality step by step...")

# Step 1: Test basic connection
try:
    print("Step 1: Testing basic connection...")
    query_embedding = get_text_embedding("test")
    print(f"‚úÖ Embedding generated: {len(query_embedding)} dimensions")
except Exception as e:
    print(f"‚ùå Embedding failed: {e}")
    raise

# Step 2: Test simple query without metadata first
try:
    print("\nStep 2: Testing simple query without metadata...")
    results = s3vectors_client.query_vectors(
        vectorBucketName=bucket_name,
        indexName=index_name,
        queryVector={"float32": query_embedding},
        topK=2,
        returnMetadata=False,  # Start without metadata
        returnDistance=True    # Distances are printed below, so request them
    )

    matches = results.get('vectors', [])
    print(f"‚úÖ Basic query works: Found {len(matches)} results")
    for i, result in enumerate(matches, 1):
        key = result.get('key', 'Unknown')
        distance = result.get('distance')
        # Show "n/a" instead of a made-up 0.000 when no distance came back.
        distance_text = "n/a" if distance is None else f"{distance:.3f}"
        print(f"  {i}. {key} (distance: {distance_text})")

except Exception as e:
    print(f"‚ùå Basic query failed: {e}")
    import traceback
    traceback.print_exc()
    raise

# Step 3: Test query WITH metadata
try:
    print("\nStep 3: Testing query WITH metadata...")
    results = s3vectors_client.query_vectors(
        vectorBucketName=bucket_name,
        indexName=index_name,
        queryVector={"float32": query_embedding},
        topK=2,
        returnMetadata=True,  # Now with metadata
        returnDistance=True
    )

    matches = results.get('vectors', [])
    print(f"‚úÖ Metadata query works: Found {len(matches)} results")
    for i, result in enumerate(matches, 1):
        key = result.get('key', 'Unknown')
        metadata = result.get('metadata', 'None')
        distance = result.get('distance')
        distance_text = "n/a" if distance is None else f"{distance:.3f}"
        print(f"  {i}. {key} (distance: {distance_text})")
        print(f"     metadata: {metadata}")

except Exception as e:
    print(f"‚ùå Metadata query failed: {e}")
    import traceback
    traceback.print_exc()

print("\n‚úÖ Debug test completed!")