-
Notifications
You must be signed in to change notification settings - Fork 0
Semantic Vector Search
Temp edited this page Sep 23, 2025
·
1 revision
Last Updated: September 23, 2025 1:48 PM EST
The SQLite MCP Server provides comprehensive semantic search capabilities, enabling AI-native applications with embedding storage, similarity search, and hybrid keyword+semantic ranking. This makes it perfect for recommendation systems, question-answering, and content discovery.
π See Vector/Semantic Search Complete Tutorial for complete workflows, OpenAI/HuggingFace integration examples, and advanced use cases.
| Tool | Description |
|---|---|
create_embeddings_table |
Create tables optimized for storing embeddings with metadata |
store_embedding |
Store embedding vectors with associated metadata |
semantic_search |
Perform semantic similarity search using cosine similarity |
hybrid_search |
Combine FTS5 keyword search with semantic similarity |
calculate_similarity |
Calculate cosine similarity between embedding vectors |
batch_similarity_search |
Perform similarity search with multiple query vectors |
// 1. Create table for embeddings
create_embeddings_table({
"table_name": "documents",
"embedding_dim": 1536
})
// 2. Store embedding with content
store_embedding({
"table_name": "documents",
"embedding": [0.1, -0.3, 0.8, /* ... 1536 dimensions */],
"content": "Your document text here",
"metadata": {"title": "Sample Document", "category": "tech"}
})
// 3. Search similar content
semantic_search({
"table_name": "documents",
"query_embedding": [0.2, -0.1, 0.9, /* ... 1536 dimensions */],
"limit": 5,
"similarity_threshold": 0.7
})// Basic embeddings table
create_embeddings_table({
"table_name": "document_embeddings",
"embedding_dim": 1536 // OpenAI text-embedding-ada-002
})
// Advanced embeddings table with metadata
create_embeddings_table({
"table_name": "product_embeddings",
"embedding_dim": 768, // Sentence-BERT
"metadata_columns": {
"title": "TEXT",
"category": "TEXT",
"price": "REAL",
"created_at": "DATETIME DEFAULT CURRENT_TIMESTAMP"
},
"index_columns": ["category", "created_at"]
})
// Multi-modal embeddings table
create_embeddings_table({
"table_name": "multimodal_embeddings",
"embedding_dim": 512,
"embedding_types": ["text", "image", "audio"],
"metadata_columns": {
"content_type": "TEXT",
"source_url": "TEXT",
"content_hash": "TEXT"
}
})// Store document embedding
store_embedding({
"table_name": "document_embeddings",
"embedding": openai_embedding_vector,
"content": "This is a comprehensive guide to machine learning algorithms...",
"metadata": {
"title": "Machine Learning Guide",
"author": "Dr. Smith",
"category": "education",
"tags": ["ML", "algorithms", "tutorial"],
"word_count": 2500,
"reading_time": 10
}
})
// Store product embedding
store_embedding({
"table_name": "product_embeddings",
"embedding": product_embedding_vector,
"content": "Wireless Bluetooth headphones with noise cancellation",
"metadata": {
"title": "Sony WH-1000XM4 Headphones",
"category": "electronics",
"price": 349.99,
"brand": "Sony",
"rating": 4.5
}
})// Simple semantic search
semantic_search({
"table_name": "document_embeddings",
"query_embedding": query_vector,
"limit": 10,
"similarity_threshold": 0.75
})
// Search with metadata filtering
semantic_search({
"table_name": "document_embeddings",
"query_embedding": query_vector,
"limit": 10,
"where_clause": "category = 'technology' AND word_count > 1000",
"similarity_threshold": 0.7
})// Multi-criteria semantic search
semantic_search({
"table_name": "product_embeddings",
"query_embedding": product_query_vector,
"limit": 20,
"similarity_threshold": 0.6,
"where_clause": "price BETWEEN 100 AND 500 AND category = 'electronics'",
"order_by": "similarity_score DESC, price ASC",
"include_metadata": true
})
// Semantic search with exclusions
semantic_search({
"table_name": "document_embeddings",
"query_embedding": query_vector,
"limit": 15,
"exclude_ids": [123, 456, 789], // Exclude already seen items
"diversity_factor": 0.8, // Promote diverse results
"boost_recent": true // Boost newer content
})// Hybrid search combining FTS5 and semantic similarity
hybrid_search({
"table_name": "document_embeddings",
"fts_table": "documents_fts",
"keyword_query": "machine learning neural networks",
"semantic_embedding": ml_query_vector,
"limit": 10,
"keyword_weight": 0.4,
"semantic_weight": 0.6,
"min_keyword_score": 0.1,
"min_semantic_score": 0.6
})
// Advanced hybrid search with boosting
hybrid_search({
"table_name": "product_embeddings",
"fts_table": "products_fts",
"keyword_query": "wireless headphones bluetooth",
"semantic_embedding": headphones_vector,
"limit": 20,
"keyword_weight": 0.3,
"semantic_weight": 0.5,
"popularity_weight": 0.2, // Boost popular items
"boost_columns": {
"rating": 0.1,
"review_count": 0.05
}
})// Context-aware hybrid search
hybrid_search({
"table_name": "document_embeddings",
"fts_table": "documents_fts",
"keyword_query": "database optimization performance",
"semantic_embedding": context_aware_vector,
"limit": 15,
"context": {
"user_expertise": "advanced",
"content_type": "technical",
"recent_topics": ["SQL", "indexing", "query planning"]
},
"adaptive_weights": true, // Adjust weights based on context
"personalization": true
})// Calculate cosine similarity between vectors
calculate_similarity({
"vector1": embedding_a,
"vector2": embedding_b,
"method": "cosine" // cosine, euclidean, dot_product
})
// Batch similarity calculation
calculate_similarity({
"query_vector": user_query_vector,
"table_name": "document_embeddings",
"vector_column": "embedding",
"limit": 100,
"method": "cosine",
"return_distances": true
})// Compare multiple query vectors at once
batch_similarity_search({
"table_name": "document_embeddings",
"query_embeddings": [
{"vector": query1_vector, "weight": 1.0, "label": "primary"},
{"vector": query2_vector, "weight": 0.7, "label": "secondary"},
{"vector": query3_vector, "weight": 0.5, "label": "context"}
],
"combination_method": "weighted_average",
"limit": 10,
"similarity_threshold": 0.7
})// 1. Create document embeddings table
create_embeddings_table({
"table_name": "article_embeddings",
"embedding_dim": 1536,
"metadata_columns": {
"title": "TEXT",
"category": "TEXT",
"author": "TEXT",
"publish_date": "DATE",
"read_time": "INTEGER",
"view_count": "INTEGER"
}
});
// 2. Store article embeddings
const articles = await getArticlesWithEmbeddings();
for (const article of articles) {
await store_embedding({
"table_name": "article_embeddings",
"embedding": article.embedding,
"content": article.content,
"metadata": {
"title": article.title,
"category": article.category,
"author": article.author,
"publish_date": article.publish_date,
"read_time": article.read_time,
"view_count": article.view_count
}
});
}
// 3. Find similar articles
semantic_search({
"table_name": "article_embeddings",
"query_embedding": current_article_embedding,
"limit": 5,
"where_clause": "category = ? AND id != ?",
"params": [current_article.category, current_article.id],
"similarity_threshold": 0.75
});// 1. Product embeddings with rich metadata
create_embeddings_table({
"table_name": "product_embeddings",
"embedding_dim": 768,
"metadata_columns": {
"name": "TEXT",
"category": "TEXT",
"brand": "TEXT",
"price": "REAL",
"rating": "REAL",
"review_count": "INTEGER",
"in_stock": "BOOLEAN"
}
});
// 2. Intelligent product search
hybrid_search({
"table_name": "product_embeddings",
"fts_table": "products_fts",
"keyword_query": user_search_query,
"semantic_embedding": search_embedding,
"limit": 24,
"keyword_weight": 0.4,
"semantic_weight": 0.6,
"where_clause": "in_stock = 1 AND price BETWEEN ? AND ?",
"params": [min_price, max_price],
"boost_columns": {
"rating": 0.1,
"review_count": 0.05
}
});
// 3. Related products
semantic_search({
"table_name": "product_embeddings",
"query_embedding": current_product_embedding,
"limit": 8,
"where_clause": "category = ? AND id != ? AND in_stock = 1",
"params": [current_product.category, current_product.id],
"similarity_threshold": 0.7,
"diversity_factor": 0.8
});// 1. Knowledge base embeddings
create_embeddings_table({
"table_name": "kb_embeddings",
"embedding_dim": 1536,
"metadata_columns": {
"title": "TEXT",
"category": "TEXT",
"difficulty": "TEXT",
"last_updated": "DATETIME",
"helpful_votes": "INTEGER"
}
});
// 2. Intelligent answer retrieval
hybrid_search({
"table_name": "kb_embeddings",
"fts_table": "knowledge_base_fts",
"keyword_query": customer_question,
"semantic_embedding": question_embedding,
"limit": 5,
"keyword_weight": 0.3,
"semantic_weight": 0.7,
"boost_columns": {
"helpful_votes": 0.1
},
"where_clause": "difficulty <= ?",
"params": [customer_expertise_level]
});
// 3. Similar questions
semantic_search({
"table_name": "question_embeddings",
"query_embedding": customer_question_embedding,
"limit": 10,
"similarity_threshold": 0.8,
"include_metadata": true
});// OpenAI text-embedding-ada-002
create_embeddings_table({
"table_name": "openai_embeddings",
"embedding_dim": 1536
});
// Sentence-BERT models
create_embeddings_table({
"table_name": "sbert_embeddings",
"embedding_dim": 768
});
// Custom/smaller models
create_embeddings_table({
"table_name": "custom_embeddings",
"embedding_dim": 384
});// High-precision search (strict threshold)
semantic_search({
"table_name": "embeddings",
"query_embedding": query_vector,
"similarity_threshold": 0.85,
"limit": 5
});
// Discovery search (lower threshold, more results)
semantic_search({
"table_name": "embeddings",
"query_embedding": query_vector,
"similarity_threshold": 0.6,
"limit": 20,
"diversity_factor": 0.7
});// Balanced hybrid approach
hybrid_search({
"table_name": "content_embeddings",
"fts_table": "content_fts",
"keyword_query": user_query,
"semantic_embedding": query_embedding,
"keyword_weight": 0.4,
"semantic_weight": 0.6,
"limit": 15
});// Regular similarity distribution analysis
read_query({
"query": `
SELECT
ROUND(similarity_score, 1) as score_range,
COUNT(*) as frequency
FROM (
SELECT calculate_similarity(embedding, ?) as similarity_score
FROM embeddings_table
LIMIT 1000
)
GROUP BY score_range
ORDER BY score_range DESC
`,
"params": [sample_query_vector]
});- Vector-Index-Optimization - ANN search performance optimization
- Full-Text-Search - FTS5 keyword search capabilities
- Advanced-Text-Processing - Text analysis tools
- MCP-Resources-and-Prompts - Intelligent workflow automation
π§ Semantic Search Tip: Combine semantic search with traditional keyword search for the best user experience. Semantic search understands meaning and context, while keyword search handles exact matches and specific terms.