diff --git a/src/oss/python/integrations/providers/all_providers.mdx b/src/oss/python/integrations/providers/all_providers.mdx index da2212063..23d67eedb 100644 --- a/src/oss/python/integrations/providers/all_providers.mdx +++ b/src/oss/python/integrations/providers/all_providers.mdx @@ -9,3051 +9,3702 @@ Browse the complete collection of integrations available for Python. LangChain P ## Providers - - Custom AI integration platform for enterprise workflows. - - - - Knowledge management platform with AI-powered organization. - - - - Vector database for AI applications with deep learning focus. - - - - Advertising platform for GPT applications and AI services. - - - - Web scraping with natural language queries. - - - - AI21 Labs' Jurassic models for text generation. - - - - Experiment tracking and management platform. - - - - Unified API for multiple AI and ML services. - - - - Decentralized AI computing network platform. - - - - Data integration platform for ETL and ELT pipelines. - - - - Cloud-based spreadsheet and database platform. - - - - Blockchain development platform and APIs. - - - - European AI company's multilingual language models. - - - - Alibaba's cloud computing and AI services. - - - - Alibaba Cloud's real-time analytics database. - - - - Browser automation and web scraping tools. - - - - Approximate nearest neighbors search library. - - - - Claude models for advanced reasoning and conversation. - - - - Distributed computing platform for ML workloads. - - - - Real-time analytical database management system. - - - - Apache Software Foundation tools and libraries. - - - - Web scraping and automation platform. - - - - Apple's machine learning and AI frameworks. - - - - Multi-model database with graph capabilities. - - - - Domain-specific language model training platform. - - - - Geographic information system platform. - - - - Data labeling and annotation platform for NLP. - - - - ML observability and performance monitoring. 
- - - - AI model monitoring and governance platform. - - - - Academic paper repository and search platform. - - - - Data engineering and pipeline automation platform. - - - - Real-time news search and analysis API. - - - - Speech-to-text and audio intelligence API. - - - - DataStax Astra DB vector database platform. - - - - Data visualization and exploration platform. - - - - Vector database for AI and ML applications. - - - - Amazon Web Services cloud platform and AI services. - - - - Song lyrics database and search platform. - - - - Microsoft Azure AI and cognitive services. - - - - Beijing Academy of AI research and models. - - - - Vector database and semantic search platform. - - - - Multi-modal AI database and storage system. - - - - Chinese language model from Baichuan AI. - - - - Baidu's AI services and language models. - - - - Serverless GPU infrastructure for ML models. - - - - ML model deployment and serving platform. - - - - Serverless GPU computing platform. - - - - HTML and XML parsing library for web scraping. - - - - Bibliography management and citation format. - - - - Chinese video sharing platform integration. - - - - Decentralized AI network and incentive protocol. - - - - Educational technology and learning management. - - - - High-performance analytics and data processing. - - - - AI-powered reading and research assistant. - - - - Cloud content management and collaboration. - - - - Privacy-focused search engine API. - - - - AI knowledge management and retrieval platform. - - - - Web data platform and proxy services. - - - - Headless browser automation platform. - - - - Serverless browser automation service. - - - - ByteDance's AI models and services. - - - - Distributed NoSQL database management system. - - - - AI compute platform with specialized processors. - - - - Serverless GPU platform for AI applications. - - - - No-code AI chatbot and automation platform. - - - - Open-source embedding database for AI apps. 
- - - - Computer vision and AI model platform. - - - - ML experiment tracking and automation. - - - - Fast columnar database for analytics. - - - - Project management and productivity platform. - - - - Web infrastructure and security services. - - - - Naver's AI assistant and NLP platform. - - - - Time series database for IoT and analytics. - - - - Memory layer for AI applications and agents. - - - - AI knowledge management and retrieval system. - - - - Language AI platform for enterprise applications. - - - - College admissions and education platform. - - - - ML experiment tracking and model management. - - - - AI observability and monitoring platform. - - - - Team collaboration and documentation platform. - - - - Plugin system for AI agents and applications. - - - - Context management for AI applications. - - - - Contextual AI and language understanding. - - - - NoSQL cloud database platform. - - - - Conversational AI platform and chatbot builder. - - - - Distributed SQL database for machine data. - - - - Python bindings for transformer models in C/C++. - - - - Fast inference engine for Transformer models. - - - - Semantic layer for building data applications. - - - - Real-time AI data platform and API. - - - - Alibaba Cloud's vector database service. - - - - Unified analytics platform for big data and ML. - - - - Monitoring and analytics platform for applications. - - - - Log management and analysis platform. - - - - SEO and SERP data API platform. - - - - Natural language to SQL query platform. - - - - Document analysis and structure detection. - - - - Serverless inference for deep learning models. - - - - Vector database for deep learning applications. - - - - Advanced reasoning and coding AI models. - - - - Inference runtime for sparse neural networks. - - - - Dell Technologies AI and computing solutions. - - - - Web data extraction and knowledge graph. - - - - Distributed vector database system. - - - - Communication platform integration and bots. 
- - - - Discord analytics and moderation tools. - - - - Data structure for multimodal AI applications. - - - - Document processing and AI integration. - - - - Document transformation and processing. - - - - Document AI and semantic processing. - - - - Documentation website generator and platform. - - - - Decentralized knowledge retrieval network. - - - - Cloud storage and file sharing platform. - - - - In-process SQL OLAP database management system. - - - - Privacy-focused search engine integration. - - - - Cloud development environment platform. - - - - Unified API for multiple AI services. - - - - Distributed search and analytics engine. - - - - AI voice synthesis and speech platform. - - - - Framework for creating RAG applications. - - - - Vector database for AI and ML applications. - - - - Ethereum blockchain explorer and analytics. - - - - Serverless AI inference platform. - - - - Note-taking and organization platform. - - - - AI-powered search engine for developers. - - - - Meta's social platform integration and APIs. - - - - Graph database with ultra-low latency. - - - - Serverless, globally distributed database. - - - - Fast and efficient AI model serving. - - - - AI observability and monitoring platform. - - - - Design collaboration and prototyping platform. - - - - Web scraping and crawling API service. - - - - Fast inference platform for open-source models. - - - - Workflow orchestration for ML and data processing. - - - - Financial market data and analytics API. - - - - Fine-tuning platform for language models. - - - - Optimized serving engine for AI models. - - - - Prompt-driven engineering assistant. - - - - Knowledge extraction and NLP platform. - - - - Geographic data analysis with Python. - - - - Version control system integration. - - - - Documentation platform and knowledge base. - - - - Code hosting and collaboration platform. - - - - DevOps platform and code repository. - - - - Tool use framework for AI agents. 
- - - - Knowledge graph and data platform. - - - - Interpretable AI and model analysis. - - - - Google's AI services and cloud platform. - - - - Google Search API service. - - - - Fully managed NLP-as-a-Service platform. - - - - Open-source LLM ecosystem for local deployment. - - - - AI model training and deployment platform. - - - - Private AI model training platform. - - - - Graph-based retrieval augmented generation. - - - - AI observability and monitoring platform. - - - - Sustainable AI computing platform. - - - - Machine learning library for bibliographic data. - - - - Ultra-fast inference with specialized hardware. - - - - Project Gutenberg digital library access. - - - - Tech news and discussion platform. - - - - Machine learning research and tools. - - - - LLM observability and monitoring platform. - - - - Real-time interactive analytics service. - - - - HTML to plain text conversion utility. - - - - Huawei Cloud AI services and models. - - - - Open platform for ML models and datasets. - - - - Web automation and scraping platform. - - - - IBM Watson AI and enterprise solutions. - - - - Enterprise AI and system integration. - - - - Repair guides and technical documentation. - - - - Chinese speech and language AI platform. - - - - Internet Movie Script Database access. - - - - Distributed cache and data grid platform. - - - - High-performance embedding inference server. - - - - Observability and monitoring platform. - - - - Intel's AI optimization tools and libraries. - - - - Brazilian payment processing platform. - - - - Vector database and search platform. - - - - AI model gateway and management platform. - - - - Automation server and CI/CD platform. - - - - Neural search framework and cloud platform. - - - - Enterprise NLP and healthcare AI platform. - - - - Open-source note taking and organization. - - - - Time-series vector database platform. - - - - Real-time analytics and database platform. - - - - Browser-based AI writing assistant. 
- - - - Generative AI platform and model hosting. - - - - Korean natural language processing toolkit. - - - - Embedded graph database management system. - - - - Data labeling and annotation platform. - - - - Git-like version control for data lakes. - - - - Developer-friendly embedded vector database. - - - - Syntactic sugar and utilities for LangChain. - - - - Bias testing framework for language models. - - - - LLM engineering platform and observability. - - - - PostgreSQL vector database extension. - - - - Alibaba Cloud's multi-model database service. - - - - Real-time job market data and search. - - - - Unified interface for 100+ LLM APIs. - - - - Data framework for LLM applications. - - - - Port of Meta's LLaMA model in C/C++. - - - - Edge computing platform for LLaMA models. - - - - Single-file executable for running LLMs. - - - - Observability platform for LLM applications. - - - - Self-hosted OpenAI-compatible API server. - - - - LLM data management and observability. - - - - Open-source relational database management. - - - - Brazilian Portuguese language model. - - - - End-to-end vector search engine. - - - - Wikipedia and MediaWiki data processing. - - - - Lightning-fast search engine platform. - - - - Distributed memory caching system. - - - - Real-time graph database platform. - - - - Managed vector search and retrieval. - - - - Microsoft Azure AI and enterprise services. - - - - Open-source vector database for AI applications. - - - - AI layer for databases and data platforms. - - - - Chinese AI company's language models. - - - - Efficient open-source language models. - - - - ML lifecycle management platform. - - - - Experiment tracking and model registry. - - - - Apple's machine learning framework. - - - - Serverless cloud computing for data science. - - - - Alibaba's open-source model hub. - - - - Payment operations and treasury management. - - - - Serverless cache and vector index. - - - - Document-based NoSQL database platform. 
- - - - Cloud-hosted MongoDB with vector search. - - - - Serverless analytics with DuckDB in the cloud. - - - - Long-term memory for AI conversations. - - - - SQL-compatible vector database platform. - - - - Naver's AI services and language models. - - - - AI cloud platform and infrastructure. - - - - Native graph database and analytics platform. - - - - Decentralized AI computing network. - - - - Web intelligence and data extraction. - - - - Production-ready NLP API platform. - - - - Open-source embedding models and tools. - - - - All-in-one workspace and collaboration platform. - - - - AI-powered search and understanding platform. - - - - NVIDIA's AI computing platform and models. - - - - Connected note-taking and knowledge management. - - - - Distributed relational database system. - - - - Oracle Cloud Infrastructure AI services. - - - - Efficient AI compute and model serving. - - - - Run large language models locally. - - - - RDF database and semantic graph platform. - - - - GPT models and comprehensive AI platform. - - - - Safe, Open, High-Performance — PDF for AI - - - - AI model training and fine-tuning platform. - - - - Operating LLMs in production environment. - - - - Distributed search and analytics suite. - - - - Weather data and forecasting API. - - - - Oracle's AI and machine learning services. - - - - Team knowledge base and wiki platform. - - - - Structured generation for language models. - - - - Web scraping and proxy services. - - - - Data analysis and manipulation library. - - - - Real-time news and media monitoring. - - - - Authorization and access control platform. - - - - AI-powered search and reasoning engine. - - - - Distributed inference for large language models. - - - - PostgreSQL vector embedding extensions. - - - - Vector similarity search for PostgreSQL. - - - - Managed vector database for ML applications. - - - - ML pipeline and model deployment platform. - - - - AI-powered content moderation platform. 
- - - - AI gateway and observability platform. - - - - Fine-tuning platform for large language models. - - - - AI model security and compliance platform. - - - - AI platform for model deployment and management. - - - - Logic programming language integration. - - - - Prompt engineering and observability platform. - - - - Universal API for SaaS integrations. - - - - Biomedical literature database access. - - - - Markdown content extraction and processing. - - - - Conversational AI model platform. - - - - PDF processing optimized for LLM ingestion. - - - - Vector similarity search engine. - - - - RAG toolkit with ColBERT indexing. - - - - BM25 ranking algorithm implementation. - - - - Scalable model serving framework. - - - - Prompt injection detection and prevention. - - - - Social media platform integration and APIs. - - - - In-memory data structure store and cache. - - - - AI memory and context management. - - - - Cloud platform for running ML models. - - - - Research and note-taking platform. - - - - Python automation and RPA platform. - - - - Real-time analytics database platform. - - - - GPU cloud platform for AI workloads. - - - - CRM platform and business automation. - - - - AI platform with specialized hardware. - - - - Enterprise software and AI solutions. - - - - AI-powered web scraping framework. - - - - Web scraping API and proxy service. - - - - Real-time search engine results API. - - - - Privacy-respecting metasearch engine. - - - - Vector database for semantic search. - - - - Google Search results scraping API. - - - - Decentralized AI inference protocol. - - - - Distributed database with vector capabilities. - - - - Machine learning library for Python. - - - - Business communication and collaboration. - - - - Cloud data platform and analytics. - - - - Industrial-strength NLP library. - - - - Unified analytics engine for big data. - - - - iFlytek's multilingual language model. - - - - Payment orchestration platform. 
- - - - Embedded relational database engine. - - - - Q&A platform network integration. - - - - High-performance analytical database. - - - - GPU cloud platform for ML acceleration. - - - - Web app framework for data science. - - - - Online payment processing platform. - - - - Open-source Firebase alternative. - - - - Multi-model database for modern applications. - - - - Conversation intelligence platform. - - - - Data visualization and business intelligence. - - - - Project management platform for agile teams. - - - - Alibaba Cloud's in-memory database. - - - - AI-optimized search API for applications. - - - - Messaging platform and bot integration. - - - - Tencent Cloud AI services and models. - - - - Collection of ready-to-use datasets. - - - - Data infrastructure for ML applications. - - - - Distributed SQL database platform. - - - - Scalable graph database and analytics. - - - - Globally distributed database platform. - - - - Entity resolution and data matching. - - - - Fast inference for open-source models. - - - - HTML to Markdown conversion utility. - - - - Extended toolkit for LangChain applications. - - - - Big data platform and analytics suite. - - - - Visual project management and collaboration. - - - - LLM evaluation and analytics platform. - - - - ML platform for model deployment. - - - - Evaluation framework for LLM applications. - - - - Social media platform integration. - - - - Fast and typo-tolerant search engine. - - - - Data extraction and processing platform. - - - - Document processing and data extraction. - - - - Document AI and OCR platform. - - - - Serverless data platform for Redis and Kafka. - - - - ML observability and evaluation platform. - - - - Single-file vector search engine. - - - - AI platform for healthcare applications. - - - - AI-powered data analysis platform. - - - - Visual data management system. - - - - Distributed vector search engine. - - - - Neural search platform with built-in understanding. 
- - - - Vector database and semantic search. - - - - Big data serving engine for vector search. - - - - Simple vector database for embeddings. - - - - Embedding models and semantic search. - - - - ML experiment tracking and collaboration. - - - - Experiment tracking and model management. - - - - LLM tracing and observability. - - - - Weather data and forecasting services. - - - - Open-source vector database with GraphQL. - - - - Messaging platform integration and automation. - - - - AI observability and data monitoring. - - - - Wikipedia content access and search. - - - - Computational knowledge engine. - - - - Enterprise AI writing platform. - - - - xAI's Grok models for conversational AI. - - - - Serverless database with vector search. - - - - Distributed inference framework for LLMs. - - - - Yahoo services and data integration. - - - - Yandex AI services and language models. - - - - Yandex Database distributed storage system. - - - - AI agent framework and development platform. - - - - Data warehouse and analytics platform. - - - - 01.AI's bilingual language models. - - - - You.com search engine and AI platform. - - - - Video platform integration and content access. - - - - Long-term memory for AI assistants. - - - - High-performance vector database. - - - - ChatGLM and other Chinese language models. - - - - Managed Milvus vector database service. - - - - Reference management and research tool. - + + Custom AI integration platform for enterprise workflows. + + + + Knowledge management platform with AI-powered organization. + + + + Vector database for AI applications with deep learning focus. + + + + Advertising platform for GPT applications and AI services. + + + + Web scraping with natural language queries. + + + + AI21 Labs' Jurassic models for text generation. + + + + Experiment tracking and management platform. + + + + Unified API for multiple AI and ML services. + + + + Decentralized AI computing network platform. 
+ + + + Data integration platform for ETL and ELT pipelines. + + + + Cloud-based spreadsheet and database platform. + + + + Blockchain development platform and APIs. + + + + European AI company's multilingual language models. + + + + Alibaba's cloud computing and AI services. + + + + Alibaba Cloud's real-time analytics database. + + + + Browser automation and web scraping tools. + + + + Approximate nearest neighbors search library. + + + + Claude models for advanced reasoning and conversation. + + + + Distributed computing platform for ML workloads. + + + + Real-time analytical database management system. + + + + Apache Software Foundation tools and libraries. + + + + Web scraping and automation platform. + + + + Apple's machine learning and AI frameworks. + + + + Multi-model database with graph capabilities. + + + + Domain-specific language model training platform. + + + + Geographic information system platform. + + + + Data labeling and annotation platform for NLP. + + + + ML observability and performance monitoring. + + + + AI model monitoring and governance platform. + + + + Academic paper repository and search platform. + + + + Data engineering and pipeline automation platform. + + + + Real-time news search and analysis API. + + + + Speech-to-text and audio intelligence API. + + + + DataStax Astra DB vector database platform. + + + + Data visualization and exploration platform. + + + + Vector database for AI and ML applications. + + + + Amazon Web Services cloud platform and AI services. + + + + Song lyrics database and search platform. + + + + Microsoft Azure AI and cognitive services. + + + + Beijing Academy of AI research and models. + + + + Vector database and semantic search platform. + + + + Multi-modal AI database and storage system. + + + + Chinese language model from Baichuan AI. + + + + Baidu's AI services and language models. + + + + Serverless GPU infrastructure for ML models. + + + + ML model deployment and serving platform. 
+ + + + Serverless GPU computing platform. + + + + HTML and XML parsing library for web scraping. + + + + Bibliography management and citation format. + + + + Chinese video sharing platform integration. + + + + Decentralized AI network and incentive protocol. + + + + Educational technology and learning management. + + + + High-performance analytics and data processing. + + + + AI-powered reading and research assistant. + + + + Cloud content management and collaboration. + + + + Privacy-focused search engine API. + + + + AI knowledge management and retrieval platform. + + + + Web data platform and proxy services. + + + + Headless browser automation platform. + + + + Serverless browser automation service. + + + + ByteDance's AI models and services. + + + + Distributed NoSQL database management system. + + + + AI compute platform with specialized processors. + + + + Serverless GPU platform for AI applications. + + + + No-code AI chatbot and automation platform. + + + + Open-source embedding database for AI apps. + + + + Computer vision and AI model platform. + + + + ML experiment tracking and automation. + + + + Fast columnar database for analytics. + + + + Project management and productivity platform. + + + + Web infrastructure and security services. + + + + Naver's AI assistant and NLP platform. + + + + Time-series database for IoT and analytics. + + + + Memory layer for AI applications and agents. + + + + AI knowledge management and retrieval system. + + + + Language AI platform for enterprise applications. + + + + College admissions and education platform. + + + + ML experiment tracking and model management. + + + + AI observability and monitoring platform. + + + + Team collaboration and documentation platform. + + + + Plugin system for AI agents and applications. + + + + Context management for AI applications. + + + + Contextual AI and language understanding. + + + + NoSQL cloud database platform. + + + + Conversational AI platform and chatbot builder.
+ + + + Distributed SQL database for machine data. + + + + Python bindings for transformer models in C/C++. + + + + Fast inference engine for Transformer models. + + + + Semantic layer for building data applications. + + + + Real-time AI data platform and API. + + + + Alibaba Cloud's vector database service. + + + + Unified analytics platform for big data and ML. + + + + Monitoring and analytics platform for applications. + + + + Log management and analysis platform. + + + + SEO and SERP data API platform. + + + + Natural language to SQL query platform. + + + + Document analysis and structure detection. + + + + Serverless inference for deep learning models. + + + + Vector database for deep learning applications. + + + + Advanced reasoning and coding AI models. + + + + Inference runtime for sparse neural networks. + + + + Dell Technologies AI and computing solutions. + + + + Web data extraction and knowledge graph. + + + + Distributed vector database system. + + + + Communication platform integration and bots. + + + + Discord analytics and moderation tools. + + + + Data structure for multimodal AI applications. + + + + Document processing and AI integration. + + + + Document transformation and processing. + + + + Document AI and semantic processing. + + + + Documentation website generator and platform. + + + + Decentralized knowledge retrieval network. + + + + Cloud storage and file sharing platform. + + + + In-process SQL OLAP database management system. + + + + Privacy-focused search engine integration. + + + + Cloud development environment platform. + + + + Unified API for multiple AI services. + + + + Distributed search and analytics engine. + + + + AI voice synthesis and speech platform. + + + + Framework for creating RAG applications. + + + + Vector database for AI and ML applications. + + + + Ethereum blockchain explorer and analytics. + + + + Serverless AI inference platform. + + + + Note-taking and organization platform. 
+ + + + AI-powered search engine for developers. + + + + Meta's social platform integration and APIs. + + + + Graph database with ultra-low latency. + + + + Serverless, globally distributed database. + + + + Fast and efficient AI model serving. + + + + AI observability and monitoring platform. + + + + Design collaboration and prototyping platform. + + + + Web scraping and crawling API service. + + + + Fast inference platform for open-source models. + + + + Workflow orchestration for ML and data processing. + + + + Financial market data and analytics API. + + + + Fine-tuning platform for language models. + + + + Optimized serving engine for AI models. + + + + Prompt-driven engineering assistant. + + + + Knowledge extraction and NLP platform. + + + + Geographic data analysis with Python. + + + + Version control system integration. + + + + Documentation platform and knowledge base. + + + + Code hosting and collaboration platform. + + + + DevOps platform and code repository. + + + + Tool-use framework for AI agents. + + + + Knowledge graph and data platform. + + + + Interpretable AI and model analysis. + + + + Google's AI services and cloud platform. + + + + Google Search API service. + + + + Fully managed NLP-as-a-Service platform. + + + + Open-source LLM ecosystem for local deployment. + + + + AI model training and deployment platform. + + + + Private AI model training platform. + + + + Graph-based retrieval-augmented generation. + + + + AI observability and monitoring platform. + + + + Sustainable AI computing platform. + + + + Machine learning library for bibliographic data. + + + + Ultra-fast inference with specialized hardware. + + + + Project Gutenberg digital library access. + + + + Tech news and discussion platform. + + + + Machine learning research and tools. + + + + LLM observability and monitoring platform. + + + + Real-time interactive analytics service. + + + + HTML to plain text conversion utility. + + + + Huawei Cloud AI services and models.
+ + + + Open platform for ML models and datasets. + + + + Web automation and scraping platform. + + + + IBM Watson AI and enterprise solutions. + + + + Enterprise AI and system integration. + + + + Repair guides and technical documentation. + + + + Chinese speech and language AI platform. + + + + Internet Movie Script Database access. + + + + Distributed cache and data grid platform. + + + + High-performance embedding inference server. + + + + Observability and monitoring platform. + + + + Intel's AI optimization tools and libraries. + + + + Brazilian payment processing platform. + + + + Vector database and search platform. + + + + AI model gateway and management platform. + + + + Automation server and CI/CD platform. + + + + Neural search framework and cloud platform. + + + + Enterprise NLP and healthcare AI platform. + + + + Open-source note-taking and organization. + + + + Time-series vector database platform. + + + + Real-time analytics and database platform. + + + + Browser-based AI writing assistant. + + + + Generative AI platform and model hosting. + + + + Korean natural language processing toolkit. + + + + Embedded graph database management system. + + + + Data labeling and annotation platform. + + + + Git-like version control for data lakes. + + + + Developer-friendly embedded vector database. + + + + Syntactic sugar and utilities for LangChain. + + + + Bias testing framework for language models. + + + + LLM engineering platform and observability. + + + + PostgreSQL vector database extension. + + + + Alibaba Cloud's multi-model database service. + + + + Real-time job market data and search. + + + + Unified interface for 100+ LLM APIs. + + + + Data framework for LLM applications. + + + + Port of Meta's LLaMA model in C/C++. + + + + Edge computing platform for LLaMA models. + + + + Single-file executable for running LLMs. + + + + Observability platform for LLM applications. + + + + Self-hosted OpenAI-compatible API server.
+ + + + LLM data management and observability. + + + + Open-source relational database management. + + + + Brazilian Portuguese language model. + + + + End-to-end vector search engine. + + + + Wikipedia and MediaWiki data processing. + + + + Lightning-fast search engine platform. + + + + Distributed memory caching system. + + + + Real-time graph database platform. + + + + Managed vector search and retrieval. + + + + Microsoft Azure AI and enterprise services. + + + + Open-source vector database for AI applications. + + + + AI layer for databases and data platforms. + + + + Chinese AI company's language models. + + + + Efficient open-source language models. + + + + ML lifecycle management platform. + + + + Experiment tracking and model registry. + + + + Apple's machine learning framework. + + + + Serverless cloud computing for data science. + + + + Alibaba's open-source model hub. + + + + Payment operations and treasury management. + + + + Serverless cache and vector index. + + + + Document-based NoSQL database platform. + + + + Cloud-hosted MongoDB with vector search. + + + + Serverless analytics with DuckDB in the cloud. + + + + Long-term memory for AI conversations. + + + + SQL-compatible vector database platform. + + + + Naver's AI services and language models. + + + + AI cloud platform and infrastructure. + + + + Native graph database and analytics platform. + + + + Decentralized AI computing network. + + + + Web intelligence and data extraction. + + + + Production-ready NLP API platform. + + + + Open-source embedding models and tools. + + + + All-in-one workspace and collaboration platform. + + + + AI-powered search and understanding platform. + + + + NVIDIA's AI computing platform and models. + + + + Connected note-taking and knowledge management. + + + + Distributed relational database system. + + + + Oracle Cloud Infrastructure AI services. + + + + Efficient AI compute and model serving. + + + + Run large language models locally. 
+ + + + RDF database and semantic graph platform. + + + + GPT models and comprehensive AI platform. + + + + Safe, Open, High-Performance — PDF for AI. + + + + AI model training and fine-tuning platform. + + + + Operating LLMs in production environments. + + + + Distributed search and analytics suite. + + + + Weather data and forecasting API. + + + + Oracle's AI and machine learning services. + + + + Team knowledge base and wiki platform. + + + + Structured generation for language models. + + + + Web scraping and proxy services. + + + + Data analysis and manipulation library. + + + + Payment processing and financial tools. + + + + Real-time news and media monitoring. + + + + Authorization and access control platform. + + + + AI-powered search and reasoning engine. + + + + Distributed inference for large language models. + + + + PostgreSQL vector embedding extensions. + + + + Vector similarity search for PostgreSQL. + + + + Managed vector database for ML applications. + + + + ML pipeline and model deployment platform. + + + + AI-powered content moderation platform. + + + + AI gateway and observability platform. + + + + Fine-tuning platform for large language models. + + + + AI model security and compliance platform. + + + + AI platform for model deployment and management. + + + + Logic programming language integration. + + + + Prompt engineering and observability platform. + + + + Universal API for SaaS integrations. + + + + Biomedical literature database access. + + + + Markdown content extraction and processing. + + + + Conversational AI model platform. + + + + PDF processing optimized for LLM ingestion. + + + + Vector similarity search engine. + + + + RAG toolkit with ColBERT indexing. + + + + BM25 ranking algorithm implementation. + + + + Scalable model serving framework. + + + + Prompt injection detection and prevention. + + + + Social media platform integration and APIs. + + + + In-memory data structure store and cache. + + + + AI memory and context management.
+ + + + Cloud platform for running ML models. + + + + Research and note-taking platform. + + + + Python automation and RPA platform. + + + + Real-time analytics database platform. + + + + GPU cloud platform for AI workloads. + + + + CRM platform and business automation. + + + + AI platform with specialized hardware. + + + + Enterprise software and AI solutions. + + + + AI-powered web scraping framework. + + + + Web scraping API and proxy service. + + + + Real-time search engine results API. + + + + Privacy-respecting metasearch engine. + + + + Vector database for semantic search. + + + + Google Search results scraping API. + + + + Decentralized AI inference protocol. + + + + Distributed database with vector capabilities. + + + + Machine learning library for Python. + + + + Business communication and collaboration. + + + + Cloud data platform and analytics. + + + + Industrial-strength NLP library. + + + + Unified analytics engine for big data. + + + + iFlytek's multilingual language model. + + + + Payment orchestration platform. + + + + Embedded relational database engine. + + + + Q&A platform network integration. + + + + High-performance analytical database. + + + + GPU cloud platform for ML acceleration. + + + + Web app framework for data science. + + + + Online payment processing platform. + + + + Open-source Firebase alternative. + + + + Context-aware retrieval using multiple space types. + + + + Multi-model database for modern applications. + + + + Conversation intelligence platform. + + + + Data visualization and business intelligence. + + + + Project management platform for agile teams. + + + + Alibaba Cloud's in-memory database. + + + + AI-optimized search API for applications. + + + + Messaging platform and bot integration. + + + + Tencent Cloud AI services and models. + + + + Collection of ready-to-use datasets. + + + + Data infrastructure for ML applications. + + + + Distributed SQL database platform. + + + + Scalable graph database and analytics. 
+ + + + Globally distributed database platform. + + + + Entity resolution and data matching. + + + + Fast inference for open-source models. + + + + HTML to Markdown conversion utility. + + + + Extended toolkit for LangChain applications. + + + + Big data platform and analytics suite. + + + + Visual project management and collaboration. + + + + LLM evaluation and analytics platform. + + + + ML platform for model deployment. + + + + Evaluation framework for LLM applications. + + + + Social media platform integration. + + + + Fast and typo-tolerant search engine. + + + + Data extraction and processing platform. + + + + Document processing and data extraction. + + + + Document AI and OCR platform. + + + + Serverless data platform for Redis and Kafka. + + + + ML observability and evaluation platform. + + + + Single-file vector search engine. + + + + AI platform for healthcare applications. + + + + AI-powered data analysis platform. + + + + Visual data management system. + + + + Distributed vector search engine. + + + + Neural search platform with built-in understanding. + + + + Vector database and semantic search. + + + + Big data serving engine for vector search. + + + + Simple vector database for embeddings. + + + + Embedding models and semantic search. + + + + ML experiment tracking and collaboration. + + + + Experiment tracking and model management. + + + + LLM tracing and observability. + + + + Weather data and forecasting services. + + + + Open-source vector database with GraphQL. + + + + Messaging platform integration and automation. + + + + AI observability and data monitoring. + + + + Wikipedia content access and search. + + + + Computational knowledge engine. + + + + Enterprise AI writing platform. + + + + xAI's Grok models for conversational AI. + + + + Serverless database with vector search. + + + + Distributed inference framework for LLMs. + + + + Yahoo services and data integration. + + + + Yandex AI services and language models. 
+ + + + Yandex Database distributed storage system. + + + + AI agent framework and development platform. + + + + Data warehouse and analytics platform. + + + + 01.AI's bilingual language models. + + + + You.com search engine and AI platform. + + + + Video platform integration and content access. + + + + Long-term memory for AI assistants. + + + + High-performance vector database. + + + + ChatGLM and other Chinese language models. + + + + Managed Milvus vector database service. + + + + Reference management and research tool. + + + +## Chat Models + + + + Custom AI chat integration platform. + + + + AI21 Labs' Jurassic models for conversation. + + + + Unified API for multiple chat models. + + + + Alibaba Cloud's model serving platform. + + + + Claude models for advanced reasoning. + + + + Function calling with Claude models. + + + + Microsoft Azure AI chat services. + + + + OpenAI models through Azure platform. + + + + Azure Machine Learning chat endpoints. + + + + Baichuan AI's Chinese language models. + + + + Baidu's Qianfan large model platform. + + + + Foundation models through Amazon Bedrock. + + + + Ultra-fast inference with Cerebras processors. + + + + AI models on Cloudflare's edge platform. + + + + Cohere's language models for conversation. + + + + Context-aware conversational AI. + + + + ByteDance's conversational AI platform. + + + + Real-time AI data platform. + + + + Foundation models on Databricks platform. + + + + Serverless inference for chat models. + + + + Advanced reasoning and coding models. + + + + Unified API for multiple chat providers. + + + + Baidu's ERNIE language model. + + + + Serverless AI inference platform. + + + + Optimized model serving platform. + + + + Fast inference for open-source models. + + + + Optimized serving engine for chat models. + + + + Interpretable AI chat models. + + + + Google's Gemini models for conversation. + + + + Enterprise AI platform with PaLM models. + + + + Route requests across multiple GPT providers. 
+ + + + Private AI model training and chat. + + + + Sustainable AI computing platform. + + + + Ultra-fast inference with specialized hardware. + + + + Open-source models via Hugging Face. + + + + IBM's enterprise AI foundation models. + + + + Jina's conversational AI models. + + + + Real-time analytics with chat interface. + + + + Generative AI platform for chat models. + + + + Unified interface for 100+ chat APIs. + + + + Hosted Llama models via API. + + + + Edge computing for Llama models. + + + + Meta's Llama 2 chat models. + + + + Local inference with llama.cpp. + + + + Brazilian Portuguese conversational AI. + + + + Chinese AI company's chat models. + + + + Mistral's efficient language models. + + + + Apple's machine learning framework. + + + + Alibaba's model hub chat interface. + + + + Moonshot AI's conversational models. + + + + Naver's HyperCLOVA language models. + + + + AI cloud platform for chat models. + + + + Decentralized AI computing network. + + + + NVIDIA's foundation model endpoints. + + + + Oracle Cloud Infrastructure data science. + + + + Oracle's generative AI services. + + + + Efficient AI compute and model serving. + + + + Run large language models locally. + + + + GPT models and OpenAI's chat platform. + + + + Structured generation for language models. + + + + AI-powered search and reasoning. + + + + AI content moderation platform. + + + + Secure and compliant AI models. + + + + AI platform for model deployment. + + + + OpenAI with PromptLayer observability. + + + + Alibaba's Qwen language models. + + + + Alibaba's reasoning-focused model. + + + + Multimodal AI models from Reka. + + + + GPU cloud platform for chat models. + + + + AI platform with specialized hardware. + + + + SambaNova's enterprise AI platform. + + + + AI workflow and automation platform. + + + + AI models on Snowflake data platform. + + + + iFlytek's Spark language models. + + + + Conversation intelligence platform. + + + + Tencent's Hunyuan language models. 
+ + + + Fast inference for open-source models. + + + + Alibaba's Tongyi Qianwen models. + + + + Document AI and chat models. + + + + Fast and memory-efficient inference. + + + + ByteDance's model-as-a-service platform. + + + + Enterprise AI writing platform. + + + + xAI's Grok models for conversation. + + + + Distributed inference framework. + + + + Yandex's language models and AI. + + + + 01.AI's bilingual language models. + + + + IEIT Systems' Yuan 2.0 models. + + + + ChatGLM and other Chinese models. + diff --git a/src/oss/python/integrations/providers/overview.mdx b/src/oss/python/integrations/providers/overview.mdx index 577ccbe59..db2cac159 100644 --- a/src/oss/python/integrations/providers/overview.mdx +++ b/src/oss/python/integrations/providers/overview.mdx @@ -12,7 +12,7 @@ LangChain Python offers an extensive ecosystem with 1000+ integrations across ch Set up your project with our quickstart guide. - + Explore endpoints, parameters, and examples for your API. diff --git a/src/oss/python/integrations/providers/superlinked.mdx b/src/oss/python/integrations/providers/superlinked.mdx new file mode 100644 index 000000000..656aae984 --- /dev/null +++ b/src/oss/python/integrations/providers/superlinked.mdx @@ -0,0 +1,136 @@ +--- +description: LangChain integration package for the Superlinked retrieval stack +title: Superlinked +--- + +### Overview + +Superlinked enables context-aware retrieval using multiple space types (text similarity, categorical, numerical, recency, and more). The `langchain-superlinked` package provides a LangChain-native `SuperlinkedRetriever` that plugs directly into your RAG chains. 
+ +### Links + +- [Integration repository](https://github.com/superlinked/langchain-superlinked) +- [Superlinked core repository](https://links.superlinked.com/langchain_repo_sl) +- [Article: Build RAG using LangChain & Superlinked](https://links.superlinked.com/langchain_article) + +### Install + +```bash +pip install -U langchain-superlinked superlinked +``` + +### Quickstart + +```python +import superlinked.framework as sl +from langchain_superlinked import SuperlinkedRetriever + +# 1) Define schema +class DocumentSchema(sl.Schema): + id: sl.IdField + content: sl.String + +doc_schema = DocumentSchema() + +# 2) Define space and index +text_space = sl.TextSimilaritySpace( + text=doc_schema.content, model="sentence-transformers/all-MiniLM-L6-v2" +) +doc_index = sl.Index([text_space]) + +# 3) Define query +query = ( + sl.Query(doc_index) + .find(doc_schema) + .similar(text_space.text, sl.Param("query_text")) + .select([doc_schema.content]) + .limit(sl.Param("limit")) +) + +# 4) Minimal app setup +source = sl.InMemorySource(schema=doc_schema) +executor = sl.InMemoryExecutor(sources=[source], indices=[doc_index]) +app = executor.run() +source.put([ + {"id": "1", "content": "Machine learning algorithms process data efficiently."}, + {"id": "2", "content": "Natural language processing understands human language."}, +]) + +# 5) LangChain retriever +retriever = SuperlinkedRetriever( + sl_client=app, sl_query=query, page_content_field="content" +) + +# Search +docs = retriever.invoke("artificial intelligence", limit=2) +for d in docs: + print(d.page_content) +``` + +### What the retriever expects (App and Query) + +The retriever takes two core inputs: + +- `sl_client`: a Superlinked App created by running an executor (e.g., `InMemoryExecutor(...).run()`) +- `sl_query`: a `QueryDescriptor` returned by chaining `sl.Query(...).find(...).similar(...).select(...).limit(...)` + +Minimal setup: + +```python +import superlinked.framework as sl +from langchain_superlinked import 
SuperlinkedRetriever + +class Doc(sl.Schema): + id: sl.IdField + content: sl.String + +doc = Doc() +space = sl.TextSimilaritySpace(text=doc.content, model="sentence-transformers/all-MiniLM-L6-v2") +index = sl.Index([space]) + +query = ( + sl.Query(index) + .find(doc) + .similar(space.text, sl.Param("query_text")) + .select([doc.content]) + .limit(sl.Param("limit")) +) + +source = sl.InMemorySource(schema=doc) +app = sl.InMemoryExecutor(sources=[source], indices=[index]).run() + +retriever = SuperlinkedRetriever(sl_client=app, sl_query=query, page_content_field="content") +``` + +Note: For a persistent vector DB, pass `vector_database=...` to the executor (e.g., Qdrant) before `.run()`. + +### Use within a chain + +```python +from langchain_core.runnables import RunnablePassthrough +from langchain_core.prompts import ChatPromptTemplate +from langchain_openai import ChatOpenAI + +def format_docs(docs): + return "\n\n".join(doc.page_content for doc in docs) + +prompt = ChatPromptTemplate.from_template( + """ + Answer based on context:\n\nContext: {context}\nQuestion: {question} + """ +) + +chain = ({"context": retriever | format_docs, "question": RunnablePassthrough()} + | prompt + | ChatOpenAI()) + +answer = chain.invoke("How does machine learning work?") +``` + +### Resources + +- [PyPI: langchain-superlinked](https://pypi.org/project/langchain-superlinked/) +- [PyPI: superlinked](https://pypi.org/project/superlinked/) +- [Source repository](https://github.com/superlinked/langchain-superlinked) +- [Superlinked core repository](https://links.superlinked.com/langchain_repo_sl) +- [Build RAG using LangChain & Superlinked (article)](https://links.superlinked.com/langchain_article) diff --git a/src/oss/python/integrations/retrievers/index.mdx b/src/oss/python/integrations/retrievers/index.mdx index 69bc44af3..2f7832791 100644 --- a/src/oss/python/integrations/retrievers/index.mdx +++ b/src/oss/python/integrations/retrievers/index.mdx @@ -22,6 +22,7 @@ The below 
retrievers allow you to index and search a custom corpus of documents. | [`AzureAISearchRetriever`](/oss/integrations/retrievers/azure_ai_search) | ❌ | ✅ | [`langchain-community`](https://python.langchain.com/api_reference/community/retrievers/langchain_community.retrievers.azure_ai_search.AzureAISearchRetriever.html) | | [`ElasticsearchRetriever`](/oss/integrations/retrievers/elasticsearch_retriever) | ✅ | ✅ | [`langchain-elasticsearch`](https://python.langchain.com/api_reference/elasticsearch/retrievers/langchain_elasticsearch.retrievers.ElasticsearchRetriever.html) | | [`VertexAISearchRetriever`](/oss/integrations/retrievers/google_vertex_ai_search) | ❌ | ✅ | [`langchain-google-community`](https://python.langchain.com/api_reference/google_community/vertex_ai_search/langchain_google_community.vertex_ai_search.VertexAISearchRetriever.html) | +| [`SuperlinkedRetriever`](/oss/integrations/retrievers/superlinked) | ✅ | ❌ | [`langchain-superlinked`](https://python.langchain.com/api_reference/superlinked/retrievers/langchain_superlinked.retrievers.SuperlinkedRetriever.html) | ## External index @@ -93,6 +94,7 @@ The below retrievers will search over an external index (e.g., constructed from + diff --git a/src/oss/python/integrations/retrievers/superlinked.mdx b/src/oss/python/integrations/retrievers/superlinked.mdx new file mode 100644 index 000000000..c04234f27 --- /dev/null +++ b/src/oss/python/integrations/retrievers/superlinked.mdx @@ -0,0 +1,1159 @@ +--- +title: SuperlinkedRetriever +--- + +> [Superlinked](https://github.com/superlinked/superlinked) is a library for building context-aware vector search applications. It provides multi-modal vector spaces that can handle text similarity, categorical similarity, recency, and numerical values with flexible weighting strategies. + +This will help you get started with the SuperlinkedRetriever [retriever](/docs/concepts/retrievers/). 
For detailed documentation of all SuperlinkedRetriever features and configurations, head to the [API reference](https://python.langchain.com/api_reference/superlinked/retrievers/langchain_superlinked.retrievers.SuperlinkedRetriever.html). + +### Further reading + +* External article: [Build RAG using LangChain & Superlinked](https://links.superlinked.com/langchain_article) +* Integration repo: [superlinked/langchain-superlinked](https://github.com/superlinked/langchain-superlinked) +* Superlinked core repo: [superlinked/superlinked](https://links.superlinked.com/langchain_repo_sl) + +### Integration details + +| Retriever | Source | Package | +| :--- | :--- | :---: | +| [SuperlinkedRetriever](https://python.langchain.com/api_reference/superlinked/retrievers/langchain_superlinked.retrievers.SuperlinkedRetriever.html) | Multi-modal vector search | langchain-superlinked | + +## Setup + +The SuperlinkedRetriever requires the `langchain-superlinked` package and its peer dependency `superlinked`. You can install these with: + +```bash +pip install -U langchain-superlinked superlinked +``` + +No API keys are required for basic usage as Superlinked can run in-memory or with local vector databases.
+ +```python +# Optional: Set up for vector database usage +# import os +# os.environ["QDRANT_API_KEY"] = "your-api-key" # For Qdrant +# No setup required for in-memory usage + +``` + +### App and Query: what the retriever needs + +The retriever requires: + +* `sl_client`: a Superlinked App created by an executor's `.run()` +* `sl_query`: a `QueryDescriptor` built via `sl.Query(...).find(...).similar(...).select(...).limit(...)` + +Minimal example: + +```python +import superlinked.framework as sl +from langchain_superlinked import SuperlinkedRetriever + +class Doc(sl.Schema): + id: sl.IdField + content: sl.String + +doc = Doc() +space = sl.TextSimilaritySpace(text=doc.content, model="sentence-transformers/all-MiniLM-L6-v2") +index = sl.Index([space]) + +query = ( + sl.Query(index) + .find(doc) + .similar(space.text, sl.Param("query_text")) + .select([doc.content]) + .limit(sl.Param("limit")) +) + +source = sl.InMemorySource(schema=doc) +app = sl.InMemoryExecutor(sources=[source], indices=[index]).run() + +retriever = SuperlinkedRetriever(sl_client=app, sl_query=query, page_content_field="content") +``` + +For a production setup, create the executor with a vector DB (e.g., Qdrant) and pass it as `vector_database=...` before calling `.run()`. + +## Instantiation + +```python +import superlinked.framework as sl +from langchain_superlinked import SuperlinkedRetriever + + +# 1. Define Schema +class DocumentSchema(sl.Schema): + id: sl.IdField + content: sl.String + + +doc_schema = DocumentSchema() + +# 2. Define Space and Index +text_space = sl.TextSimilaritySpace( + text=doc_schema.content, model="sentence-transformers/all-MiniLM-L6-v2" +) +doc_index = sl.Index([text_space]) + +# 3. Define Query +query = ( + sl.Query(doc_index) + .find(doc_schema) + .similar(text_space.text, sl.Param("query_text")) + .select([doc_schema.content]) + .limit(sl.Param("limit")) +) + +# 4. 
Set up data and app +documents = [ + { + "id": "doc1", + "content": "Machine learning algorithms can process large datasets efficiently.", + }, + { + "id": "doc2", + "content": "Natural language processing enables computers to understand human language.", + }, + { + "id": "doc3", + "content": "Deep learning models require significant computational resources.", + }, + { + "id": "doc4", + "content": "Artificial intelligence is transforming various industries.", + }, + { + "id": "doc5", + "content": "Neural networks are inspired by biological brain structures.", + }, +] + +source = sl.InMemorySource(schema=doc_schema) +executor = sl.InMemoryExecutor(sources=[source], indices=[doc_index]) +app = executor.run() +source.put(documents) + +# 5. Create Retriever +retriever = SuperlinkedRetriever( + sl_client=app, sl_query=query, page_content_field="content", k=3 +) +``` + +## Usage + +```python +# Basic usage +results = retriever.invoke("artificial intelligence and machine learning", limit=2) +for i, doc in enumerate(results, 1): + print(f"Document {i}:") + print(f"Content: {doc.page_content}") + print(f"Metadata: {doc.metadata}") + print("---") +``` + +```python +# Override k parameter at query time +more_results = retriever.invoke("neural networks and deep learning", k=4) +print(f"Retrieved {len(more_results)} documents:") +for i, doc in enumerate(more_results, 1): + print(f"{i}. {doc.page_content[:50]}...") +``` + +## Use within a chain + +Like other retrievers, SuperlinkedRetriever can be incorporated into LLM applications via [chains](/docs/how_to/sequence/). 
+ +We will need an LLM or chat model: + +import ChatModelTabs from "@theme/ChatModelTabs"; + + + +```python +# pip install -qU langchain-openai +import getpass +import os + +if not os.environ.get("OPENAI_API_KEY"): + os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ") + +from langchain_openai import ChatOpenAI + +llm = ChatOpenAI(model="gpt-4o-mini") +``` + +```python +from langchain import hub +from langchain_core.output_parsers import StrOutputParser +from langchain_core.runnables import RunnablePassthrough + +prompt = hub.pull("rlm/rag-prompt") + + +def format_docs(docs): + return "\n\n".join(doc.page_content for doc in docs) + + +rag_chain = ( + {"context": retriever | format_docs, "question": RunnablePassthrough()} + | prompt + | llm + | StrOutputParser() +) + +rag_chain.invoke("What is machine learning and how does it work?") +``` + +## API reference + +For detailed documentation of all SuperlinkedRetriever features and configurations, head to the [API reference](https://python.langchain.com/api_reference/superlinked/retrievers/langchain_superlinked.retrievers.SuperlinkedRetriever.html). + +## SuperlinkedRetriever usage examples + +The following examples demonstrate how to use the SuperlinkedRetriever with different +space configurations to showcase its flexibility across various use cases. + +```python +import superlinked.framework as sl +from datetime import datetime, timedelta +from typing import Optional, List, Dict, Any +from langchain_core.documents import Document + +from langchain_superlinked import SuperlinkedRetriever +``` + +```python +def example_1_simple_text_search(): + """ + Example 1: Simple text-based semantic search + Use case: Basic document retrieval based on content similarity + """ + print("=== Example 1: Simple Text Search ===") + + # 1. Define Schema + class DocumentSchema(sl.Schema): + id: sl.IdField + content: sl.String + + doc_schema = DocumentSchema() + + # 2.
Define Space and Index + text_space = sl.TextSimilaritySpace( + text=doc_schema.content, model="sentence-transformers/all-MiniLM-L6-v2" + ) + + doc_index = sl.Index([text_space]) + + # 3. Define Query + query = ( + sl.Query(doc_index) + .find(doc_schema) + .similar(text_space.text, sl.Param("query_text")) + .select([doc_schema.content]) + .limit(sl.Param("limit")) + ) + + # 4. Set up data and app using executor pattern + documents = [ + { + "id": "doc1", + "content": "Machine learning algorithms can process large datasets efficiently.", + }, + { + "id": "doc2", + "content": "Natural language processing enables computers to understand human language.", + }, + { + "id": "doc3", + "content": "Deep learning models require significant computational resources.", + }, + { + "id": "doc4", + "content": "Data science combines statistics, programming, and domain expertise.", + }, + { + "id": "doc5", + "content": "Artificial intelligence is transforming various industries.", + }, + ] + + # Create source and executor + source = sl.InMemorySource(schema=doc_schema) + executor = sl.InMemoryExecutor(sources=[source], indices=[doc_index]) + app = executor.run() + + # Add data to the source after the app is running + source.put(documents) + + # 5. Create Retriever + retriever = SuperlinkedRetriever( + sl_client=app, sl_query=query, page_content_field="content" + ) + + # 6. Use the retriever + results = retriever.invoke("artificial intelligence and machine learning", limit=3) + + print(f"Query: 'artificial intelligence and machine learning'") + print(f"Found {len(results)} documents:") + for i, doc in enumerate(results, 1): + print(f" {i}. {doc.page_content}") + print() + + +def example_2_multi_space_blog_search(): + """ + Example 2: Multi-space blog post search + Use case: Blog search with content, category, and recency + """ + print("=== Example 2: Multi-Space Blog Search ===") + + # 1. 
Define Schema + class BlogPostSchema(sl.Schema): + id: sl.IdField + title: sl.String + content: sl.String + category: sl.String + published_date: sl.Timestamp + view_count: sl.Integer + + blog_schema = BlogPostSchema() + + # 2. Define Multiple Spaces + # Text similarity for content + content_space = sl.TextSimilaritySpace( + text=blog_schema.content, model="sentence-transformers/all-MiniLM-L6-v2" + ) + + # Title similarity + title_space = sl.TextSimilaritySpace( + text=blog_schema.title, model="sentence-transformers/all-MiniLM-L6-v2" + ) + + # Category similarity + category_space = sl.CategoricalSimilaritySpace( + category_input=blog_schema.category, + categories=["technology", "science", "business", "health", "travel"], + ) + + # Recency (favor recent posts) + recency_space = sl.RecencySpace( + timestamp=blog_schema.published_date, + period_time_list=[ + sl.PeriodTime(timedelta(days=30)), # Last month + sl.PeriodTime(timedelta(days=90)), # Last 3 months + sl.PeriodTime(timedelta(days=365)), # Last year + ], + ) + + # Popularity (based on view count) + popularity_space = sl.NumberSpace( + number=blog_schema.view_count, + min_value=0, + max_value=10000, + mode=sl.Mode.MAXIMUM, + ) + + # 3. Create Index + blog_index = sl.Index( + [content_space, title_space, category_space, recency_space, popularity_space] + ) + + # 4. Define Query with multiple weighted spaces + blog_query = ( + sl.Query( + blog_index, + weights={ + content_space: sl.Param("content_weight"), + title_space: sl.Param("title_weight"), + category_space: sl.Param("category_weight"), + recency_space: sl.Param("recency_weight"), + popularity_space: sl.Param("popularity_weight"), + }, + ) + .find(blog_schema) + .similar(content_space.text, sl.Param("query_text")) + .select( + [ + blog_schema.title, + blog_schema.content, + blog_schema.category, + blog_schema.published_date, + blog_schema.view_count, + ] + ) + .limit(sl.Param("limit")) + ) + + # 5. 
Sample blog data + from datetime import datetime + + # Convert datetime objects to unix timestamps (integers) as required by Timestamp schema field + blog_posts = [ + { + "id": "post1", + "title": "Introduction to Machine Learning", + "content": "Machine learning is revolutionizing how we process data and make predictions.", + "category": "technology", + "published_date": int((datetime.now() - timedelta(days=5)).timestamp()), + "view_count": 1500, + }, + { + "id": "post2", + "title": "The Future of AI in Healthcare", + "content": "Artificial intelligence is transforming medical diagnosis and treatment.", + "category": "health", + "published_date": int((datetime.now() - timedelta(days=15)).timestamp()), + "view_count": 2300, + }, + { + "id": "post3", + "title": "Business Analytics with Python", + "content": "Learn how to use Python for business data analysis and visualization.", + "category": "business", + "published_date": int((datetime.now() - timedelta(days=45)).timestamp()), + "view_count": 980, + }, + { + "id": "post4", + "title": "Deep Learning Neural Networks", + "content": "Understanding neural networks and their applications in modern AI.", + "category": "technology", + "published_date": int((datetime.now() - timedelta(days=2)).timestamp()), + "view_count": 3200, + }, + ] + + # Create source and executor + source = sl.InMemorySource(schema=blog_schema) + executor = sl.InMemoryExecutor(sources=[source], indices=[blog_index]) + app = executor.run() + + # Add data to the source after the app is running + source.put(blog_posts) + + # 6. Create Retriever + retriever = SuperlinkedRetriever( + sl_client=app, + sl_query=blog_query, + page_content_field="content", + metadata_fields=["title", "category", "published_date", "view_count"], + ) + + # 7. 
Demonstrate different weighting strategies + scenarios = [ + { + "name": "Content-focused search", + "params": { + "content_weight": 1.0, + "title_weight": 0.3, + "category_weight": 0.1, + "recency_weight": 0.2, + "popularity_weight": 0.1, + "limit": 3, + }, + }, + { + "name": "Recent posts prioritized", + "params": { + "content_weight": 0.5, + "title_weight": 0.2, + "category_weight": 0.1, + "recency_weight": 1.0, + "popularity_weight": 0.1, + "limit": 3, + }, + }, + { + "name": "Popular posts with category emphasis", + "params": { + "content_weight": 0.6, + "title_weight": 0.3, + "category_weight": 0.8, + "recency_weight": 0.3, + "popularity_weight": 0.9, + "limit": 3, + }, + }, + ] + + query_text = "machine learning and AI applications" + + for scenario in scenarios: + print(f"\n--- {scenario['name']} ---") + print(f"Query: '{query_text}'") + + results = retriever.invoke(query_text, **scenario["params"]) + + for i, doc in enumerate(results, 1): + print( + f" {i}. {doc.metadata['title']} (Category: {doc.metadata['category']}, Views: {doc.metadata['view_count']})" + ) + + print() + + +def example_3_ecommerce_product_search(): + """ + Example 3: E-commerce product search + Use case: Product search with price range, brand preference, and ratings + """ + print("=== Example 3: E-commerce Product Search ===") + + # 1. Define Schema + class ProductSchema(sl.Schema): + id: sl.IdField + name: sl.String + description: sl.String + brand: sl.String + price: sl.Float + rating: sl.Float + category: sl.String + + product_schema = ProductSchema() + + # 2. 
Define Spaces + description_space = sl.TextSimilaritySpace( + text=product_schema.description, model="sentence-transformers/all-MiniLM-L6-v2" + ) + + name_space = sl.TextSimilaritySpace( + text=product_schema.name, model="sentence-transformers/all-MiniLM-L6-v2" + ) + + brand_space = sl.CategoricalSimilaritySpace( + category_input=product_schema.brand, + categories=["Apple", "Samsung", "Sony", "Nike", "Adidas", "Canon"], + ) + + category_space = sl.CategoricalSimilaritySpace( + category_input=product_schema.category, + categories=["electronics", "clothing", "sports", "photography"], + ) + + # Price space (lower prices get higher scores in MINIMUM mode) + price_space = sl.NumberSpace( + number=product_schema.price, + min_value=10.0, + max_value=2000.0, + mode=sl.Mode.MINIMUM, # Favor lower prices + ) + + # Rating space (higher ratings get higher scores) + rating_space = sl.NumberSpace( + number=product_schema.rating, + min_value=1.0, + max_value=5.0, + mode=sl.Mode.MAXIMUM, # Favor higher ratings + ) + + # 3. Create Index + product_index = sl.Index( + [ + description_space, + name_space, + brand_space, + category_space, + price_space, + rating_space, + ] + ) + + # 4. Define Query + product_query = ( + sl.Query( + product_index, + weights={ + description_space: sl.Param("description_weight"), + name_space: sl.Param("name_weight"), + brand_space: sl.Param("brand_weight"), + category_space: sl.Param("category_weight"), + price_space: sl.Param("price_weight"), + rating_space: sl.Param("rating_weight"), + }, + ) + .find(product_schema) + .similar(description_space.text, sl.Param("query_text")) + .select( + [ + product_schema.name, + product_schema.description, + product_schema.brand, + product_schema.price, + product_schema.rating, + product_schema.category, + ] + ) + .limit(sl.Param("limit")) + ) + + # 5. 
Sample product data + products = [ + { + "id": "prod1", + "name": "Wireless Bluetooth Headphones", + "description": "High-quality wireless headphones with noise cancellation and long battery life.", + "brand": "Sony", + "price": 299.99, + "rating": 4.5, + "category": "electronics", + }, + { + "id": "prod2", + "name": "Professional DSLR Camera", + "description": "Full-frame DSLR camera perfect for professional photography and videography.", + "brand": "Canon", + "price": 1299.99, + "rating": 4.8, + "category": "photography", + }, + { + "id": "prod3", + "name": "Running Shoes", + "description": "Comfortable running shoes with excellent cushioning and support for athletes.", + "brand": "Nike", + "price": 129.99, + "rating": 4.3, + "category": "sports", + }, + { + "id": "prod4", + "name": "Smartphone with 5G", + "description": "Latest smartphone with 5G connectivity, advanced camera, and all-day battery.", + "brand": "Samsung", + "price": 899.99, + "rating": 4.6, + "category": "electronics", + }, + { + "id": "prod5", + "name": "Bluetooth Speaker", + "description": "Portable Bluetooth speaker with waterproof design and rich sound quality.", + "brand": "Sony", + "price": 79.99, + "rating": 4.2, + "category": "electronics", + }, + ] + + # Create source and executor + source = sl.InMemorySource(schema=product_schema) + executor = sl.InMemoryExecutor(sources=[source], indices=[product_index]) + app = executor.run() + + # Add data to the source after the app is running + source.put(products) + + # 6. Create Retriever + retriever = SuperlinkedRetriever( + sl_client=app, + sl_query=product_query, + page_content_field="description", + metadata_fields=["name", "brand", "price", "rating", "category"], + ) + + # 7. 
Demonstrate different search strategies + scenarios = [ + { + "name": "Quality-focused search (high ratings matter most)", + "query": "wireless audio device", + "params": { + "description_weight": 0.7, + "name_weight": 0.5, + "brand_weight": 0.2, + "category_weight": 0.3, + "price_weight": 0.1, + "rating_weight": 1.0, # Prioritize high ratings + "limit": 3, + }, + }, + { + "name": "Budget-conscious search (price matters most)", + "query": "electronics device", + "params": { + "description_weight": 0.6, + "name_weight": 0.4, + "brand_weight": 0.1, + "category_weight": 0.2, + "price_weight": 1.0, # Prioritize lower prices + "rating_weight": 0.3, + "limit": 3, + }, + }, + { + "name": "Brand-focused search (brand loyalty)", + "query": "sony products", + "params": { + "description_weight": 0.5, + "name_weight": 0.3, + "brand_weight": 1.0, # Prioritize specific brand + "category_weight": 0.2, + "price_weight": 0.2, + "rating_weight": 0.4, + "limit": 3, + }, + }, + ] + + for scenario in scenarios: + print(f"\n--- {scenario['name']} ---") + print(f"Query: '{scenario['query']}'") + + results = retriever.invoke(scenario["query"], **scenario["params"]) + + for i, doc in enumerate(results, 1): + metadata = doc.metadata + print( + f" {i}. {metadata['name']} ({metadata['brand']}) - ${metadata['price']} - ⭐{metadata['rating']}" + ) + + print() + + +def example_4_news_article_search(): + """ + Example 4: News article search with sentiment and topics + Use case: News search with content, sentiment, topic categorization, and recency + """ + print("=== Example 4: News Article Search ===") + + # 1. Define Schema + class NewsArticleSchema(sl.Schema): + id: sl.IdField + headline: sl.String + content: sl.String + topic: sl.String + sentiment_score: sl.Float # -1 (negative) to 1 (positive) + published_at: sl.Timestamp + source: sl.String + + news_schema = NewsArticleSchema() + + # 2. 
Define Spaces + content_space = sl.TextSimilaritySpace( + text=news_schema.content, model="sentence-transformers/all-MiniLM-L6-v2" + ) + + headline_space = sl.TextSimilaritySpace( + text=news_schema.headline, model="sentence-transformers/all-MiniLM-L6-v2" + ) + + topic_space = sl.CategoricalSimilaritySpace( + category_input=news_schema.topic, + categories=[ + "technology", + "politics", + "business", + "sports", + "entertainment", + "science", + ], + ) + + source_space = sl.CategoricalSimilaritySpace( + category_input=news_schema.source, + categories=["Reuters", "BBC", "CNN", "TechCrunch", "Bloomberg"], + ) + + # Sentiment space (can be configured to prefer positive or negative news) + sentiment_space = sl.NumberSpace( + number=news_schema.sentiment_score, + min_value=-1.0, + max_value=1.0, + mode=sl.Mode.MAXIMUM, # Default to preferring positive news + ) + + # Recency space + recency_space = sl.RecencySpace( + timestamp=news_schema.published_at, + period_time_list=[ + sl.PeriodTime(timedelta(hours=6)), # Last 6 hours + sl.PeriodTime(timedelta(days=1)), # Last day + sl.PeriodTime(timedelta(days=7)), # Last week + ], + ) + + # 3. Create Index + news_index = sl.Index( + [ + content_space, + headline_space, + topic_space, + source_space, + sentiment_space, + recency_space, + ] + ) + + # 4. Define Query + news_query = ( + sl.Query( + news_index, + weights={ + content_space: sl.Param("content_weight"), + headline_space: sl.Param("headline_weight"), + topic_space: sl.Param("topic_weight"), + source_space: sl.Param("source_weight"), + sentiment_space: sl.Param("sentiment_weight"), + recency_space: sl.Param("recency_weight"), + }, + ) + .find(news_schema) + .similar(content_space.text, sl.Param("query_text")) + .select( + [ + news_schema.headline, + news_schema.content, + news_schema.topic, + news_schema.sentiment_score, + news_schema.published_at, + news_schema.source, + ] + ) + .limit(sl.Param("limit")) + ) + + # 5. 
Sample news data + # Convert datetime objects to unix timestamps (integers) as required by Timestamp schema field + news_articles = [ + { + "id": "news1", + "headline": "Major Breakthrough in AI Research Announced", + "content": "Scientists have developed a new artificial intelligence model that shows remarkable improvements in natural language understanding.", + "topic": "technology", + "sentiment_score": 0.8, + "published_at": int((datetime.now() - timedelta(hours=2)).timestamp()), + "source": "TechCrunch", + }, + { + "id": "news2", + "headline": "Stock Market Faces Volatility Amid Economic Concerns", + "content": "Financial markets experienced significant fluctuations today as investors react to new economic data and policy announcements.", + "topic": "business", + "sentiment_score": -0.3, + "published_at": int((datetime.now() - timedelta(hours=8)).timestamp()), + "source": "Bloomberg", + }, + { + "id": "news3", + "headline": "New Climate Research Shows Promising Results", + "content": "Recent studies indicate that innovative climate technologies are showing positive environmental impact and could help address climate change.", + "topic": "science", + "sentiment_score": 0.6, + "published_at": int((datetime.now() - timedelta(hours=12)).timestamp()), + "source": "Reuters", + }, + { + "id": "news4", + "headline": "Tech Companies Report Strong Quarterly Earnings", + "content": "Several major technology companies exceeded expectations in their quarterly earnings reports, driven by AI and cloud computing growth.", + "topic": "technology", + "sentiment_score": 0.7, + "published_at": int((datetime.now() - timedelta(hours=4)).timestamp()), + "source": "CNN", + }, + ] + + # Create source and executor + source = sl.InMemorySource(schema=news_schema) + executor = sl.InMemoryExecutor(sources=[source], indices=[news_index]) + app = executor.run() + + # Add data to the source after the app is running + source.put(news_articles) + + # 6. 
Create Retriever + retriever = SuperlinkedRetriever( + sl_client=app, + sl_query=news_query, + page_content_field="content", + metadata_fields=[ + "headline", + "topic", + "sentiment_score", + "published_at", + "source", + ], + ) + + # 7. Demonstrate different news search strategies + print(f"Query: 'artificial intelligence developments'") + + # Recent technology news + results = retriever.invoke( + "artificial intelligence developments", + content_weight=0.8, + headline_weight=0.6, + topic_weight=0.4, + source_weight=0.2, + sentiment_weight=0.3, + recency_weight=1.0, # Prioritize recent news + limit=2, + ) + + print("\nRecent Technology News:") + for i, doc in enumerate(results, 1): + metadata = doc.metadata + published_timestamp = metadata["published_at"] + # Convert unix timestamp back to datetime for display calculation + published_time = datetime.fromtimestamp(published_timestamp) + hours_ago = (datetime.now() - published_time).total_seconds() / 3600 + sentiment = ( + "📈 Positive" + if metadata["sentiment_score"] > 0 + else "📉 Negative" + if metadata["sentiment_score"] < 0 + else "➡️ Neutral" + ) + + print(f" {i}. 
{metadata['headline']}") + print(f" Source: {metadata['source']} | {sentiment} | {hours_ago:.1f}h ago") + + print() + + +def demonstrate_langchain_integration(): + """ + Example 5: Integration with LangChain RAG pipeline + Shows how to use the SuperlinkedRetriever in a complete RAG workflow + """ + print("=== Example 5: LangChain RAG Integration ===") + + # This would typically be used with an actual LLM + # For demo purposes, we'll just show the retrieval part + + # Quick setup of a simple retriever + class FAQSchema(sl.Schema): + id: sl.IdField + question: sl.String + answer: sl.String + category: sl.String + + faq_schema = FAQSchema() + + text_space = sl.TextSimilaritySpace( + text=faq_schema.question, model="sentence-transformers/all-MiniLM-L6-v2" + ) + + category_space = sl.CategoricalSimilaritySpace( + category_input=faq_schema.category, + categories=["technical", "billing", "general", "account"], + ) + + faq_index = sl.Index([text_space, category_space]) + + faq_query = ( + sl.Query( + faq_index, + weights={ + text_space: sl.Param("text_weight"), + category_space: sl.Param("category_weight"), + }, + ) + .find(faq_schema) + .similar(text_space.text, sl.Param("query_text")) + .select([faq_schema.question, faq_schema.answer, faq_schema.category]) + .limit(sl.Param("limit")) + ) + + # Sample FAQ data + faqs = [ + { + "id": "faq1", + "question": "How do I reset my password?", + "answer": "You can reset your password by clicking 'Forgot Password' on the login page and following the email instructions.", + "category": "account", + }, + { + "id": "faq2", + "question": "Why is my API not working?", + "answer": "Check your API key, rate limits, and ensure you're using the correct endpoint URL.", + "category": "technical", + }, + { + "id": "faq3", + "question": "How do I upgrade my subscription?", + "answer": "Visit the billing section in your account settings to upgrade your plan.", + "category": "billing", + }, + ] + + # Create source and executor + source = 
sl.InMemorySource(schema=faq_schema) + executor = sl.InMemoryExecutor(sources=[source], indices=[faq_index]) + app = executor.run() + + # Add data to the source after the app is running + source.put(faqs) + + retriever = SuperlinkedRetriever( + sl_client=app, + sl_query=faq_query, + page_content_field="answer", + metadata_fields=["question", "category"], + ) + + # Simulate a RAG query + user_question = "I can't access the API" + + print(f"User Question: '{user_question}'") + print("Retrieving relevant context...") + + context_docs = retriever.invoke( + user_question, text_weight=1.0, category_weight=0.3, limit=2 + ) + + print("\nRetrieved Context:") + for i, doc in enumerate(context_docs, 1): + print(f" {i}. Q: {doc.metadata['question']}") + print(f" A: {doc.page_content}") + print(f" Category: {doc.metadata['category']}") + + print( + "\n[In a real RAG setup, this context would be passed to an LLM to generate a response]" + ) + print() + + +def example_6_qdrant_vector_database(): + """ + Example 6: Same retriever with Qdrant vector database + Use case: Production deployment with persistent vector storage + + This demonstrates that SuperlinkedRetriever is vector database agnostic. + The SAME retriever code works with Qdrant (or Redis, MongoDB) by only + changing the executor configuration, not the retriever implementation. + """ + print("=== Example 6: Qdrant Vector Database ===") + + # 1. Define Schema (IDENTICAL to Example 1) + class DocumentSchema(sl.Schema): + id: sl.IdField + content: sl.String + + doc_schema = DocumentSchema() + + # 2. Define Space and Index (IDENTICAL to Example 1) + text_space = sl.TextSimilaritySpace( + text=doc_schema.content, model="sentence-transformers/all-MiniLM-L6-v2" + ) + + doc_index = sl.Index([text_space]) + + # 3. Define Query (IDENTICAL to Example 1) + query = ( + sl.Query(doc_index) + .find(doc_schema) + .similar(text_space.text, sl.Param("query_text")) + .select([doc_schema.content]) + .limit(sl.Param("limit")) + ) + + # 4. 
Configure Qdrant Vector Database (ONLY DIFFERENCE!) + print("🔧 Configuring Qdrant vector database...") + try: + qdrant_vector_db = sl.QdrantVectorDatabase( + url="https://your-qdrant-cluster.qdrant.io", # Replace with your Qdrant URL + api_key="your-api-key-here", # Replace with your API key + default_query_limit=10, + vector_precision=sl.Precision.FLOAT16, + ) + print("Qdrant configuration created (credentials needed for actual connection)") + except Exception as e: + print(f"Qdrant not configured (expected without credentials): {e}") + print("Using in-memory fallback for demonstration...") + qdrant_vector_db = None + + # 5. Set up data and app (SLIGHT DIFFERENCE - vector database parameter) + documents = [ + { + "id": "doc1", + "content": "Machine learning algorithms can process large datasets efficiently.", + }, + { + "id": "doc2", + "content": "Natural language processing enables computers to understand human language.", + }, + { + "id": "doc3", + "content": "Deep learning models require significant computational resources.", + }, + { + "id": "doc4", + "content": "Data science combines statistics, programming, and domain expertise.", + }, + { + "id": "doc5", + "content": "Artificial intelligence is transforming various industries.", + }, + ] + + # Create source and executor with Qdrant (or fallback to in-memory) + source = sl.InMemorySource(schema=doc_schema) + + if qdrant_vector_db: + # Production setup with Qdrant + executor = sl.InMemoryExecutor( + sources=[source], + indices=[doc_index], + vector_database=qdrant_vector_db, # This makes it use Qdrant! + ) + storage_type = "Qdrant (persistent)" + else: + # Fallback to in-memory for demo + executor = sl.InMemoryExecutor(sources=[source], indices=[doc_index]) + storage_type = "In-Memory (fallback)" + + app = executor.run() + + # Add data to the source after the app is running + source.put(documents) + + # 6. Create Retriever (IDENTICAL CODE!) 
+ retriever = SuperlinkedRetriever( + sl_client=app, sl_query=query, page_content_field="content" + ) + + # 7. Use the retriever (IDENTICAL CODE!) + results = retriever.invoke("artificial intelligence and machine learning", limit=3) + + print(f"Vector Storage: {storage_type}") + print(f"Query: 'artificial intelligence and machine learning'") + print(f"Found {len(results)} documents:") + for i, doc in enumerate(results, 1): + print(f" {i}. {doc.page_content}") + + print( + "\nKey Insight: Same SuperlinkedRetriever code works with any vector database!" + ) + print( + "Only executor configuration changes, retriever implementation stays identical" + ) + print("Switch between in-memory → Qdrant → Redis → MongoDB without code changes") + print() + + +def main(): + """ + Run all examples to demonstrate the flexibility of SuperlinkedRetriever + """ + print("SuperlinkedRetriever Examples") + print("=" * 50) + print("This file demonstrates how the SuperlinkedRetriever can be used") + print("with different space configurations for various use cases.\n") + + try: + example_1_simple_text_search() + example_2_multi_space_blog_search() + example_3_ecommerce_product_search() + example_4_news_article_search() + demonstrate_langchain_integration() + example_6_qdrant_vector_database() + + print("All examples completed successfully!") + + except Exception as e: + print(f"Error running examples: {e}") + print("Make sure you have 'superlinked' package installed:") + print("pip install superlinked") +``` diff --git a/src/oss/python/integrations/retrievers/superlinked_examples.mdx b/src/oss/python/integrations/retrievers/superlinked_examples.mdx new file mode 100644 index 000000000..ab657fc5d --- /dev/null +++ b/src/oss/python/integrations/retrievers/superlinked_examples.mdx @@ -0,0 +1,173 @@ +--- +title: SuperlinkedRetriever Examples +--- +This notebook demonstrates how to build a Superlinked App and Query Descriptor and use them with the LangChain `SuperlinkedRetriever`. 
+ +Install the integration from PyPI: + +```bash +pip install -U langchain-superlinked superlinked +``` + +## Setup + +Install the integration and its peer dependency: + +```bash +pip install -U langchain-superlinked superlinked +``` + +## Instantiation + +See below for creating a Superlinked App (`sl_client`) and a `QueryDescriptor` (`sl_query`), then wiring them into `SuperlinkedRetriever`. + +## Usage + +Call `retriever.invoke(query_text, **params)` to retrieve `Document` objects. Examples below show single-space and multi-space setups. + +## Use within a chain + +The retriever can be used in LangChain chains by piping it into your prompt and model. See the main Superlinked retriever page for a full RAG example. + +## API reference + +Refer to the API docs: + +* https://python.langchain.com/api_reference/superlinked/retrievers/langchain_superlinked.retrievers.SuperlinkedRetriever.html + +```python +import superlinked.framework as sl +from langchain_superlinked import SuperlinkedRetriever +from datetime import timedelta + + +# Define schema +class DocumentSchema(sl.Schema): + id: sl.IdField + content: sl.String + + +doc_schema = DocumentSchema() + +# Space + index +text_space = sl.TextSimilaritySpace( + text=doc_schema.content, model="sentence-transformers/all-MiniLM-L6-v2" +) +doc_index = sl.Index([text_space]) + +# Query descriptor +query = ( + sl.Query(doc_index) + .find(doc_schema) + .similar(text_space.text, sl.Param("query_text")) + .select([doc_schema.content]) + .limit(sl.Param("limit")) +) + +# Minimal app +source = sl.InMemorySource(schema=doc_schema) +executor = sl.InMemoryExecutor(sources=[source], indices=[doc_index]) +app = executor.run() + +# Data +source.put( + [ + {"id": "1", "content": "Machine learning algorithms process data efficiently."}, + { + "id": "2", + "content": "Natural language processing understands human language.", + }, + {"id": "3", "content": "Deep learning models require significant compute."}, + ] +) + +# Retriever +retriever 
= SuperlinkedRetriever( + sl_client=app, sl_query=query, page_content_field="content" +) + +retriever.invoke("artificial intelligence", limit=2) +``` + +```python +# Multi-space example (blog posts) +class BlogPostSchema(sl.Schema): + id: sl.IdField + title: sl.String + content: sl.String + category: sl.String + published_date: sl.Timestamp + + +blog = BlogPostSchema() + +content_space = sl.TextSimilaritySpace( + text=blog.content, model="sentence-transformers/all-MiniLM-L6-v2" +) +title_space = sl.TextSimilaritySpace( + text=blog.title, model="sentence-transformers/all-MiniLM-L6-v2" +) +cat_space = sl.CategoricalSimilaritySpace( + category_input=blog.category, categories=["technology", "science", "business"] +) +recency_space = sl.RecencySpace( + timestamp=blog.published_date, + period_time_list=[ + sl.PeriodTime(timedelta(days=30)), + sl.PeriodTime(timedelta(days=90)), + ], +) + +blog_index = sl.Index([content_space, title_space, cat_space, recency_space]) + +blog_query = ( + sl.Query( + blog_index, + weights={ + content_space: sl.Param("content_weight"), + title_space: sl.Param("title_weight"), + cat_space: sl.Param("category_weight"), + recency_space: sl.Param("recency_weight"), + }, + ) + .find(blog) + .similar(content_space.text, sl.Param("query_text")) + .select([blog.title, blog.content, blog.category, blog.published_date]) + .limit(sl.Param("limit")) +) + +source = sl.InMemorySource(schema=blog) +app = sl.InMemoryExecutor(sources=[source], indices=[blog_index]).run() + +from datetime import datetime + +source.put( + [ + { + "id": "p1", + "title": "Intro to ML", + "content": "Machine learning 101", + "category": "technology", + "published_date": int((datetime.now() - timedelta(days=5)).timestamp()), + }, + { + "id": "p2", + "title": "AI in Healthcare", + "content": "Transforming diagnosis", + "category": "science", + "published_date": int((datetime.now() - timedelta(days=15)).timestamp()), + }, + ] +) + +blog_retriever = SuperlinkedRetriever( + 
sl_client=app, + sl_query=blog_query, + page_content_field="content", + metadata_fields=["title", "category", "published_date"], +) + +blog_retriever.invoke( + "machine learning", content_weight=1.0, recency_weight=0.5, limit=2 +) +```