In [None]:
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_core.documents import Document
from langchain_opengauss import OpenGauss, OpenGaussSettings

# Quick-start walkthrough: configure the store, build an embedding client,
# insert two sample documents, then run one similarity search.

# Configure with schema validation.
# Only table/index options are passed here; connection options (host, port,
# user, password, ...) are omitted, so presumably the library defaults apply.
config = OpenGaussSettings(
    table_name="test_langchain",
    # Must equal the embedding model's output size; all-MiniLM-L6-v2 (used
    # below) produces 384-dimensional vectors.
    embedding_dimension=384,
    index_type="HNSW",
    distance_strategy="COSINE",
)

# Embedding client backed by the Hugging Face Inference API.
embeddings = HuggingFaceInferenceAPIEmbeddings(
    # "xxxx" is a placeholder; replace it with a real API token before running.
    api_key="xxxx", model_name="sentence-transformers/all-MiniLM-L6-v2"
)
# `vector_store` and `embeddings` are referenced again by other cells of this
# notebook, so keep these names.
vector_store = OpenGauss(embedding=embeddings, config=config)

# Insert documents
# Each document carries a "field" metadata key that the filtered search
# example elsewhere in the notebook matches on.
docs = [
    Document(page_content="Quantum computing basics", metadata={"field": "physics"}),
    Document(page_content="Neural network architectures", metadata={"field": "ai"}),
]
vector_store.add_documents(docs)

# Vector similarity search
# k=1 asks for a single best match.
results = vector_store.similarity_search("deep learning models", k=1)
print(f"Found {len(results)} relevant documents")

## Manage vector store



In [None]:
from langchain_opengauss import OpenGauss, OpenGaussSettings

# Configure with schema validation.
# Only table/index options are set; connection options are left out of this
# call.
config = OpenGaussSettings(
    table_name="test_langchain",
    embedding_dimension=384,
    index_type="HNSW",
    distance_strategy="COSINE",
)

# NOTE: `embeddings` is not defined in this cell; it must already exist in the
# notebook session (created by the cell that builds the embedding client).
vector_store = OpenGauss(embedding=embeddings, config=config)

## Configuration

### Connection Settings
| Parameter           | Default                 | Description                                            |
|---------------------|-------------------------|--------------------------------------------------------|
| `host`              | localhost               | Database server address                                |
| `port`              | 8888                    | Database connection port                               |
| `user`              | gaussdb                 | Database username                                      |
| `password`          | -                       | Complex password string                                |
| `database`          | postgres                | Default database name                                  |
| `min_connections`   | 1                       | Connection pool minimum size                           |
| `max_connections`   | 5                       | Connection pool maximum size                           |
| `table_name`        | langchain_docs          | Name of the table for storing vector data and metadata |
| `index_type`        | IndexType.HNSW          | Vector index algorithm type. Options: HNSW or IVFFLAT. Default is HNSW. |
| `vector_type`       | VectorType.vector       |Type of vector representation to use. Default is Vector.|
| `distance_strategy` | DistanceStrategy.COSINE | Vector similarity metric to use for retrieval. Options: euclidean (L2 distance), cosine (angular distance, ideal for text embeddings), manhattan (L1 distance for sparse data), negative_inner_product (dot product for normalized vectors). Default is cosine. |
|`embedding_dimension`| 1536                    |Dimensionality of the vector embeddings.|

### Supported Combinations

| Vector Type | Dimensions | Index Types  | Supported Distance Strategies         |
|-------------|------------|--------------|---------------------------------------|
| vector      | ≤2000      | HNSW/IVFFLAT | COSINE/EUCLIDEAN/MANHATTAN/INNER_PROD |

## Query vector store


In [None]:
# Requires `vector_store` from an earlier cell of this notebook.

# Vector similarity search
# k=1 asks for a single best match.
results = vector_store.similarity_search("deep learning models", k=1)

# Vector similarity search with filter
# The filter matches on the "field" metadata key of the stored documents.
# Note that this rebinds `results`, discarding the unfiltered result above.
results = vector_store.similarity_search(
    "neural network architectures", k=1, filter={"field": "ai"}
)

# openGauss Vector Store for LangChain

[openGauss](https://opengauss.org/en/) is a high-performance relational database with native vector storage and retrieval capabilities. This integration enables ACID-compliant vector operations within LangChain applications, combining traditional SQL functionality with modern AI-driven similarity search.

## Features
- **Hybrid SQL/Vector Operations**: Execute vector searches alongside SQL queries
- **Enterprise-Grade Reliability**: Full ACID compliance and connection pooling
- **Multiple Index Types**: HNSW and IVFFLAT vector indexing support
- **Dimension Validation**: Type-safe constraints up to 2000 dimensions
- **Metadata Filtering**: Combine vector similarity with SQL-style WHERE clauses

## Setup

### Launch openGauss Container

```bash
docker run --name opengauss \
  -d \
  -e GS_PASSWORD='MyStrongPass@123' \
  -p 8888:5432 \
  opengauss/opengauss-server:latest
```

## Initialization

```bash
pip install langchain-opengauss
```

**System Requirements**:
- openGauss ≥ 7.0.0
- Python ≥ 3.8
- psycopg2-binary

## Quick Start

### 1. Basic Vector Operations

## Advanced Usage

### Index Management

In [None]:
# Requires `vector_store` from an earlier cell of this notebook.
# NOTE(review): both index types are created on the same store here,
# presumably for illustration; confirm whether a table should carry only one.

# Create optimized HNSW index
vector_store.create_hnsw_index(
    m=24,  # Number of bi-directional links
    ef_construction=128,  # Construction search scope
    ef=64,  # Query search scope
)

# IVFFLAT index configuration
vector_store.create_ivfflat_index(
    lists=1000  # Number of cluster centers
)

## Performance Optimization

### Index Tuning Guidelines
**HNSW Parameters**:
- `m`: 16-100 (balance between recall and memory)
- `ef_construction`: 64-1000 (must be > 2*m)

**IVFFLAT Recommendations**:

In [None]:
import math

# Heuristic for the IVFFLAT `lists` parameter, derived from the table's row
# count (`total_rows` must be defined before running this cell):
#   - more than 1e6 rows: sqrt(total_rows)
#   - otherwise:          total_rows / 1000
# The value is capped at 2000 and floored at 1, because tables with fewer
# than 1000 rows would otherwise produce lists = 0.
lists = max(
    1,  # always keep at least one cluster
    min(
        int(math.sqrt(total_rows)) if total_rows > 1e6 else int(total_rows / 1000),
        2000,  # openGauss maximum
    ),
)

### Connection Pooling

In [None]:
# Connection pool sizing: minimum pool size 3, maximum pool size 20.
# NOTE: the settings object is not bound to a name in this cell; assign it
# and pass it as `config=` to OpenGauss(...) for it to take effect.
OpenGaussSettings(min_connections=3, max_connections=20)

### Core Methods
| Method                         | Description                                   |
|--------------------------------|-----------------------------------------------|
| `add_documents`                | Insert documents with automatic embedding     |
| `similarity_search`            | Basic vector similarity search                |
| `similarity_search_with_score` | Return (document, similarity_score) tuples    |
| `delete`                       | Remove documents by ID list                   |
| `drop_table`                   | Delete entire collection                      |

## Limitations
- `bit` and `sparsevec` vector types are currently in development
- Maximum vector dimensions: 2000 for `vector` type