Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ jobs:

- name: Run Docker container
run: |
docker run --name ob433 -e MODE=slim -p 2881:2881 -d quay.io/oceanbase/oceanbase-ce:4.3.3.0-100000142024101215
docker run --name ob435 -e MODE=slim -p 2881:2881 -d oceanbase/oceanbase-ce:4.3.5.1-101000042025031818

- name: Wait for container to be ready
run: |
timeout=300
while [ $timeout -gt 0 ]; do
if docker logs ob433 | grep -q 'boot success!'; then
if docker logs ob435 | grep -q 'boot success!'; then
echo "Container is ready."
break
fi
Expand All @@ -56,7 +56,7 @@ jobs:
OCEANBASE_USER: 'root@test'
OCEANBASE_PASS: ''
run: |
docker exec ob433 obclient -h $OCEANBASE_HOST -P $OCEANBASE_PORT -u $OCEANBASE_USER -p$OCEANBASE_PASS -e "ALTER SYSTEM ob_vector_memory_limit_percentage = 30; create user 'jtuser'@'%'; GRANT SELECT, INSERT, UPDATE, DELETE ON test.* TO 'jtuser'@'%'; FLUSH PRIVILEGES;"
docker exec ob435 obclient -h $OCEANBASE_HOST -P $OCEANBASE_PORT -u $OCEANBASE_USER -p$OCEANBASE_PASS -e "ALTER SYSTEM ob_vector_memory_limit_percentage = 30; create user 'jtuser'@'%'; GRANT SELECT, INSERT, UPDATE, DELETE ON test.* TO 'jtuser'@'%'; FLUSH PRIVILEGES;"

- name: Run tests
run: |
Expand Down
10 changes: 10 additions & 0 deletions pyobvector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* DataType Specify field type in collection schema for MilvusLikeClient
* VECTOR An extended data type in SQLAlchemy for ObVecClient
* VectorIndex An extended index type in SQLAlchemy for ObVecClient
* FtsIndex Full Text Search Index
* FieldSchema Clas to define field schema in collection for MilvusLikeClient
* CollectionSchema Class to define collection schema for MilvusLikeClient
* PartType Specify partition type of table or collection
Expand All @@ -34,6 +35,9 @@
* st_distance GIS function: calculate distance between Points
* st_dwithin GIS function: check if the distance between two points
* st_astext GIS function: return a Point in human-readable format
* FtsParser Text Parser Type for Full Text Search
* FtsIndexParam Full Text Search index parameter
* MatchAgainst Full Text Search clause
"""
from .client import *
from .schema import (
Expand All @@ -50,6 +54,8 @@
st_distance,
st_dwithin,
st_astext,
FtsIndex,
MatchAgaint,
)
from .json_table import OceanBase

Expand All @@ -64,6 +70,7 @@
"VECTOR",
"POINT",
"VectorIndex",
"FtsIndex",
"OceanBaseDialect",
"AsyncOceanBaseDialect",
"FieldSchema",
Expand All @@ -88,4 +95,7 @@
"st_dwithin",
"st_astext",
"OceanBase",
"FtsParser",
"FtsIndexParam",
"MatchAgaint",
]
5 changes: 5 additions & 0 deletions pyobvector/client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
* ObSubHashPartition Specify Hash subpartition info
* ObKeyPartition Specify Key partition info
* ObSubKeyPartition Specify Key subpartition info
* FtsParser Text Parser Type for Full Text Search
* FtsIndexParam Full Text Search index parameter
"""
from .ob_vec_client import ObVecClient
from .milvus_like_client import MilvusLikeClient
Expand All @@ -35,6 +37,7 @@
from .schema_type import DataType
from .collection_schema import FieldSchema, CollectionSchema
from .partitions import *
from .fts_index_param import FtsParser, FtsIndexParam

__all__ = [
"ObVecClient",
Expand All @@ -57,4 +60,6 @@
"ObSubHashPartition",
"ObKeyPartition",
"ObSubKeyPartition",
"FtsParser",
"FtsIndexParam",
]
44 changes: 44 additions & 0 deletions pyobvector/client/fts_index_param.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""A module to specify fts index parameters"""
from enum import Enum
from typing import List, Optional

class FtsParser(Enum):
IK = 0
NGRAM = 1


class FtsIndexParam:
def __init__(
self,
index_name: str,
field_names: List[str],
parser_type: Optional[FtsParser],
):
self.index_name = index_name
self.field_names = field_names
self.parser_type = parser_type

def param_str(self) -> str:
if self.parser_type is None:
return None
if self.parser_type == FtsParser.IK:
return "ik"
if self.parser_type == FtsParser.NGRAM:
return "ngram"

def __iter__(self):
yield "index_name", self.index_name
yield "field_names", self.field_names
if self.parser_type:
yield "parser_type", self.parser_type

def __str__(self):
return str(dict(self))

def __eq__(self, other: None):
if isinstance(other, self.__class__):
return dict(self) == dict(other)

if isinstance(other, dict):
return dict(self) == other
return False
108 changes: 79 additions & 29 deletions pyobvector/client/index_param.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
class VecIndexType(Enum):
"""Vector index algorithm type"""
HNSW = 0
# IVFFLAT = 1
HNSW_SQ = 1
IVFFLAT = 2
IVFSQ = 3
IVFPQ = 4


class IndexParam:
Expand All @@ -23,6 +26,11 @@ class IndexParam:
HNSW_DEFAULT_EF_CONSTRUCTION = 200
HNSW_DEFAULT_EF_SEARCH = 40
OCEANBASE_DEFAULT_ALGO_LIB = 'vsag'
HNSW_ALGO_NAME = "hnsw"
HNSW_SQ_ALGO_NAME = "hnsw_sq"
IVFFLAT_ALGO_NAME = "ivf_flat"
IVFSQ_ALGO_NAME = "ivf_sq8"
IVFPQ_ALGO_NAME = "ivf_pq"

def __init__(
self, index_name: str, field_name: str, index_type: Union[VecIndexType, str], **kwargs
Expand All @@ -33,47 +41,89 @@ def __init__(
self.index_type = self._get_vector_index_type_str()
self.kwargs = kwargs

def is_index_type_hnsw_serial(self):
return self.index_type in [
IndexParam.HNSW_ALGO_NAME, IndexParam.HNSW_SQ_ALGO_NAME
]

def is_index_type_ivf_serial(self):
return self.index_type in [
IndexParam.IVFFLAT_ALGO_NAME,
IndexParam.IVFSQ_ALGO_NAME,
IndexParam.IVFPQ_ALGO_NAME,
]

def is_index_type_product_quantization(self):
return self.index_type in [
IndexParam.IVFPQ_ALGO_NAME,
]

def _get_vector_index_type_str(self):
"""Parse vector index type to string."""
if isinstance(self.index_type, VecIndexType):
if self.index_type == VecIndexType.HNSW:
return "hnsw"
# elif self.index_type == VecIndexType.IVFFLAT:
# return "ivfflat"
return IndexParam.HNSW_ALGO_NAME
elif self.index_type == VecIndexType.HNSW_SQ:
return IndexParam.HNSW_SQ_ALGO_NAME
elif self.index_type == VecIndexType.IVFFLAT:
return IndexParam.IVFFLAT_ALGO_NAME
elif self.index_type == VecIndexType.IVFSQ:
return IndexParam.IVFSQ_ALGO_NAME
elif self.index_type == VecIndexType.IVFPQ:
return IndexParam.IVFPQ_ALGO_NAME
raise ValueError(f"unsupported vector index type: {self.index_type}")
assert isinstance(self.index_type, str)
if self.index_type.lower() == "hnsw":
return "hnsw"
raise ValueError(f"unsupported vector index type: {self.index_type}")
index_type = self.index_type.lower()
if index_type not in [
IndexParam.HNSW_ALGO_NAME,
IndexParam.HNSW_SQ_ALGO_NAME,
IndexParam.IVFFLAT_ALGO_NAME,
IndexParam.IVFSQ_ALGO_NAME,
IndexParam.IVFPQ_ALGO_NAME,
]:
raise ValueError(f"unsupported vector index type: {self.index_type}")
return index_type

def _parse_kwargs(self):
ob_params = {}
# handle lib
if self.is_index_type_hnsw_serial():
ob_params['lib'] = 'vsag'
else:
ob_params['lib'] = 'OB'
# handle metric_type
ob_params['distance'] = "l2"
if 'metric_type' in self.kwargs:
ob_params['distance'] = self.kwargs['metric_type']
elif self.index_type == "hnsw":
ob_params['distance'] = 'l2'
else:
raise ValueError(f"unsupported vector index type: {self.index_type}")
# handle param
if 'params' in self.kwargs:
for k, v in self.kwargs['params'].items():
if k == 'M':
ob_params['m'] = v
elif k == 'efConstruction':
ob_params['ef_construction'] = v
elif k == 'efSearch':
ob_params['ef_search'] = v
else:
ob_params[k] = v
elif self.index_type == "hnsw":
ob_params['m'] = IndexParam.HNSW_DEFAULT_M
ob_params['ef_construction'] = IndexParam.HNSW_DEFAULT_EF_CONSTRUCTION
ob_params['ef_search'] = IndexParam.HNSW_DEFAULT_EF_SEARCH
else:
raise ValueError(f"unsupported vector index type: {self.index_type}")
# Append OceanBase parameters.
ob_params['lib'] = IndexParam.OCEANBASE_DEFAULT_ALGO_LIB
if self.is_index_type_ivf_serial():
if (self.is_index_type_product_quantization() and
'params' not in self.kwargs):
raise ValueError('params must be configured for IVF index type')

if 'params' not in self.kwargs:
params = {}
else:
params = self.kwargs['params']

if self.is_index_type_product_quantization():
if 'm' not in params:
raise ValueError('m must be configured for IVFSQ or IVFPQ')
ob_params['m'] = params['m']
if 'nlist' in params:
ob_params['nlist'] = params['nlist']
if 'samples_per_nlist' in params:
ob_params['samples_per_nlist'] = params['samples_per_nlist']

if self.is_index_type_hnsw_serial():
if 'params' in self.kwargs:
params = self.kwargs['params']
if 'M' in params:
ob_params['m'] = params['M']
if 'efConstruction' in params:
ob_params['ef_construction'] = params['efConstruction']
if 'efSearch' in params:
ob_params['ef_search'] = params['efSearch']
return ob_params

def param_str(self):
Expand Down
Loading