# Client

> A module for writing vectors to and querying vectors from Postgres

In [97]:
#| default_exp client

In [98]:
#| hide
from nbdev.showdoc import *

In [99]:
#| hide
from dotenv import load_dotenv, find_dotenv
import os

In [100]:
_ = load_dotenv(find_dotenv(), override=True)
service_url = os.environ['TIMESCALE_SERVICE_URL']

In [101]:
#| export
import asyncpg
import uuid
from pgvector.asyncpg import register_vector
from typing import (List, Optional, Union, Dict, Tuple, Any, Iterable, Callable)
import json
import numpy as np
import math
import random
from datetime import timedelta
from datetime import datetime
from datetime import timezone
import calendar

In [102]:
#| export
#copied from Cassandra: https://docs.datastax.com/en/drivers/python/3.2/_modules/cassandra/util.html#uuid_from_time
def uuid_from_time(time_arg = None, node=None, clock_seq=None):
    """
    Converts a datetime or timestamp to a type 1 `uuid.UUID`.

    Parameters
    ----------
    time_arg
        The time to use for the timestamp portion of the UUID.
        This can either be a `datetime` object or a timestamp in seconds
        (as returned from `time.time()`). A naive `datetime` is interpreted
        as system local time and converted to UTC. If not specified, the
        UUID is produced by `uuid.uuid1(node, clock_seq)`.
    node
        Bytes for the UUID (up to 48 bits). If not specified, this
        field is randomized.
    clock_seq
        Clock sequence field for the UUID (up to 14 bits). If not specified,
        a random sequence is generated.

    Returns
    -------
        uuid.UUID:  For the given time, node, and clock sequence

    Raises
    ------
        ValueError: If `clock_seq` is larger than a 14-bit value.
    """
    if time_arg is None:
        return uuid.uuid1(node, clock_seq)
    if hasattr(time_arg, 'utctimetuple'):
        # this is different from the Cassandra version, we assume that a naive datetime is in system time and convert it to UTC
        # we do this because naive datetimes are interpreted as timestamps (without timezone) in postgres
        if time_arg.tzinfo is None:
            time_arg = time_arg.astimezone(timezone.utc)
        seconds = calendar.timegm(time_arg.utctimetuple())
        # Stay in integer arithmetic: as a float, microseconds * 10 exceeds
        # 2**54 for dates from ~2027 on and the low digits would be rounded.
        microseconds = seconds * 1000000 + time_arg.microsecond
    else:
        microseconds = int(time_arg * 1e6)

    # 0x01b21dd213814000 is the number of 100-ns intervals between the
    # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
    intervals = microseconds * 10 + 0x01b21dd213814000

    time_low = intervals & 0xffffffff
    time_mid = (intervals >> 32) & 0xffff
    time_hi_version = (intervals >> 48) & 0x0fff

    if clock_seq is None:
        clock_seq = random.getrandbits(14)
    elif clock_seq > 0x3fff:
        raise ValueError('clock_seq is out of range (need a 14-bit value)')

    clock_seq_low = clock_seq & 0xff
    # The top two bits 0b10 mark the RFC 4122 variant.
    clock_seq_hi_variant = 0x80 | ((clock_seq >> 8) & 0x3f)

    if node is None:
        node = random.getrandbits(48)

    return uuid.UUID(fields=(time_low, time_mid, time_hi_version,
                             clock_seq_hi_variant, clock_seq_low, node), version=1)

# Index Definitions

In [103]:
#| export

class BaseIndex:
    """Base class for embedding index definitions."""

    def get_index_method(self, distance_type: str) -> str:
        """
        Translate a distance operator into the matching operator class name.

        Parameters
        ----------
        distance_type
            One of the operators "<->", "<#>" or "<=>".

        Returns
        -------
            str: The operator class name for the given operator.

        Raises
        ------
            ValueError: If the operator is not one of the three known ones.
        """
        known_methods = (
            ("<->", "vector_l2_ops"),
            ("<#>", "vector_ip_ops"),
            ("<=>", "vector_cosine_ops"),
        )
        for operator, ops_class in known_methods:
            if distance_type == operator:
                return ops_class
        raise ValueError(f"Unknown distance type {distance_type}")

    def create_index_query(self, table_name_quoted:str, column_name_quoted: str, index_name_quoted: str, distance_type: str, num_records_callback: Callable[[], int]) -> str:
        """
        Build the CREATE INDEX statement; subclasses must override this.

        Raises
        ------
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

class IvfflatIndex(BaseIndex):
    def __init__(self, num_records: Optional[int] = None, num_lists: Optional[int] = None) -> None:
        """
        Pgvector's ivfflat index.

        Parameters
        ----------
        num_records
            Number of records to assume when sizing the index. If not given,
            the count is obtained from the callback passed at query-build time.
        num_lists
            Explicit value for the `lists` index option. If not given, it is
            derived from the number of records.
        """
        self.num_records = num_records
        self.num_lists = num_lists

    def get_num_records(self, num_record_callback: Callable[[], int]) -> int:
        """
        Return the configured record count, or ask the callback for it.
        """
        if self.num_records is not None:
            return self.num_records
        return num_record_callback()

    def get_num_lists(self, num_records_callback: Callable[[], int]) -> int:
        """
        Return the value for the `lists` index option.

        An explicitly configured `num_lists` wins. Otherwise the value is
        `num_records / 1000` with a floor of 10, or `sqrt(num_records)` once
        there are more than one million records. The result is rounded to an
        integer so that the generated DDL never contains a fractional value.
        """
        if self.num_lists is not None:
            return self.num_lists

        num_records = self.get_num_records(num_records_callback)
        if num_records > 1000000:
            num_lists = math.sqrt(num_records)
        else:
            num_lists = max(num_records / 1000, 10)
        return int(round(num_lists))

    def create_index_query(self, table_name_quoted:str, column_name_quoted: str, index_name_quoted: str, distance_type: str, num_records_callback: Callable[[], int]) -> str:
        """
        Build the CREATE INDEX statement for an ivfflat index.

        Parameters
        ----------
        table_name_quoted, column_name_quoted, index_name_quoted
            Identifiers, already quoted by the caller.
        distance_type
            Distance operator ("<->", "<#>" or "<=>").
        num_records_callback
            Called to obtain the record count when it is needed to size the
            index and was not configured on this object.

        Returns
        -------
            str: The index creation query.
        """
        index_method = self.get_index_method(distance_type)
        num_lists = self.get_num_lists(num_records_callback)

        return "CREATE INDEX {index_name} ON {table_name} USING ivfflat ({column_name} {index_method}) WITH (lists = {num_lists});"\
            .format(index_name=index_name_quoted, table_name=table_name_quoted, column_name=column_name_quoted, index_method=index_method, num_lists=num_lists)


class HNSWIndex(BaseIndex):
    def __init__(self, m: Optional[int] = None, ef_construction: Optional[int] = None) -> None:
        """
        Pgvector's hnsw index.

        Both options are optional; an option left as None is omitted from the
        generated WITH clause.
        """
        self.m = m
        self.ef_construction = ef_construction

    def create_index_query(self, table_name_quoted:str, column_name_quoted: str, index_name_quoted: str, distance_type: str, num_records_callback: Callable[[], int]) -> str:
        """
        Build the CREATE INDEX statement for an hnsw index.

        The identifiers are expected to be quoted already. The
        `num_records_callback` argument is not used by this index type.
        """
        index_method = self.get_index_method(distance_type)

        configured = (("m", self.m), ("ef_construction", self.ef_construction))
        settings = [f"{name} = {value}" for name, value in configured if value is not None]
        with_clause = ("WITH (" + ", ".join(settings) + ")") if settings else ""

        return (
            f"CREATE INDEX {index_name_quoted} ON {table_name_quoted} "
            f"USING hnsw ({column_name_quoted} {index_method}) {with_clause};"
        )

class TimescaleVectorIndex(BaseIndex):
    def __init__(
        self,
        use_pq: Optional[bool] = None,
        num_neighbors: Optional[int] = None,
        search_list_size: Optional[int] = None,
        max_alpha: Optional[float] = None,
        pq_vector_length: Optional[int] = None,
    ) -> None:
        """
        Timescale's vector index.

        Every option is optional; an option left as None is omitted from the
        generated WITH clause.
        """
        self.use_pq = use_pq
        self.num_neighbors = num_neighbors
        self.search_list_size = search_list_size
        self.max_alpha = max_alpha
        self.pq_vector_length = pq_vector_length

    def create_index_query(self, table_name_quoted:str, column_name_quoted: str, index_name_quoted: str, distance_type: str, num_records_callback: Callable[[], int]) -> str:
        """
        Build the CREATE INDEX statement for a tsv index.

        The identifiers are expected to be quoted already. `distance_type`
        and `num_records_callback` are accepted but not used here.
        """
        configured = (
            ("use_pq", self.use_pq),
            ("num_neighbors", self.num_neighbors),
            ("search_list_size", self.search_list_size),
            ("max_alpha", self.max_alpha),
            ("pq_vector_length", self.pq_vector_length),
        )
        settings = [f"{name} = {value}" for name, value in configured if value is not None]
        with_clause = ("WITH (" + ", ".join(settings) + ")") if settings else ""

        return (
            f"CREATE INDEX {index_name_quoted} ON {table_name_quoted} "
            f"USING tsv ({column_name_quoted}) {with_clause};"
        )


# Query Builder

In [104]:
#| export

# Column positions within a search result row. They follow the SELECT list
# used by QueryBuilder.search_query: id, metadata, contents, embedding, distance.
SEARCH_RESULT_ID_IDX = 0
SEARCH_RESULT_METADATA_IDX = 1
SEARCH_RESULT_CONTENTS_IDX = 2
SEARCH_RESULT_EMBEDDING_IDX = 3
SEARCH_RESULT_DISTANCE_IDX = 4

In [105]:
#| export
class UUIDTimeRange:
    """Time range predicate over the timestamp embedded in version 1 UUID ids."""

    @staticmethod
    def _parse_datetime(input_datetime: Union[datetime, str]):
        """
        Parse a datetime object or string representation of a datetime.

        Args:
            input_datetime (datetime or str): Input datetime or string.
                None and the literal string "None" both mean "no value".

        Returns:
            datetime: Parsed datetime object, or None when no value was given.

        Raises:
            ValueError: If the input cannot be parsed as a datetime.
        """
        if input_datetime is None or input_datetime == "None":
            return None

        if isinstance(input_datetime, datetime):
            # If input is already a datetime object, return it as is
            return input_datetime

        if isinstance(input_datetime, str):
            try:
                # Attempt to parse the input string into a datetime
                return datetime.fromisoformat(input_datetime)
            except ValueError:
                raise ValueError("Invalid datetime string format: {}".format(input_datetime))

        raise ValueError("Input must be a datetime object or string")

    def __init__(self, start_date: Optional[Union[datetime, str]] = None, end_date: Optional[Union[datetime, str]] = None, time_delta: Optional[timedelta] = None, start_inclusive=True, end_inclusive=False):
        """
         A UUIDTimeRange is a time range predicate on the UUID Version 1 timestamps.

         Note that naive datetime objects are interpreted as local time on the python client side and converted to UTC before being sent to the database.

         Parameters
         ----------
         start_date
             Lower bound as a datetime or ISO format string, or None.
         end_date
             Upper bound as a datetime or ISO format string, or None.
         time_delta
             Used to derive whichever bound is missing from the one given.
             It cannot be combined with both start_date and end_date.
         start_inclusive
             Whether the lower bound itself is part of the range.
         end_inclusive
             Whether the upper bound itself is part of the range.

         Raises
         ------
             ValueError: If a bound cannot be parsed as a datetime.
             Exception: If both bounds are missing, if start_date is after
                 end_date, or if time_delta is given together with both bounds.
        """
        start_date = UUIDTimeRange._parse_datetime(start_date)
        end_date = UUIDTimeRange._parse_datetime(end_date)

        if start_date is None and end_date is None:
            raise Exception("start_date and end_date cannot both be None")

        # Normalize naive values to UTC before comparing the bounds: comparing
        # a naive datetime with an aware one raises TypeError.
        if start_date is not None and start_date.tzinfo is None:
            start_date = start_date.astimezone(timezone.utc)

        if end_date is not None and end_date.tzinfo is None:
            end_date = end_date.astimezone(timezone.utc)

        if start_date is not None and end_date is not None:
            if start_date > end_date:
                raise Exception("start_date must be before end_date")

        if time_delta is not None:
            if end_date is None:
                end_date = start_date + time_delta
            elif start_date is None:
                start_date = end_date - time_delta
            else:
                raise Exception("time_delta, start_date and end_date cannot all be specified at the same time")

        self.start_date = start_date
        self.end_date = end_date
        self.start_inclusive = start_inclusive
        self.end_inclusive = end_inclusive

    def __str__(self):
        # Interval notation: square bracket for an inclusive bound, parenthesis for an exclusive one.
        start_str = f"[{self.start_date}" if self.start_inclusive else f"({self.start_date}"
        end_str = f"{self.end_date}]" if self.end_inclusive else f"{self.end_date})"

        return f"UUIDTimeRange {start_str}, {end_str}"

    def build_query(self, params: List) -> Tuple[str, List]:
        """
        Build the SQL condition for this range.

        The bounds are appended to `params` (the list is modified in place)
        and referenced through positional placeholders numbered after the
        entries already in the list.

        Returns
        -------
            Tuple[str, List]: The condition and the parameter list.
        """
        column = "uuid_timestamp(id)"
        queries = []
        if self.start_date is not None:
            comparison = ">=" if self.start_inclusive else ">"
            queries.append(f"{column} {comparison} ${len(params)+1}")
            params.append(self.start_date)
        if self.end_date is not None:
            comparison = "<=" if self.end_inclusive else "<"
            queries.append(f"{column} {comparison} ${len(params)+1}")
            params.append(self.end_date)
        return " AND ".join(queries), params

In [106]:
#| export

class Predicates:
    """Boolean filter over the metadata column, composable with &, | and ~."""

    logical_operators = {
        "AND": "AND",
        "OR": "OR",
        "NOT": "NOT",
    }

    # Accepted comparison operators and the SQL operator each one maps to.
    operators_mapping = {
        "=": "=",
        "==": "=",
        ">=": ">=",
        ">": ">",
        "<=": "<=",
        "<": "<",
        "!=": "<>",
    }

    PredicateValue = Union[str, int, float]
    # Concrete classes behind PredicateValue, used for isinstance checks
    # (isinstance against typing.Union raises TypeError before Python 3.10).
    _predicate_value_types = (str, int, float)

    def __init__(self, *clauses: Union['Predicates', Tuple[str, PredicateValue], Tuple[str, str, PredicateValue], str, PredicateValue], operator: str = 'AND'):
        """
        Predicates class defines predicates on the object metadata. Predicates can be combined using logical operators (&, |, and ~).

        Parameters
        ----------
        clauses
            Predicate clauses. Can be either another Predicates object or a tuple of the form (field, operator, value) or (field, value).
            A single clause may also be passed as three positional arguments: field, operator, value.
        operator
            Logical operator to use when combining the clauses. Can be one of 'AND', 'OR', 'NOT'. Defaults to 'AND'.

        Raises
        ------
            ValueError: If the logical operator is unknown or a clause passed as positional strings is malformed.
        """
        if operator not in self.logical_operators:
            raise ValueError(f"invalid operator: {operator}")
        self.operator = operator
        self.clauses: List[Any] = []
        self.add_clause(*clauses)

    def add_clause(self, *clause: Union['Predicates', Tuple[str, PredicateValue], Tuple[str, str, PredicateValue], str, PredicateValue]):
        """
        Add a clause to the predicates object.

        Parameters
        ----------
        clause: 'Predicates' or Tuple[str, str] or Tuple[str, str, str]
            Predicate clause. Can be either another Predicates object or a tuple of the form (field, operator, value) or (field, value).
            A single clause may also be passed as three positional arguments: field, operator, value.

        Raises
        ------
            ValueError: If a clause passed as positional strings is malformed.
        """
        # A leading string means the clause was given as separate positional
        # arguments rather than as tuples / Predicates objects.
        if clause and isinstance(clause[0], str):
            if len(clause) != 3 or not (isinstance(clause[1], str) and isinstance(clause[2], self._predicate_value_types)):
                raise ValueError(f"Invalid clause format: {clause}")
            self.clauses.append((clause[0], clause[1], clause[2]))
        else:
            self.clauses.extend(clause)

    def __and__(self, other):
        return Predicates(self, other, operator='AND')

    def __or__(self, other):
        return Predicates(self, other, operator='OR')

    def __invert__(self):
        return Predicates(self, operator='NOT')

    def __eq__(self, other):
        if not isinstance(other, Predicates):
            return False

        return (
            self.operator == other.operator and
            self.clauses == other.clauses
        )

    def __repr__(self):
        if self.operator:
            return f"{self.operator}({', '.join(repr(clause) for clause in self.clauses)})"
        else:
            return repr(self.clauses)

    def build_query(self, params: List) -> Tuple[str, List]:
        """
        Build the SQL query string and parameters for the predicates object.

        Values are appended to `params` (the list is modified in place) and
        referenced through positional placeholders numbered after the entries
        already in the list.

        Returns
        -------
            Tuple[str, List]: The condition (an empty string when there is
            nothing to filter on) and the parameter list.

        Raises
        ------
            ValueError: If a clause has an unsupported shape or comparison operator.
        """
        if not self.clauses:
            # Hand back the caller's params untouched so values collected for
            # earlier placeholders are not lost.
            return "", params

        where_conditions = []

        for clause in self.clauses:
            if isinstance(clause, Predicates):
                child_where_clause, params = clause.build_query(params)
                # An empty child contributes nothing; "()" is not valid SQL.
                if child_where_clause:
                    where_conditions.append(f"({child_where_clause})")
                continue

            if not isinstance(clause, tuple):
                # Dropping the clause silently would widen the result set.
                raise ValueError(f"Invalid clause format: {clause}")

            if len(clause) == 2:
                field, value = clause
                operator = "="  # Default operator
            elif len(clause) == 3:
                field, operator, value = clause
                if operator not in self.operators_mapping:
                    raise ValueError(f"Invalid operator: {operator}")
                operator = self.operators_mapping[operator]
            else:
                raise ValueError("Invalid clause format")

            param_name = f"${len(params)+1}"

            if field == '__uuid_timestamp':
                #convert str to timestamp in the database, it's better at it than python
                if isinstance(value, str):
                    where_conditions.append(f"uuid_timestamp(id) {operator} ({param_name}::text)::timestamptz")
                else:
                    where_conditions.append(f"uuid_timestamp(id) {operator} {param_name}")
                params.append(value)
                continue

            # metadata->>'field' yields text; cast it when comparing with a number.
            field_cast = ''
            if isinstance(value, int):
                field_cast = '::int'
            elif isinstance(value, float):
                field_cast = '::numeric'

            # The field name ends up inside a SQL string literal, so embedded
            # single quotes must be doubled.
            safe_field = str(field).replace("'", "''")
            where_conditions.append(f"(metadata->>'{safe_field}'){field_cast} {operator} {param_name}")
            params.append(value)

        if not where_conditions:
            return "", params

        if self.operator == 'NOT':
            or_clauses = (" OR ").join(where_conditions)
            #use IS DISTINCT FROM to treat all-null clauses as False and pass the filter
            where_clause = f"TRUE IS DISTINCT FROM ({or_clauses})"
        else:
            where_clause = (" "+self.operator+" ").join(where_conditions)
        return where_clause, params

In [107]:
#| export
class QueryBuilder:
    def __init__(
            self,
            table_name: str,
            num_dimensions: int,
            distance_type: str,
            id_type: str,
            time_partition_interval: Optional[timedelta],
            infer_filters: bool) -> None:
        """
        Initializes a base Vector object to generate queries for vector clients.

        Parameters
        ----------
        table_name
            The name of the table.
        num_dimensions
            The number of dimensions for the embedding vector.
        distance_type
            The distance type for indexing. Accepts 'cosine' / '<=>' and
            'euclidean' / 'l2' / '<->'.
        id_type
            The type of the id column. Can be either 'UUID' or 'TEXT'.
        time_partition_interval
            If given, the create query also turns the table into a hypertable
            partitioned on the UUID timestamp with this chunk interval.
            Requires the 'UUID' id type.
        infer_filters
            If true, `search_query` turns the `__start_date` / `__end_date`
            keys of a dict filter into a UUID time range filter.

        Raises
        ------
            ValueError: If the distance type or id type is not recognized, or
                time partitioning is requested for a non-UUID id type.
        """
        self.table_name = table_name
        self.num_dimensions = num_dimensions
        if distance_type in ('cosine', '<=>'):
            self.distance_type = '<=>'
        elif distance_type in ('euclidean', '<->', 'l2'):
            self.distance_type = '<->'
        else:
            raise ValueError(f"unrecognized distance_type {distance_type}")

        normalized_id_type = id_type.lower()
        if normalized_id_type not in ('uuid', 'text'):
            raise ValueError(f"unrecognized id_type {id_type}")

        if time_partition_interval is not None and normalized_id_type != 'uuid':
            raise ValueError("time partitioning is only supported for uuid id_type")

        self.id_type = normalized_id_type
        self.time_partition_interval = time_partition_interval
        self.infer_filters = infer_filters

    @staticmethod
    def _quote_ident(ident):
        """
        Quotes an identifier to prevent SQL injection.

        Parameters
        ----------
        ident
            The identifier to be quoted.

        Returns
        -------
            str: The quoted identifier.
        """
        return '"{}"'.format(ident.replace('"', '""'))

    def get_row_exists_query(self):
        """
        Generates a query to check if any rows exist in the table.

        Returns
        -------
            str: The query to check for row existence.
        """
        return "SELECT 1 FROM {table_name} LIMIT 1".format(table_name=self._quote_ident(self.table_name))

    def get_upsert_query(self):
        """
        Generates an upsert query.

        Rows whose insert would conflict with an existing row are skipped
        (ON CONFLICT DO NOTHING).

        Returns
        -------
            str: The upsert query.
        """
        return "INSERT INTO {table_name} (id, metadata, contents, embedding) VALUES ($1, $2, $3, $4) ON CONFLICT DO NOTHING".format(table_name=self._quote_ident(self.table_name))

    def get_approx_count_query(self):
        """
        Generate a query to find the approximate count of records in the table.

        Returns
        -------
            str: the query.
        """
        # todo optimize with approx
        return "SELECT COUNT(*) as cnt FROM {table_name}".format(table_name=self._quote_ident(self.table_name))

    #| export
    def get_create_query(self):
        """
        Generates a query to create the tables, indexes, and extensions needed to store the vector data.

        Returns
        -------
            str: The create table query.
        """
        hypertable_sql = ""
        if self.time_partition_interval is not None:
            hypertable_sql = '''
                CREATE EXTENSION IF NOT EXISTS timescaledb;

                CREATE OR REPLACE FUNCTION public.uuid_timestamp(uuid UUID) RETURNS TIMESTAMPTZ AS $$
                DECLARE
                bytes bytea;
                BEGIN
                bytes := uuid_send(uuid);
                if  (get_byte(bytes, 6) >> 4)::int2 != 1 then
                    RAISE EXCEPTION 'UUID version is not 1';
                end if;
                RETURN to_timestamp(
                            (
                                (
                                (get_byte(bytes, 0)::bigint << 24) |
                                (get_byte(bytes, 1)::bigint << 16) |
                                (get_byte(bytes, 2)::bigint <<  8) |
                                (get_byte(bytes, 3)::bigint <<  0)
                                ) + (
                                ((get_byte(bytes, 4)::bigint << 8 |
                                get_byte(bytes, 5)::bigint)) << 32
                                ) + (
                                (((get_byte(bytes, 6)::bigint & 15) << 8 | get_byte(bytes, 7)::bigint) & 4095) << 48
                                ) - 122192928000000000
                            ) / 10000 / 1000::double precision
                        );
                END
                $$ LANGUAGE plpgsql
                IMMUTABLE PARALLEL SAFE
                RETURNS NULL ON NULL INPUT;

                SELECT create_hypertable('{table_name}', 
                    'id', 
                    if_not_exists=> true, 
                    time_partitioning_func=>'public.uuid_timestamp', 
                    chunk_time_interval => '{chunk_time_interval} seconds'::interval);
            '''.format(
                # The quoted identifier sits inside a SQL string literal here,
                # so single quotes in the table name must be doubled as well.
                table_name=self._quote_ident(self.table_name).replace("'", "''"),
                chunk_time_interval=str(self.time_partition_interval.total_seconds()),
                )
        return '''
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS timescale_vector;


CREATE TABLE IF NOT EXISTS {table_name} (
    id {id_type} PRIMARY KEY,
    metadata JSONB,
    contents TEXT,
    embedding VECTOR({dimensions})
);

CREATE INDEX IF NOT EXISTS {index_name} ON {table_name} USING GIN(metadata jsonb_path_ops);

{hypertable_sql}
'''.format(
            table_name=self._quote_ident(self.table_name), 
            id_type=self.id_type, 
            index_name=self._quote_ident(self.table_name+"_meta_idx"), 
            dimensions=self.num_dimensions,
            hypertable_sql=hypertable_sql,
            )

    def _get_embedding_index_name(self):
        """Return the quoted name of the embedding index for this table."""
        return self._quote_ident(self.table_name+"_embedding_idx")

    def drop_embedding_index_query(self):
        """Generates a query that drops the embedding index if it exists."""
        return "DROP INDEX IF EXISTS {index_name};".format(index_name=self._get_embedding_index_name())

    def delete_all_query(self):
        """Generates a query that removes all rows from the table."""
        return "TRUNCATE {table_name};".format(table_name=self._quote_ident(self.table_name))

    def delete_by_ids_query(self, ids: Union[List[uuid.UUID], List[str]]) -> Tuple[str, List]:
        """
        Generates a query that deletes the rows with the given ids.

        Returns
        -------
            Tuple[str, List]: The query and its parameters (the id list as a single array parameter).
        """
        query = "DELETE FROM {table_name} WHERE id = ANY($1::{id_type}[]);".format(
            table_name=self._quote_ident(self.table_name), id_type=self.id_type)
        return (query, [ids])

    def delete_by_metadata_query(self, filter: Union[Dict[str, str], List[Dict[str, str]]]) -> Tuple[str, List]:
        """
        Generates a query that deletes the rows whose metadata matches the filter.

        Returns
        -------
            Tuple[str, List]: The query and its parameters.
        """
        params: List[Any] = []
        (where, params) = self._where_clause_for_filter(params, filter)
        query = "DELETE FROM {table_name} WHERE {where};".format(
            table_name=self._quote_ident(self.table_name), where=where)
        return (query, params)

    def drop_table_query(self):
        """Generates a query that drops the table if it exists."""
        return "DROP TABLE IF EXISTS {table_name};".format(table_name=self._quote_ident(self.table_name))

    def default_max_db_connection_query(self):
        """
        Generates a query to get the default max db connections. This uses a heuristic to determine the max connections based on the max_connections setting in postgres
        and the number of currently used connections. This heuristic leaves 4 connections in reserve.
        """
        return "SELECT greatest(1, ((SELECT setting::int FROM pg_settings WHERE name='max_connections')-(SELECT count(*) FROM pg_stat_activity) - 4)::int)"

    def create_embedding_index_query(self, index: BaseIndex, num_records_callback: Callable[[], int]) -> str:
        """
        Generates an embedding index creation query.

        Parameters
        ----------
        index
            The index to create.
        num_records_callback
            A callback function to get the number of records in the table.

        Returns
        -------
            str: The index creation query.
        """
        column_name = "embedding"
        index_name = self._get_embedding_index_name()
        return index.create_index_query(
            self._quote_ident(self.table_name),
            self._quote_ident(column_name),
            index_name,
            self.distance_type,
            num_records_callback,
        )

    def _where_clause_for_filter(self, params: List, filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]]) -> Tuple[str, List]:
        """
        Build the metadata containment condition for a filter.

        A dict must be contained in the row's metadata; for a list of dicts,
        any one of them being contained is enough. The input `params` list is
        not modified; a new list with the extra parameter is returned.

        Raises
        ------
            ValueError: If the filter is neither None, a dict, nor a list.
        """
        if filter is None:
            return ("TRUE", params)

        placeholder_index = len(params) + 1
        if isinstance(filter, dict):
            where = "metadata @> ${index}".format(index=placeholder_index)
            params = params + [json.dumps(filter)]
        elif isinstance(filter, list):
            where = "metadata @> ANY(${index}::jsonb[])".format(index=placeholder_index)
            params = params + [[json.dumps(filter_dict) for filter_dict in filter]]
        else:
            raise ValueError("Unknown filter type: {filter_type}".format(filter_type=type(filter)))

        return (where, params)

    def search_query(
            self, 
            query_embedding: Optional[Union[List[float], np.ndarray]], 
            limit: int = 10, 
            filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]] = None, 
            predicates: Optional[Predicates] = None,
            uuid_time_filter: Optional[UUIDTimeRange] = None,
            ) -> Tuple[str, List]:
        """
        Generates a similarity query.

        Parameters
        ----------
        query_embedding
            The embedding to order results by distance from. If None, results
            are not ordered and the distance column is the constant -1.0.
        limit
            Maximum number of rows to return.
        filter
            Metadata containment filter: a dict, or a list of dicts of which any may match.
        predicates
            Predicates on the metadata.
        uuid_time_filter
            Time range on the UUID timestamp of the id column.

        Returns:
            Tuple[str, List]: A tuple containing the query and parameters.
        """
        params: List[Any] = []
        if query_embedding is not None:
            distance = "embedding {op} ${index}".format(
                op=self.distance_type, index=len(params)+1)
            params = params + [query_embedding]
            order_by_clause = "ORDER BY {distance} ASC".format(
                distance=distance)
        else:
            distance = "-1.0"
            order_by_clause = ""

        if self.infer_filters and uuid_time_filter is None and isinstance(filter, dict):
            if "__start_date" in filter or "__end_date" in filter:
                # Work on a copy so the caller's dict is left untouched, and
                # always drop both special keys so that neither one leaks
                # into the JSON containment filter.
                filter = dict(filter)
                start_date = UUIDTimeRange._parse_datetime(filter.pop("__start_date", None))
                end_date = UUIDTimeRange._parse_datetime(filter.pop("__end_date", None))
                uuid_time_filter = UUIDTimeRange(start_date, end_date)

        where_clauses = []
        if filter is not None:
            (where_filter, params) = self._where_clause_for_filter(params, filter)
            where_clauses.append(where_filter)

        if predicates is not None:
            (where_predicates, params) = predicates.build_query(params)
            # An empty condition would leave a dangling AND in the WHERE clause.
            if where_predicates:
                where_clauses.append(where_predicates)

        if uuid_time_filter is not None:
            #if self.time_partition_interval is None:
                #raise ValueError("""uuid_time_filter is only supported when time_partitioning is enabled.""")
            
            (where_time, params) = uuid_time_filter.build_query(params)
            where_clauses.append(where_time)

        where = " AND ".join(where_clauses) if where_clauses else "TRUE"

        # limit is interpolated into the statement text, so force it to an integer.
        query = '''
        SELECT
            id, metadata, contents, embedding, {distance} as distance
        FROM
           {table_name}
        WHERE 
           {where}
        {order_by_clause}
        LIMIT {limit}
        '''.format(distance=distance, order_by_clause=order_by_clause, where=where, table_name=self._quote_ident(self.table_name), limit=int(limit))
        return (query, params)

In [108]:
show_doc(QueryBuilder.get_create_query)

---

[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L475){target="_blank" style="float:right; font-size:smaller"}

### QueryBuilder.get_create_query

>      QueryBuilder.get_create_query ()

Generates a query to create the tables, indexes, and extensions needed to store the vector data.

# Async Client

In [109]:
#| export
class Async(QueryBuilder):
    """
    Asynchronous client that stores and searches vector data using asyncpg.

    SQL text is produced by an internal `QueryBuilder` instance (`self.builder`);
    this class manages a lazily created connection pool and executes the queries.
    """

    def __init__(
            self,
            service_url: str,
            table_name: str,
            num_dimensions: int,
            distance_type: str = 'cosine',
            id_type='UUID',
            time_partition_interval: Optional[timedelta] = None,
            max_db_connections: Optional[int] = None,
            infer_filters: bool = True,
            ) -> None:
        """
        Initializes an async client for storing vector data.

        Parameters
        ----------
        service_url
            The connection string for the database.
        table_name
            The name of the table.
        num_dimensions
            The number of dimensions for the embedding vector.
        distance_type
            The distance type for indexing.
        id_type
            The type of the id column. Can be either 'UUID' or 'TEXT'.
        time_partition_interval
            Passed through to the query builder and kept on the client.
            NOTE(review): presumably the partition width of a time-partitioned table - confirm.
        max_db_connections
            Upper bound for the connection pool size. When None, a default is
            fetched from the database the first time `connect` is called.
        infer_filters
            Passed through to the query builder.
            NOTE(review): presumably controls whether special filter keys are inferred - confirm.
        """
        self.builder = QueryBuilder(
            table_name, num_dimensions, distance_type, id_type, time_partition_interval, infer_filters)
        self.service_url = service_url
        self.pool = None
        self.max_db_connections = max_db_connections
        self.time_partition_interval = time_partition_interval

    async def _default_max_db_connections(self) -> int:
        """
        Gets a default value for the number of max db connections to use.

        Returns
        -------
            int: The value returned by the builder's default-max-connections query.
        """
        query = self.builder.default_max_db_connection_query()
        conn = await asyncpg.connect(dsn=self.service_url)
        try:
            return await conn.fetchval(query)
        finally:
            # close the one-off connection even when the query fails
            await conn.close()

    async def connect(self):
        """
        Lazily creates the asyncpg connection pool and starts acquiring a connection from it.

        Returns
        -------
            The object returned by `pool.acquire()`; use it as
            `async with await client.connect() as conn:`.
        """
        if self.pool is None:
            if self.max_db_connections is None:
                self.max_db_connections = await self._default_max_db_connections()

            async def init(conn):
                await register_vector(conn)
                # decode to a dict, but accept a string as input in upsert
                await conn.set_type_codec(
                    'jsonb',
                    encoder=str,
                    decoder=json.loads,
                    schema='pg_catalog')

            self.pool = await asyncpg.create_pool(
                dsn=self.service_url, init=init, min_size=1, max_size=self.max_db_connections)
        return self.pool.acquire()

    async def close(self):
        """
        Closes the connection pool, if one was created.

        The pool reference is cleared so that a later `connect` builds a fresh pool
        instead of handing out connections from a closed one.
        """
        pool, self.pool = self.pool, None
        if pool is not None:
            await pool.close()

    async def table_is_empty(self):
        """
        Checks if the table is empty.

        Returns
        -------
            bool: True if the table is empty, False otherwise.
        """
        query = self.builder.get_row_exists_query()
        async with await self.connect() as conn:
            rec = await conn.fetchrow(query)
            return rec is None

    def munge_record(self, records) -> Iterable[Tuple[uuid.UUID, str, str, List[float]]]:
        """
        Normalizes records for upsert so that every metadata field is a JSON string.

        Parameters
        ----------
        records
            Records of the form (id, metadata, contents, embedding). Metadata must be
            either a dict in every record or a string in every record.

        Returns
        -------
            list: The records, with dict metadata serialized by `json.dumps`.

        Raises
        ------
        ValueError
            If dict and non-dict metadata are mixed in the same batch.
        """
        records = list(records)
        if not records:
            # nothing to normalize; also avoids indexing into an empty batch
            return records

        if isinstance(records[0][1], dict):
            # converted eagerly so that a bad batch fails before the database is touched
            return [Async._convert_record_meta_to_json(item) for item in records]

        # the first record is not a dict, so no later record may be one either
        if any(isinstance(item[1], dict) for item in records):
            raise ValueError(
                "Cannot mix dictionary and string metadata fields in the same upsert")
        return records

    @staticmethod
    def _convert_record_meta_to_json(item):
        """
        Returns a copy of a single record with its dict metadata serialized to a JSON string.

        Raises
        ------
        ValueError
            If the metadata of the record is not a dict.
        """
        if not isinstance(item[1], dict):
            raise ValueError(
                "Cannot mix dictionary and string metadata fields in the same upsert")
        return (item[0], json.dumps(item[1]), item[2], item[3])

    async def upsert(self, records):
        """
        Performs upsert operation for multiple records.

        Parameters
        ----------
        records
            List of records to upsert. Each record is a tuple of the form (id, metadata, contents, embedding).
            An empty list is a no-op.

        Returns
        -------
            None
        """
        records = self.munge_record(records)
        if not records:
            return
        query = self.builder.get_upsert_query()
        async with await self.connect() as conn:
            await conn.executemany(query, records)

    async def create_tables(self):
        """
        Creates necessary tables.

        Returns
        -------
            None
        """
        query = self.builder.get_create_query()
        # don't use a connection pool for this because the vector extension may not be installed yet
        # and if it's not installed, register_vector will fail.
        conn = await asyncpg.connect(dsn=self.service_url)
        try:
            await conn.execute(query)
        finally:
            await conn.close()

    async def delete_all(self, drop_index=True):
        """
        Deletes all data. Also drops the index if `drop_index` is true.

        Returns
        -------
            None
        """
        if drop_index:
            await self.drop_embedding_index()
        query = self.builder.delete_all_query()
        async with await self.connect() as conn:
            await conn.execute(query)

    async def delete_by_ids(self, ids: Union[List[uuid.UUID], List[str]]):
        """
        Delete records by id.

        Parameters
        ----------
        ids
            List of ids to delete.

        Returns
        -------
            The rows returned by the delete query, as given by `fetch`.
        """
        (query, params) = self.builder.delete_by_ids_query(ids)
        async with await self.connect() as conn:
            return await conn.fetch(query, *params)

    async def delete_by_metadata(self, filter: Union[Dict[str, str], List[Dict[str, str]]]):
        """
        Delete records by metadata filters.

        Parameters
        ----------
        filter
            A key-value object or a list of key-value objects selecting the records to delete.

        Returns
        -------
            The rows returned by the delete query, as given by `fetch`.
        """
        (query, params) = self.builder.delete_by_metadata_query(filter)
        async with await self.connect() as conn:
            return await conn.fetch(query, *params)

    async def drop_table(self):
        """
        Drops the table

        Returns
        -------
            None
        """
        query = self.builder.drop_table_query()
        async with await self.connect() as conn:
            await conn.execute(query)

    async def _get_approx_count(self):
        """
        Retrieves an approximate count of records in the table.

        Returns
        -------
            int: Approximate count of records.
        """
        query = self.builder.get_approx_count_query()
        async with await self.connect() as conn:
            rec = await conn.fetchrow(query)
            return rec[0]

    async def drop_embedding_index(self):
        """
        Drop any index on the embedding

        Returns
        -------
            None
        """
        query = self.builder.drop_embedding_index_query()
        async with await self.connect() as conn:
            await conn.execute(query)

    async def create_embedding_index(self, index: BaseIndex):
        """
        Creates an index for the table.

        Parameters
        ----------
        index
            The index to create.

        Returns
        --------
            None
        """
        # todo: can we make getting the records lazy?
        # the count is fetched up front because the builder takes a plain (non-async) callable
        num_records = await self._get_approx_count()
        query = self.builder.create_embedding_index_query(index, lambda: num_records)

        async with await self.connect() as conn:
            await conn.execute(query)

    async def search(self,
                     query_embedding: Optional[List[float]] = None,
                     limit: int = 10,
                     filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]] = None,
                     predicates: Optional[Predicates] = None,
                     uuid_time_filter: Optional[UUIDTimeRange] = None,
                     ):
        """
        Retrieves similar records using a similarity query.

        Parameters
        ----------
        query_embedding
            The query embedding vector.
        limit
            The number of nearest neighbors to retrieve.
        filter
            A filter for metadata. Should be specified as a key-value object or a list of key-value objects (where any objects in the list are matched).
        predicates
            A Predicates object to filter the results. Predicates support more complex queries than the filter parameter. Predicates can be combined using logical operators (&, |, and ~).
        uuid_time_filter
            A UUIDTimeRange object restricting the results to a time range. It is combined with the other filters using AND.

        Returns
        --------
            List: List of similar records.
        """
        (query, params) = self.builder.search_query(
            query_embedding, limit, filter, predicates, uuid_time_filter)
        async with await self.connect() as conn:
            return await conn.fetch(query, *params)

In [110]:
show_doc(Async.create_tables)

---

[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L843){target="_blank" style="float:right; font-size:smaller"}

### Async.create_tables

>      Async.create_tables ()

Creates necessary tables.

In [111]:
show_doc(Async.create_tables)

---

[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L843){target="_blank" style="float:right; font-size:smaller"}

### Async.create_tables

>      Async.create_tables ()

Creates necessary tables.

In [112]:
show_doc(Async.search)

---

[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L944){target="_blank" style="float:right; font-size:smaller"}

### Async.search

>      Async.search (query_embedding:Optional[List[float]]=None, limit:int=10,
>                    filter:Union[Dict[str,str],List[Dict[str,str]],NoneType]=No
>                    ne, predicates:Optional[__main__.Predicates]=None,
>                    uuid_time_filter:Optional[__main__.UUIDTimeRange]=None)

Retrieves similar records using a similarity query.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| query_embedding | Optional | None | The query embedding vector. |
| limit | int | 10 | The number of nearest neighbors to retrieve. |
| filter | Union | None | A filter for metadata. Should be specified as a key-value object or a list of key-value objects (where any objects in the list are matched). |
| predicates | Optional | None | A Predicates object to filter the results. Predicates support more complex queries than the filter parameter. Predicates can be combined using logical operators (&, \|, and ~). |
| uuid_time_filter | Optional | None | A UUIDTimeRange object restricting the results to a time range. |
| **Returns** | **List: List of similar records.** |  |  |

In [117]:
#| hide
# Reset the database so the usage example below starts without a data_table.
con = await asyncpg.connect(service_url)
await con.execute("DROP TABLE IF EXISTS data_table;")
# NOTE(review): presumably dropped so that create_tables() is exercised without the extension installed - confirm
await con.execute("DROP EXTENSION IF EXISTS vector CASCADE;")
await con.close()

## Usage Example

In [118]:
# Create a client for a table of 2-dimensional embeddings and create the schema.
vec = Async(service_url, "data_table", 2)
await vec.create_tables()
empty = await vec.table_is_empty()
assert empty
# Metadata can be given as a dict ...
await vec.upsert([(uuid.uuid4(), {"key": "val"}, "the brown fox", [1.0, 1.2])])
empty = await vec.table_is_empty()
assert not empty

# ... or as JSON strings. These rows are the fixture for the search assertions that follow.
await vec.upsert([
    (uuid.uuid4(), '''{"key":"val"}''', "the brown fox", [1.0, 1.3]),
    (uuid.uuid4(), '''{"key":"val2", "key_10": "10", "key_11": "11.3"}''', "the brown fox", [1.0, 1.4]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.5]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.6]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.6]),
    (uuid.uuid4(), '''{"key2":"val2"}''', "the brown fox", [1.0, 1.7]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.8]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.9]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 100.8]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 101.8]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.8]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.8]),
    (uuid.uuid4(), '''{"key_1":"val_1", "key_2":"val_2"}''',
     "the brown fox", [1.0, 1.8]),
])

# Exercise every index type, with default and with explicit arguments.
# Each index is dropped before the next one is created; the last one is kept for the searches.
await vec.create_embedding_index(IvfflatIndex())
await vec.drop_embedding_index()
await vec.create_embedding_index(IvfflatIndex(100))
await vec.drop_embedding_index()
await vec.create_embedding_index(HNSWIndex())
await vec.drop_embedding_index()
await vec.create_embedding_index(HNSWIndex(20, 125))
await vec.drop_embedding_index()
await vec.create_embedding_index(TimescaleVectorIndex())
await vec.drop_embedding_index()
await vec.create_embedding_index(TimescaleVectorIndex(False, 50, 50, 1.5))

# Without an explicit limit, search uses its default of 10.
rec = await vec.search([1.0, 2.0])
assert len(rec) == 10
rec = await vec.search([1.0, 2.0], limit=4)
assert len(rec) == 4
# The query embedding is optional.
rec = await vec.search(limit=4)
assert len(rec) == 4
# A dict filter matches rows whose metadata contains every given key/value pair.
rec = await vec.search([1.0, 2.0], limit=4, filter={"key2": "val2"})
assert len(rec) == 1
rec = await vec.search([1.0, 2.0], limit=4, filter={"key2": "does not exist"})
assert len(rec) == 0
rec = await vec.search([1.0, 2.0], limit=4, filter={"key_1": "val_1"})
assert len(rec) == 1
rec = await vec.search([1.0, 2.0], filter={"key_1": "val_1", "key_2": "val_2"})
assert len(rec) == 1
rec = await vec.search([1.0, 2.0], limit=4, filter={"key_1": "val_1", "key_2": "val_3"})
assert len(rec) == 0
rec = await vec.search(limit=4, filter={"key_1": "val_1", "key_2": "val_3"})
assert len(rec) == 0
# A list of dicts matches rows that satisfy any one of the dicts.
rec = await vec.search([1.0, 2.0], limit=4, filter=[{"key_1": "val_1"}, {"key2": "val2"}])
assert len(rec) == 2
rec = await vec.search(limit=4, filter=[{"key_1": "val_1"}, {"key2": "val2"}])
assert len(rec) == 2

rec = await vec.search([1.0, 2.0], limit=4, filter=[{"key_1": "val_1"}, {"key2": "val2"}, {"no such key": "no such val"}])
assert len(rec) == 2

# Result rows can be indexed by position or by column name; metadata comes back as a dict.
assert isinstance(rec[0][SEARCH_RESULT_METADATA_IDX], dict)
assert isinstance(rec[0]["metadata"], dict)
assert rec[0]["contents"] == "the brown fox"


# Predicates accept (key, value) or (key, operator, value), either as a tuple or as separate arguments.
rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(("key", "val2")))
assert len(rec) == 1
rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(("key", "==", "val2")))
assert len(rec) == 1
rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates("key", "==", "val2"))
assert len(rec) == 1
# Numeric comparisons against metadata values that were stored as strings ("10", "11.3").
rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates("key_10", "<", 100))
assert len(rec) == 1
rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates("key_10", "<", 10))
assert len(rec) == 0
rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates("key_10", "<=", 10))
assert len(rec) == 1
rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates("key_10", "<=", 10.0))
assert len(rec) == 1
rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates("key_11", "<=", 11.3))
assert len(rec) == 1
rec = await vec.search(limit=4, predicates=Predicates("key_11", ">=", 11.29999))
assert len(rec) == 1
rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates("key_11", "<", 11.299999))
assert len(rec) == 0

# Several clauses in one Predicates object, combined with an explicit operator or with &, | and ~.
rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(*[("key", "val2"), ("key_10", "<", 100)]))
assert len(rec) == 1
rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(("key", "val2"), ("key_10", "<", 100), operator='AND'))
assert len(rec) == 1
rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(("key", "val2"), ("key_2", "val_2"), operator='OR'))
assert len(rec) == 2
rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates("key_10", "<", 100) & (Predicates("key","==", "val2",) | Predicates("key_2", "==", "val_2"))) 
assert len(rec) == 1
# NOTE(review): Python's `and`/`or` return one of their operands rather than a combined
# Predicates object, so this is not equivalent to the `&`/`|` form above - confirm intent.
rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates("key_10", "<", 100) and (Predicates("key","==", "val2") or Predicates("key_2","==", "val_2"))) 
assert len(rec) == 1
rec = await vec.search(limit=4, predicates=~Predicates(("key", "val2"), ("key_10", "<", 100)))
assert len(rec) == 4

# Mixing dict and string metadata in one upsert must fail, whichever kind comes first.
raised = False
try:
    # can't upsert using both keys and dictionaries
    await vec.upsert([
        (uuid.uuid4(), {"key": "val"}, "the brown fox", [1.0, 1.2]),
        (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.2])
    ])
except ValueError as e:
    raised = True
assert raised

raised = False
try:
    # can't upsert using both keys and dictionaries opposite order
    await vec.upsert([
        (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.2]),
        (uuid.uuid4(),  {"key": "val"}, "the brown fox", [1.0, 1.2])
    ])
except BaseException as e:
    # the exception type is deliberately not pinned down for this ordering
    raised = True
assert raised

# Delete by id, then by metadata filter, checking the remaining matches after each step.
rec = await vec.search([1.0, 2.0], limit=4, filter=[{"key_1": "val_1"}, {"key2": "val2"}])
assert len(rec) == 2
await vec.delete_by_ids([rec[0]["id"]])
rec = await vec.search([1.0, 2.0], limit=4, filter=[{"key_1": "val_1"}, {"key2": "val2"}])
assert len(rec) == 1
await vec.delete_by_metadata([{"key_1": "val_1"}, {"key2": "val2"}])
rec = await vec.search([1.0, 2.0], limit=4, filter=[{"key_1": "val_1"}, {"key2": "val2"}])
assert len(rec) == 0
rec = await vec.search([1.0, 2.0], limit=4, filter=[{"key2": "val"}])
assert len(rec) == 4
await vec.delete_by_metadata([{"key2": "val"}])
rec = await vec.search([1.0, 2.0], limit=4, filter=[{"key2": "val"}])
assert len(rec) == 0

# Rows that matched none of the deletes are still present until delete_all.
assert not await vec.table_is_empty()
await vec.delete_all()
assert await vec.table_is_empty()

# Clean up so the next section can recreate data_table with a different id type.
await vec.drop_table()
await vec.close()

# With id_type="TEXT", ids are arbitrary strings rather than UUIDs.
vec = Async(service_url, "data_table", 2, id_type="TEXT")
await vec.create_tables()
empty = await vec.table_is_empty()
assert empty
await vec.upsert([("Not a valid UUID", {"key": "val"}, "the brown fox", [1.0, 1.2])])
empty = await vec.table_is_empty()
assert not empty
await vec.delete_by_ids(["Not a valid UUID"])
empty = await vec.table_is_empty()
assert empty
await vec.drop_table()
await vec.close()

# Same table again, this time created with a time partition interval.
vec = Async(service_url, "data_table", 2, time_partition_interval=timedelta(seconds=60))
await vec.create_tables()
empty = await vec.table_is_empty()
assert empty
# uuid1 ids are accepted here, unlike the uuid4 id tried below
id = uuid.uuid1()
await vec.upsert([(id, {"key": "val"}, "the brown fox", [1.0, 1.2])])
empty = await vec.table_is_empty()
assert not empty
await vec.delete_by_ids([id])
empty = await vec.table_is_empty()
assert empty

raised = False
try:
    # can't upsert with uuid type 4 in time partitioned table
    await vec.upsert([
        (uuid.uuid4(),  {"key": "val"}, "the brown fox", [1.0, 1.2])
    ])
except BaseException as e:
    raised = True
assert raised

# Fixture for the date-range checks: one row stamped "now" and one stamped in 2018.
# specific_datetime is a naive datetime (no tzinfo).
specific_datetime = datetime(2018, 8, 10, 15, 30, 0)
await vec.upsert([
        # current time
        (uuid.uuid1(),  {"key": "val"}, "the brown fox", [1.0, 1.2]),
        #time in 2018
        (uuid_from_time(specific_datetime),  {"key": "val"}, "the brown fox", [1.0, 1.2])
])
assert not await vec.table_is_empty()

#check all the possible ways to specify a date range
async def search_date(start_date, end_date, expected):
    """
    Assert that each supported way of expressing a date range finds `expected` rows.

    The range is passed to `vec.search` as a `UUIDTimeRange`, as `__start_date` /
    `__end_date` filter keys, and as `__uuid_timestamp` predicates. Every form is
    tried first with the bounds as given and then with the bounds converted by `str`.
    A bound of None is left out of the filter and predicate forms.
    """
    # identity first, then str, so every form is tried raw and stringified
    conversions = (lambda bound: bound, str)

    async def expect_count(**criteria):
        found = await vec.search([1.0, 2.0], limit=4, **criteria)
        assert len(found) == expected

    # explicit UUIDTimeRange (None bounds are passed through, or stringified, as-is)
    for convert in conversions:
        time_range = UUIDTimeRange(convert(start_date), convert(end_date))
        await expect_count(uuid_time_filter=time_range)

    # special keys inside the metadata filter
    for convert in conversions:
        date_filter = {
            key: convert(bound)
            for key, bound in (("__start_date", start_date), ("__end_date", end_date))
            if bound is not None
        }
        await expect_count(filter=date_filter)

    # predicates on the timestamp embedded in the uuid
    for convert in conversions:
        clauses = [
            ("__uuid_timestamp", operator, convert(bound))
            for operator, bound in ((">=", start_date), ("<", end_date))
            if bound is not None
        ]
        await expect_count(predicates=Predicates(*clauses))

# The table holds one row stamped in 2018 (specific_datetime) and one stamped "now".
# A window around 2018 finds only the 2018 row; an open-ended end also finds the current row.
await search_date(specific_datetime-timedelta(days=7), specific_datetime+timedelta(days=7), 1)
await search_date(specific_datetime-timedelta(days=7), None, 2)
await search_date(None, specific_datetime+timedelta(days=7), 1)
await search_date(specific_datetime-timedelta(days=7), specific_datetime-timedelta(days=2), 0)

#check timedelta handling: a range can be given as one bound plus a time_delta
rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(start_date=specific_datetime, time_delta=timedelta(days=7)))
assert len(rec) == 1
#end is exclusive: a row stamped exactly at end_date is not returned
rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(end_date=specific_datetime, time_delta=timedelta(days=7)))
assert len(rec) == 0
rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(end_date=specific_datetime+timedelta(seconds=1), time_delta=timedelta(days=7)))
assert len(rec) == 1
await vec.drop_table()
await vec.close()

# Sync Client

In [None]:
#| export
import psycopg2.pool
from contextlib import contextmanager
import psycopg2.extras
import pgvector.psycopg2
import numpy as np
import re

In [None]:
#| export
class Sync:
    """
    Synchronous client that stores and searches vector data using psycopg2.

    SQL text is produced by a `QueryBuilder` instance (`self.builder`) using `$n`
    placeholders; this class translates them to psycopg2's pyformat style,
    manages a lazily created connection pool and executes the queries.
    """
    # Cache of `$n` -> `%(n)s` translations, shared by all instances and keyed by the original query text.
    translated_queries: Dict[str, str] = {}
    # Cache of `$n` -> `%s` (positional) translations. Kept apart from the cache above because
    # the same query text translates differently in the two modes.
    _translated_positional_queries: Dict[str, str] = {}

    def __init__(
            self,
            service_url: str,
            table_name: str,
            num_dimensions: int,
            distance_type: str = 'cosine',
            id_type='UUID',
            time_partition_interval: Optional[timedelta] = None,
            max_db_connections: Optional[int] = None,
            infer_filters: bool = True,
            ) -> None:
        """
        Initializes a sync client for storing vector data.

        Parameters
        ----------
        service_url
            The connection string for the database.
        table_name
            The name of the table.
        num_dimensions
            The number of dimensions for the embedding vector.
        distance_type
            The distance type for indexing.
        id_type
            The type of the primary id column. Can be either 'UUID' or 'TEXT'.
        time_partition_interval
            Passed through to the query builder and kept on the client.
            NOTE(review): presumably the partition width of a time-partitioned table - confirm.
        max_db_connections
            Upper bound for the connection pool size. When None, a default is
            fetched from the database the first time `connect` is used.
        infer_filters
            Passed through to the query builder.
            NOTE(review): presumably controls whether special filter keys are inferred - confirm.
        """
        self.builder = QueryBuilder(
            table_name, num_dimensions, distance_type, id_type, time_partition_interval, infer_filters)
        self.service_url = service_url
        self.pool = None
        self.max_db_connections = max_db_connections
        self.time_partition_interval = time_partition_interval
        psycopg2.extras.register_uuid()

    def default_max_db_connections(self):
        """
        Gets a default value for the number of max db connections to use.

        Returns
        -------
            The first column of the row returned by the builder's default-max-connections query.
        """
        query = self.builder.default_max_db_connection_query()
        conn = psycopg2.connect(dsn=self.service_url)
        try:
            with conn.cursor() as cur:
                cur.execute(query)
                num_connections = cur.fetchone()
        finally:
            # close the one-off connection even when the query fails
            conn.close()
        return num_connections[0]

    @contextmanager
    def connect(self):
        """
        Establishes a connection to a PostgreSQL database using psycopg2 and allows its
        use in a context manager.

        The pool is created on first use. The connection is committed when the
        `with` body finishes without an exception and is always handed back to the pool.
        """
        if self.pool is None:
            if self.max_db_connections is None:
                self.max_db_connections = self.default_max_db_connections()

            self.pool = psycopg2.pool.SimpleConnectionPool(
                1, self.max_db_connections, dsn=self.service_url, cursor_factory=psycopg2.extras.DictCursor)

        connection = self.pool.getconn()
        try:
            # inside the try so that a failing registration still returns the connection to the pool
            pgvector.psycopg2.register_vector(connection)
            yield connection
            connection.commit()
        finally:
            self.pool.putconn(connection)

    def close(self):
        """
        Closes all pooled connections, if a pool was created.

        The pool reference is cleared so that a later `connect` builds a fresh pool
        instead of using a closed one.
        """
        pool, self.pool = self.pool, None
        if pool is not None:
            pool.closeall()

    def _translate_to_pyformat(self, query_string, params):
        """
        Translates dollar sign number parameters and list parameters to pyformat strings.

        Args:
            query_string (str): The query string with `$n` parameters.
            params (list): List of parameter values, or None to produce positional
                `%s` placeholders instead of named `%(n)s` ones.

        Returns:
            str: The query string with translated pyformat parameters.
            dict: A dictionary mapping parameter numbers (as strings, starting at "1") to their values.
        """
        translated_params = {}
        if params is not None:
            for idx, param in enumerate(params):
                translated_params[str(idx+1)] = param

        named = params is not None
        cache = self.translated_queries if named else self._translated_positional_queries

        if query_string not in cache:
            def to_pyformat(match):
                # Extract the number after the $
                param_number = int(match.group(0)[1:])
                if named and param_number != 0:
                    return f'%({param_number})s'
                return '%s'

            # A single regex pass replaces each whole `$n` token, so `$1` can never
            # clobber the prefix of `$10`, `$11`, ...
            cache[query_string] = re.sub(r'\$[0-9]+', to_pyformat, query_string)

        return cache[query_string], translated_params

    def table_is_empty(self):
        """
        Checks if the table is empty.

        Returns
        -------
            bool: True if the table is empty, False otherwise.
        """
        query = self.builder.get_row_exists_query()
        with self.connect() as conn:
            with conn.cursor() as cur:
                cur.execute(query)
                rec = cur.fetchone()
                return rec is None

    def munge_record(self, records) -> Iterable[Tuple[uuid.UUID, str, str, List[float]]]:
        """
        Normalizes records for upsert so that every metadata field is a JSON string.

        Parameters
        ----------
        records
            Records of the form (id, metadata, contents, embedding). Metadata must be
            either a dict in every record or a string in every record.

        Returns
        -------
            list: The records, with dict metadata serialized by `json.dumps`.

        Raises
        ------
        ValueError
            If dict and non-dict metadata are mixed in the same batch.
        """
        records = list(records)
        if not records:
            # nothing to normalize; also avoids indexing into an empty batch
            return records

        if isinstance(records[0][1], dict):
            # converted eagerly so that a bad batch fails before the database is touched
            return [Sync._convert_record_meta_to_json(item) for item in records]

        # the first record is not a dict, so no later record may be one either
        if any(isinstance(item[1], dict) for item in records):
            raise ValueError(
                "Cannot mix dictionary and string metadata fields in the same upsert")
        return records

    @staticmethod
    def _convert_record_meta_to_json(item):
        """
        Returns a copy of a single record with its dict metadata serialized to a JSON string.

        Raises
        ------
        ValueError
            If the metadata of the record is not a dict.
        """
        if not isinstance(item[1], dict):
            raise ValueError(
                "Cannot mix dictionary and string metadata fields in the same upsert")
        return (item[0], json.dumps(item[1]), item[2], item[3])

    def upsert(self, records):
        """
        Performs upsert operation for multiple records.

        Parameters
        ----------
        records
            Records to upsert. Each record is a tuple of the form (id, metadata, contents, embedding).
            An empty list is a no-op.

        Returns
        -------
            None
        """
        records = self.munge_record(records)
        if not records:
            return
        query = self.builder.get_upsert_query()
        # params=None requests positional placeholders, matching the record tuples passed to executemany
        query, _ = self._translate_to_pyformat(query, None)
        with self.connect() as conn:
            with conn.cursor() as cur:
                cur.executemany(query, records)

    def create_tables(self):
        """
        Creates necessary tables.

        Returns
        -------
            None
        """
        query = self.builder.get_create_query()
        # don't use a connection pool for this because the vector extension may not be installed yet
        # and if it's not installed, register_vector will fail.
        conn = psycopg2.connect(dsn=self.service_url)
        try:
            with conn.cursor() as cur:
                cur.execute(query)
            conn.commit()
        finally:
            conn.close()

    def delete_all(self, drop_index=True):
        """
        Deletes all data. Also drops the index if `drop_index` is true.

        Returns
        -------
            None
        """
        if drop_index:
            self.drop_embedding_index()
        query = self.builder.delete_all_query()
        with self.connect() as conn:
            with conn.cursor() as cur:
                cur.execute(query)

    def delete_by_ids(self, ids: Union[List[uuid.UUID], List[str]]):
        """
        Delete records by id.

        Parameters
        ----------
        ids
            List of ids to delete.
        """
        (query, params) = self.builder.delete_by_ids_query(ids)
        query, params = self._translate_to_pyformat(query, params)
        with self.connect() as conn:
            with conn.cursor() as cur:
                cur.execute(query, params)

    def delete_by_metadata(self, filter: Union[Dict[str, str], List[Dict[str, str]]]):
        """
        Delete records by metadata filters.

        Parameters
        ----------
        filter
            A key-value object or a list of key-value objects selecting the records to delete.
        """
        (query, params) = self.builder.delete_by_metadata_query(filter)
        query, params = self._translate_to_pyformat(query, params)
        with self.connect() as conn:
            with conn.cursor() as cur:
                cur.execute(query, params)

    def drop_table(self):
        """
        Drops the table

        Returns
        -------
            None
        """
        query = self.builder.drop_table_query()
        with self.connect() as conn:
            with conn.cursor() as cur:
                cur.execute(query)

    def _get_approx_count(self):
        """
        Retrieves an approximate count of records in the table.

        Returns
        -------
            int: Approximate count of records.
        """
        query = self.builder.get_approx_count_query()
        with self.connect() as conn:
            with conn.cursor() as cur:
                cur.execute(query)
                rec = cur.fetchone()
                return rec[0]

    def drop_embedding_index(self):
        """
        Drop any index on the embedding

        Returns
        -------
            None
        """
        query = self.builder.drop_embedding_index_query()
        with self.connect() as conn:
            with conn.cursor() as cur:
                cur.execute(query)

    def create_embedding_index(self, index: BaseIndex):
        """
        Creates an index on the embedding for the table.

        Parameters
        ----------
        index
            The index to create.

        Returns
        --------
            None
        """
        # the record count is passed as a callable so that the builder decides whether to fetch it
        query = self.builder.create_embedding_index_query(index, lambda: self._get_approx_count())
        with self.connect() as conn:
            with conn.cursor() as cur:
                cur.execute(query)

    def search(self,
               query_embedding: Optional[List[float]] = None,
               limit: int = 10,
               filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]] = None,
               predicates: Optional[Predicates] = None,
               uuid_time_filter: Optional[UUIDTimeRange] = None,
               ):
        """
        Retrieves similar records using a similarity query.

        Parameters
        ----------
        query_embedding
            The query embedding vector.
        limit
            The number of nearest neighbors to retrieve.
        filter
            A filter for metadata. Should be specified as a key-value object or a list of key-value objects (where any objects in the list are matched).
        predicates
            A Predicates object to filter the results. Predicates support more complex queries than the filter parameter. Predicates can be combined using logical operators (&, |, and ~).
        uuid_time_filter
            A UUIDTimeRange object restricting the results to a time range. It is combined with the other filters using AND.

        Returns
        --------
            List: List of similar records.
        """
        # the embedding is handed to the query builder as a numpy array
        if query_embedding is not None:
            query_embedding_np = np.array(query_embedding)
        else:
            query_embedding_np = None

        (query, params) = self.builder.search_query(
            query_embedding_np, limit, filter, predicates, uuid_time_filter)
        query, params = self._translate_to_pyformat(query, params)
        with self.connect() as conn:
            with conn.cursor() as cur:
                cur.execute(query, params)
                return cur.fetchall()

In [None]:
show_doc(Sync.create_tables)

---

[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1147){target="_blank" style="float:right; font-size:smaller"}

### Sync.create_tables

>      Sync.create_tables ()

Creates necessary tables.

In [None]:
show_doc(Sync.upsert)

---

[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1127){target="_blank" style="float:right; font-size:smaller"}

### Sync.upsert

>      Sync.upsert (records)

Performs upsert operation for multiple records.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| records |  | Records to upsert. |
| **Returns** | **None** |  |

In [None]:
# Render the generated API documentation for Sync.search.
show_doc(Sync.search)

---

[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1262){target="_blank" style="float:right; font-size:smaller"}

### Sync.search

>      Sync.search (query_embedding:Optional[List[float]]=None, limit:int=10,
>                   filter:Union[Dict[str,str],List[Dict[str,str]],NoneType]=Non
>                   e, predicates:Optional[__main__.Predicates]=None,
>                   uuid_time_filter:Optional[__main__.UUIDTimeRange]=None)

Retrieves similar records using a similarity query.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| query_embedding | Optional | None | The query embedding vector. |
| limit | int | 10 | The number of nearest neighbors to retrieve. |
| filter | Union | None | A filter for metadata. Should be specified as a key-value object or a list of key-value objects (where any objects in the list are matched). |
| predicates | Optional | None | A Predicates object to filter the results. Predicates support more complex queries than the filter parameter. Predicates can be combined using logical operators (&, \|, and ~). |
| uuid_time_filter | Optional | None |  |
| **Returns** | **List: List of similar records.** |  |  |

## Usage Example:

In [None]:
#| hide
con = await asyncpg.connect(service_url)
await con.execute("DROP TABLE IF EXISTS data_table;")
await con.execute("DROP EXTENSION IF EXISTS vector CASCADE")
await con.close()

In [None]:
# End-to-end exercise of the synchronous client on a two-dimensional table.
vec = Sync(service_url, "data_table", 2)
vec.create_tables()
empty = vec.table_is_empty()

assert empty
# Metadata may be supplied as a dict ...
vec.upsert([(uuid.uuid4(), {"key": "val"}, "the brown fox", [1.0, 1.2])])
empty = vec.table_is_empty()
assert not empty

# ... or as a JSON string, as long as a single upsert call does not mix both.
vec.upsert([
    (uuid.uuid4(), '''{"key":"val"}''', "the brown fox", [1.0, 1.3]),
    (uuid.uuid4(), '''{"key":"val2"}''', "the brown fox", [1.0, 1.4]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.5]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.6]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.6]),
    (uuid.uuid4(), '''{"key2":"val2"}''', "the brown fox", [1.0, 1.7]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.8]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.9]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 100.8]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 101.8]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.8]),
    (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.8]),
    (uuid.uuid4(), '''{"key_1":"val_1", "key_2":"val_2"}''',
     "the brown fox", [1.0, 1.8]),
])

# Create and drop each supported index type, with default and explicit options.
vec.create_embedding_index(IvfflatIndex())
vec.drop_embedding_index()
vec.create_embedding_index(IvfflatIndex(100))
vec.drop_embedding_index()
vec.create_embedding_index(HNSWIndex())
vec.drop_embedding_index()
vec.create_embedding_index(HNSWIndex(20, 125))
vec.drop_embedding_index()
vec.create_embedding_index(TimescaleVectorIndex())
vec.drop_embedding_index()
vec.create_embedding_index(TimescaleVectorIndex(False, 50, 50, 1.5))

# Similarity search with and without an embedding, a limit and metadata filters.
rec = vec.search([1.0, 2.0])
assert len(rec) == 10
rec = vec.search(np.array([1.0, 2.0]))
assert len(rec) == 10
rec = vec.search([1.0, 2.0], limit=4)
assert len(rec) == 4
rec = vec.search(limit=4)
assert len(rec) == 4
rec = vec.search([1.0, 2.0], limit=4, filter={"key2": "val2"})
assert len(rec) == 1
rec = vec.search([1.0, 2.0], limit=4, filter={"key2": "does not exist"})
assert len(rec) == 0
rec = vec.search(limit=4, filter={"key2": "does not exist"})
assert len(rec) == 0
rec = vec.search([1.0, 2.0], limit=4, filter={"key_1": "val_1"})
assert len(rec) == 1
rec = vec.search([1.0, 2.0], filter={"key_1": "val_1", "key_2": "val_2"})
assert len(rec) == 1
rec = vec.search([1.0, 2.0], limit=4, filter={
                 "key_1": "val_1", "key_2": "val_3"})
assert len(rec) == 0

# A list of filters matches records that satisfy any one of them.
rec = vec.search([1.0, 2.0], limit=4, filter=[
                 {"key_1": "val_1"}, {"key2": "val2"}])
assert len(rec) == 2

rec = vec.search([1.0, 2.0], limit=4, filter=[{"key_1": "val_1"}, {
                 "key2": "val2"}, {"no such key": "no such val"}])
assert len(rec) == 2

raised = False
try:
    # can't upsert using both keys and dictionaries
    # (Sync.upsert is a plain call; awaiting its result would itself raise)
    vec.upsert([
        (uuid.uuid4(), {"key": "val"}, "the brown fox", [1.0, 1.2]),
        (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.2])
    ])
except ValueError:
    raised = True
assert raised

raised = False
try:
    # can't upsert using both keys and dictionaries opposite order
    vec.upsert([
        (uuid.uuid4(), '''{"key2":"val"}''', "the brown fox", [1.0, 1.2]),
        (uuid.uuid4(),  {"key": "val"}, "the brown fox", [1.0, 1.2])
    ])
except Exception:
    raised = True
assert raised

# Result rows can be read both by positional index and by column name.
rec = vec.search([1.0, 2.0], filter={"key_1": "val_1", "key_2": "val_2"})
assert rec[0][SEARCH_RESULT_CONTENTS_IDX] == 'the brown fox'
assert rec[0]["contents"] == 'the brown fox'
assert rec[0][SEARCH_RESULT_METADATA_IDX] == {
    'key_1': 'val_1', 'key_2': 'val_2'}
assert rec[0]["metadata"] == {
    'key_1': 'val_1', 'key_2': 'val_2'}
assert isinstance(rec[0][SEARCH_RESULT_METADATA_IDX], dict)
# Compare the floating-point distance with a tolerance instead of bit-for-bit.
assert math.isclose(rec[0][SEARCH_RESULT_DISTANCE_IDX],
                    0.0009438353921149556, rel_tol=1e-6)
assert math.isclose(rec[0]["distance"], 0.0009438353921149556, rel_tol=1e-6)

rec = vec.search([1.0, 2.0], limit=4, predicates=Predicates("key","==", "val2"))
assert len(rec) == 1

# Deleting by id and by metadata shrinks the matching result sets.
rec = vec.search([1.0, 2.0], limit=4, filter=[
                 {"key_1": "val_1"}, {"key2": "val2"}])
assert len(rec) == 2
vec.delete_by_ids([rec[0][SEARCH_RESULT_ID_IDX]])
rec = vec.search([1.0, 2.0], limit=4, filter=[
                 {"key_1": "val_1"}, {"key2": "val2"}])
assert len(rec) == 1
vec.delete_by_metadata([{"key_1": "val_1"}, {"key2": "val2"}])
rec = vec.search([1.0, 2.0], limit=4, filter=[
                 {"key_1": "val_1"}, {"key2": "val2"}])
assert len(rec) == 0
rec = vec.search([1.0, 2.0], limit=4, filter=[{"key2": "val"}])
assert len(rec) == 4
vec.delete_by_metadata([{"key2": "val"}])
rec = vec.search([1.0, 2.0], limit=4, filter=[{"key2": "val"}])
assert len(rec) == 0

assert not vec.table_is_empty()
vec.delete_all()
assert vec.table_is_empty()

vec.drop_table()
vec.close()

# Same table name, but with TEXT ids instead of UUIDs.
vec = Sync(service_url, "data_table", 2, id_type="TEXT")
vec.create_tables()
assert vec.table_is_empty()
vec.upsert([("Not a valid UUID", {"key": "val"}, "the brown fox", [1.0, 1.2])])
assert not vec.table_is_empty()
vec.delete_by_ids(["Not a valid UUID"])
assert vec.table_is_empty()
vec.drop_table()
vec.close()

# Time-partitioned table: ids are version 1 (time-based) UUIDs.
vec = Sync(service_url, "data_table", 2, time_partition_interval=timedelta(seconds=60))
vec.create_tables()
assert vec.table_is_empty()
record_id = uuid.uuid1()
vec.upsert([(record_id, {"key": "val"}, "the brown fox", [1.0, 1.2])])
assert not vec.table_is_empty()
vec.delete_by_ids([record_id])
assert vec.table_is_empty()
raised = False
try:
    # can't upsert with uuid type 4 in time partitioned table
    vec.upsert([
        (uuid.uuid4(),  {"key": "val"}, "the brown fox", [1.0, 1.2])
    ])
except Exception:
    raised = True
assert raised

specific_datetime = datetime(2018, 8, 10, 15, 30, 0)
vec.upsert([
        # current time
        (uuid.uuid1(),  {"key": "val"}, "the brown fox", [1.0, 1.2]),
        #time in 2018
        (uuid_from_time(specific_datetime),  {"key": "val"}, "the brown fox", [1.0, 1.2])
])

def search_date(start_date, end_date, expected):
    """
    Check that every way of expressing a time range returns `expected` rows.

    The same range is searched through the `uuid_time_filter` argument, the
    `__start_date`/`__end_date` metadata filter keys and `__uuid_timestamp`
    predicates, each time first with the bounds as given and then with the
    bounds converted to strings.

    Parameters
    ----------
    start_date
        Lower bound of the range, or None for no lower bound.
    end_date
        Upper bound of the range, or None for no upper bound.
    expected
        Number of records each search must return.
    """
    def assert_matches(**search_kwargs):
        # Run one search with the shared embedding/limit and check its size.
        found = vec.search([1.0, 2.0], limit=4, **search_kwargs)
        assert len(found) == expected

    # using uuid_time_filter; note the string form passes str(None) for a
    # missing bound rather than omitting it
    assert_matches(uuid_time_filter=UUIDTimeRange(start_date, end_date))
    assert_matches(uuid_time_filter=UUIDTimeRange(str(start_date), str(end_date)))

    # using filters: raw dates first, then string dates
    for convert in (lambda value: value, str):
        metadata_filter = {}
        for key, bound in (("__start_date", start_date), ("__end_date", end_date)):
            if bound is not None:
                metadata_filter[key] = convert(bound)
        assert_matches(filter=metadata_filter)

    # using predicates: raw dates first, then string dates
    for convert in (lambda value: value, str):
        clauses = [
            ("__uuid_timestamp", operator, convert(bound))
            for operator, bound in ((">=", start_date), ("<", end_date))
            if bound is not None
        ]
        assert_matches(predicates=Predicates(*clauses))

# The table holds one record stamped with the current time and one stamped
# with specific_datetime.
assert not vec.table_is_empty()

one_week = timedelta(days=7)
week_before = specific_datetime - one_week
week_after = specific_datetime + one_week

# Each case is (start_date, end_date, expected number of matches).
for range_start, range_end, expected_count in (
    (week_before, week_after, 1),
    (week_before, None, 2),
    (None, week_after, 1),
    (week_before, specific_datetime - timedelta(days=2), 0),
):
    search_date(range_start, range_end, expected_count)

# check timedelta handling: a range given as one endpoint plus a time_delta
for endpoint, expected_count in (
    ({"start_date": specific_datetime}, 1),
    # end is exclusive, so the record stamped exactly at the end is left out
    ({"end_date": specific_datetime}, 0),
    ({"end_date": specific_datetime + timedelta(seconds=1)}, 1),
):
    time_range = UUIDTimeRange(time_delta=one_week, **endpoint)
    rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=time_range)
    assert len(rec) == expected_count

vec.drop_table()
vec.close()

In [None]:
#| hide
# Run nbdev's export step for this project's notebooks.
import nbdev
nbdev.nbdev_export()