From afecdbe6918ba8cea3075a6e463000552fdd20d1 Mon Sep 17 00:00:00 2001 From: kik Date: Mon, 10 Jul 2023 09:44:28 +0000 Subject: [PATCH] REFACTOR postgresql-15.3 Update to postgresql-15.3 and add tests. --- .gitlab-ci.yml | 12 ++++++++++++ Dockerfile | 10 +++++----- README.md | 51 +++++++++++++++----------------------------------- test.sh | 19 +++++++++++++++++++ test.sql | 6 ++++++ 5 files changed, 57 insertions(+), 41 deletions(-) create mode 100644 .gitlab-ci.yml create mode 100755 test.sh create mode 100644 test.sql diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..00c9ba1 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,12 @@ +image: docker:latest +services: +- docker:dind + +stages: +- test + +testing_image: + stage: test + script: + - docker build -t pg-350d . + - ./test.sh diff --git a/Dockerfile b/Dockerfile index b05ea4b..3da18f1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ -FROM postgres:9.6 +FROM postgres:15.3 MAINTAINER Olivier El Mekki -RUN apt-get update && apt-get install -y build-essential curl postgresql-server-dev-9.6 -RUN curl https://ftp.postgresql.org/pub/source/v9.6.0/postgresql-9.6.0.tar.bz2 -o /postgresql-9.6.0.tar.bz2 -RUN cd / && tar xvf postgresql-9.6.0.tar.bz2 -RUN cd /postgresql-9.6.0/contrib/cube && sed -i 's/#define CUBE_MAX_DIM (100)/#define CUBE_MAX_DIM (350)/' cubedata.h && \ +RUN apt update && apt install -y build-essential curl postgresql-server-dev-15 +RUN curl https://ftp.postgresql.org/pub/source/v15.3/postgresql-15.3.tar.bz2 -o /postgresql-15.3.tar.bz2 +RUN cd / && tar xvf postgresql-15.3.tar.bz2 +RUN cd /postgresql-15.3/contrib/cube && sed -i 's/#define CUBE_MAX_DIM (100)/#define CUBE_MAX_DIM (350)/' cubedata.h && \ USE_PGXS=true make && USE_PGXS=true make install diff --git a/README.md b/README.md index d6c4206..992efb9 100644 --- a/README.md +++ b/README.md @@ -1,51 +1,30 @@ # pg350d -Docker build of postgresql-9.6 changing the dimension limit for the cube extension, raising it to 350. +Docker build of postgresql-15.3 changing the dimension limit for the cube +extension, raising it to 350. -This is needed to be able to work with words embedding with postgres. +This is needed to be able to work with words embedding or other machine +learning related vectors with postgres. -You can easily generate a build for your own need in term of dimensions by editing this dockerfile. +> Note: since pg350d was released, there has been some efforts to support +> machine learning friendly vectors in [pgvector](https://github.com/pgvector/pgvector), +> which supports up to 16k dimensions. +You can easily generate a build for your own need in term of dimensions by +editing this dockerfile. ## What is the problem again? -The cube extension, which you'll use to perform operations on vectors, has a hard limit of 100 dimensions per vector. - - -## But I can create vectors with more than 100 dimensions! - -Yup, I managed to do it too. With `INSERT` and `UPDATE`, the hard limit seems not to be properly checked. - -The problem happens (at least) when you try to import a dump. It will fail saying that you can't have -vectors with more than 100 dimensions. - -If you are using 101+ dimensions vectors with postgres currently, know that you won't be able to restore -your backups (and not upgrade postgres if you usually do so through dump/import) :) - - - -## Download - -The image dockerhub page is [here](https://hub.docker.com/r/oelmekki/pg350d/). - -To pull it: - -``` -docker pull oelmekki/pg350d:9.6 -``` - +The cube extension, which you'll use to perform operations on vectors, has +a hard limit of 100 dimensions per vector. ## Is it safe? -Patching the hardcoded limit is [the recommended way in postgres doc](https://www.postgresql.org/docs/9.5/static/cube.html#AEN169535). - -I've been using it for several months on my main business, and didn't encounter any problem so far. - - -## Variants - -If you want more than 350d and don't want to change it yourself, [@lisitsky made a 2000d variant](https://github.com/lisitsky/postgres-2kd). +Patching the hardcoded limit is [the recommended way in postgres +doc](https://www.postgresql.org/docs/current/cube.html#id-1.11.7.20.9). +I've been using it for a few years in production, and didn't encounter any +problem. ## How to raise postgresql's cube extension dimensions limit? diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..897026f --- /dev/null +++ b/test.sh @@ -0,0 +1,19 @@ +##!/usr/bin/env bash + +echo "Starting database…" +ID=$(docker run --rm -e POSTGRES_HOST_AUTH_METHOD=trust -d pg-350d) +sleep 10 + +echo "Running test…" +IP=$(docker inspect $ID | grep '"IPAddress"' | head -n 1 | awk '{ print $2 }' | sed 's/[",]//g') +psql -U postgres -h $IP -f ./test.sql | grep "ERROR" +ERR=$(test "$?" != "1") + +if [[ -n "$ERR" ]]; then + echo "$ERR" +else + echo "Success." +fi + +docker stop $ID &> /dev/null +exit $ERR diff --git a/test.sql b/test.sql new file mode 100644 index 0000000..d810df2 --- /dev/null +++ b/test.sql @@ -0,0 +1,6 @@ +CREATE EXTENSION cube; +CREATE TABLE vectors(vector cube); +-- vector of 350 dimensions +INSERT INTO vectors(vector) VALUES(cube(ARRAY[ +0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1 +]));