Skip to content

Commit 583a733

Browse files
authored
feat(drizzle): support half-precision, binary, and sparse vectors column types (#12491)
Adds support for the `halfvec`, `sparsevec`, and `bit` (binary vector) column types. These are required to index embeddings with more than 2,000 dimensions on Postgres using the pgvector extension.
1 parent 6e5ddc8 commit 583a733

File tree

11 files changed

+464
-234
lines changed

11 files changed

+464
-234
lines changed

.github/workflows/main.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,8 @@ jobs:
187187

188188
services:
189189
postgres:
190-
image: ${{ (startsWith(matrix.database, 'postgres') ) && 'postgis/postgis:16-3.4' || '' }}
190+
# Custom postgres 17 docker image that supports both pg-vector and postgis: https://github.com/payloadcms/postgis-vector
191+
image: ${{ (startsWith(matrix.database, 'postgres') ) && 'ghcr.io/payloadcms/postgis-vector:latest' || '' }}
191192
env:
192193
# must specify password for PG Docker container image, see: https://registry.hub.docker.com/_/postgres?tab=description&page=1&name=10
193194
POSTGRES_USER: ${{ env.POSTGRES_USER }}

package.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,8 @@
151151
"create-payload-app": "workspace:*",
152152
"cross-env": "7.0.3",
153153
"dotenv": "16.4.7",
154-
"drizzle-kit": "0.31.0",
155-
"drizzle-orm": "0.43.1",
154+
"drizzle-kit": "0.31.4",
155+
"drizzle-orm": "0.44.2",
156156
"escape-html": "^1.0.3",
157157
"execa": "5.1.1",
158158
"form-data": "3.0.1",
@@ -166,7 +166,7 @@
166166
"next": "15.3.2",
167167
"open": "^10.1.0",
168168
"p-limit": "^5.0.0",
169-
"pg": "8.11.3",
169+
"pg": "8.16.3",
170170
"playwright": "1.50.0",
171171
"playwright-core": "1.50.0",
172172
"prettier": "3.5.3",

packages/db-postgres/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,9 @@
7878
"@payloadcms/drizzle": "workspace:*",
7979
"@types/pg": "8.10.2",
8080
"console-table-printer": "2.12.1",
81-
"drizzle-kit": "0.31.1",
81+
"drizzle-kit": "0.31.4",
8282
"drizzle-orm": "0.44.2",
83-
"pg": "8.11.3",
83+
"pg": "8.16.3",
8484
"prompts": "2.4.2",
8585
"to-snake-case": "1.0.0",
8686
"uuid": "10.0.0"

packages/db-sqlite/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@
7676
"@libsql/client": "0.14.0",
7777
"@payloadcms/drizzle": "workspace:*",
7878
"console-table-printer": "2.12.1",
79-
"drizzle-kit": "0.31.1",
79+
"drizzle-kit": "0.31.4",
8080
"drizzle-orm": "0.44.2",
8181
"prompts": "2.4.2",
8282
"to-snake-case": "1.0.0",

packages/db-vercel-postgres/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,9 @@
7878
"@payloadcms/drizzle": "workspace:*",
7979
"@vercel/postgres": "^0.9.0",
8080
"console-table-printer": "2.12.1",
81-
"drizzle-kit": "0.31.1",
81+
"drizzle-kit": "0.31.4",
8282
"drizzle-orm": "0.44.2",
83-
"pg": "8.11.3",
83+
"pg": "8.16.3",
8484
"prompts": "2.4.2",
8585
"to-snake-case": "1.0.0",
8686
"uuid": "10.0.0"

packages/drizzle/src/postgres/columnToCodeConverter.ts

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,26 @@ export const columnToCodeConverter: ColumnToCodeConverter = ({
2424

2525
const columnBuilderArgsArray: string[] = []
2626

27-
if (column.type === 'timestamp') {
28-
columnBuilderArgsArray.push(`mode: '${column.mode}'`)
29-
if (column.withTimezone) {
30-
columnBuilderArgsArray.push('withTimezone: true')
27+
switch (column.type) {
28+
case 'bit':
29+
case 'halfvec':
30+
case 'sparsevec':
31+
case 'vector': {
32+
if (column.dimensions) {
33+
columnBuilderArgsArray.push(`dimensions: ${column.dimensions}`)
34+
}
35+
break
3136
}
32-
33-
if (typeof column.precision === 'number') {
34-
columnBuilderArgsArray.push(`precision: ${column.precision}`)
35-
}
36-
}
37-
38-
if (column.type === 'vector') {
39-
if (column.dimensions) {
40-
columnBuilderArgsArray.push(`dimensions: ${column.dimensions}`)
37+
case 'timestamp': {
38+
columnBuilderArgsArray.push(`mode: '${column.mode}'`)
39+
if (column.withTimezone) {
40+
columnBuilderArgsArray.push('withTimezone: true')
41+
}
42+
43+
if (typeof column.precision === 'number') {
44+
columnBuilderArgsArray.push(`precision: ${column.precision}`)
45+
}
46+
break
4147
}
4248
}
4349

packages/drizzle/src/postgres/schema/buildDrizzleTable.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
import type { ForeignKeyBuilder, IndexBuilder } from 'drizzle-orm/pg-core'
22

33
import {
4+
bit,
45
boolean,
56
foreignKey,
7+
halfvec,
68
index,
79
integer,
810
jsonb,
911
numeric,
1012
serial,
13+
sparsevec,
1114
text,
1215
timestamp,
1316
uniqueIndex,
@@ -44,6 +47,14 @@ export const buildDrizzleTable = ({
4447

4548
for (const [key, column] of Object.entries(rawTable.columns)) {
4649
switch (column.type) {
50+
case 'bit': {
51+
const builder = bit(column.name, { dimensions: column.dimensions })
52+
53+
columns[key] = builder
54+
55+
break
56+
}
57+
4758
case 'enum':
4859
if ('locale' in column) {
4960
columns[key] = adapter.enums.enum__locales(column.name)
@@ -56,6 +67,21 @@ export const buildDrizzleTable = ({
5667
}
5768
break
5869

70+
case 'halfvec': {
71+
const builder = halfvec(column.name, { dimensions: column.dimensions })
72+
73+
columns[key] = builder
74+
break
75+
}
76+
77+
case 'sparsevec': {
78+
const builder = sparsevec(column.name, { dimensions: column.dimensions })
79+
80+
columns[key] = builder
81+
82+
break
83+
}
84+
5985
case 'timestamp': {
6086
let builder = timestamp(column.name, {
6187
mode: column.mode,

packages/drizzle/src/types.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,12 +281,30 @@ export type VectorRawColumn = {
281281
type: 'vector'
282282
} & BaseRawColumn
283283

284+
export type HalfVecRawColumn = {
285+
dimensions?: number
286+
type: 'halfvec'
287+
} & BaseRawColumn
288+
289+
export type SparseVecRawColumn = {
290+
dimensions?: number
291+
type: 'sparsevec'
292+
} & BaseRawColumn
293+
294+
export type BinaryVecRawColumn = {
295+
dimensions?: number
296+
type: 'bit'
297+
} & BaseRawColumn
298+
284299
export type RawColumn =
285300
| ({
286301
type: 'boolean' | 'geometry' | 'jsonb' | 'numeric' | 'serial' | 'text' | 'varchar'
287302
} & BaseRawColumn)
303+
| BinaryVecRawColumn
288304
| EnumRawColumn
305+
| HalfVecRawColumn
289306
| IntegerRawColumn
307+
| SparseVecRawColumn
290308
| TimestampRawColumn
291309
| UUIDRawColumn
292310
| VectorRawColumn

0 commit comments

Comments
 (0)