In [None]:
%pip install sqlalchemy psycopg2 pgvector

In [None]:
import os, sys
import torch
import numpy as np


In [None]:
# Connection settings for the PostgreSQL server used by this notebook.
# URL format: postgresql://username:password@databasehost:port/databasename
# Every value can be overridden through an environment variable so that real
# credentials never have to be written into the notebook; the fallbacks are
# the local test defaults. Values are inserted into the URL verbatim, so a
# password containing URL-special characters must already be percent-encoded.
dbname = os.environ.get('PGVECTOR_DEMO_DBNAME', 'test_corealchemy')
_pguser = os.environ.get('PGVECTOR_DEMO_USER', 'root')
_pgpassword = os.environ.get('PGVECTOR_DEMO_PASSWORD', 'root')
_pghost = os.environ.get('PGVECTOR_DEMO_HOST', 'localhost')
_pgport = os.environ.get('PGVECTOR_DEMO_PORT', '55432')
_pgserver = f'postgresql://{_pguser}:{_pgpassword}@{_pghost}:{_pgport}'
# Database that holds the notebook's data.
pgdburl = f'{_pgserver}/{dbname}'
# Maintenance database, used only to check for / create `dbname`.
pgrootdburl = f'{_pgserver}/root'

In [None]:
import sqlalchemy
from pgvector.sqlalchemy import Vector
from sqlalchemy.sql.schema import MetaData
from sqlalchemy import Table, Column, Integer, String, func

In [None]:
# Both engines run in AUTOCOMMIT mode, so every statement is committed as soon
# as it is executed (PostgreSQL rejects CREATE DATABASE inside a transaction).
_engine_options = {'isolation_level': 'AUTOCOMMIT', 'echo': False}

# Engine bound to the maintenance database.
rootengine = sqlalchemy.create_engine(pgrootdburl, **_engine_options)
# Engine bound to the database named by `dbname`.
dataengine = sqlalchemy.create_engine(pgdburl, **_engine_options)

In [None]:
# Registry that collects the table definitions of this notebook.
metadata_obj = MetaData()

# Table "tensordata": a BigInteger primary key plus a 5-dimensional pgvector column.
tensor_table = Table(
    "tensordata",
    metadata_obj,
    Column("key", sqlalchemy.BigInteger, primary_key=True),
    Column("embedding", Vector(5)),
)

# Sanity check: the table is attached to the metadata object created above.
assert tensor_table.metadata == metadata_obj

In [None]:

def init_database():
    """Create the database named by ``dbname`` if the server does not have it yet.

    The check and the creation both run on ``rootengine``.

    Returns:
        bool: True if the database was created by this call, False if it
        already existed.
    """
    # Bind the name as a parameter instead of formatting it into the SQL text.
    exists_stmt = sqlalchemy.text("SELECT 1 FROM pg_database WHERE datname = :name")
    with rootengine.connect() as rootconnection:
        if rootconnection.execute(exists_stmt, {"name": dbname}).first():
            return False
        print(f"Database '{dbname}' does not exist and is beeing created.")
        # Identifiers cannot be sent as bound parameters, so quote the name by
        # hand and double any embedded double quote.
        quoted_name = '"' + dbname.replace('"', '""') + '"'
        rootconnection.execute(sqlalchemy.text(f'CREATE DATABASE {quoted_name};'))
    return True

def init_tables():
    """Enable the pgvector extension and create the tables of ``metadata_obj``.

    Both steps run on a connection from ``dataengine``.

    Returns:
        bool: always True.
    """
    enable_vector = sqlalchemy.text('CREATE EXTENSION IF NOT EXISTS vector;')
    with dataengine.connect() as conn:
        # Step 1: pgvector support.
        print(f"Adding vector support to database '{dbname}'.")
        conn.execute(enable_vector)
        # Step 2: the tables registered on metadata_obj.
        print(f"Creating tables for empty database '{dbname}'.")
        metadata_obj.create_all(conn)
    return True

def table_size():
    """Return the number of rows currently stored in ``tensor_table``.

    Returns:
        int: the value of ``SELECT count(*)`` over the table.
    """
    # func.count() without arguments renders as count(*).
    stmt = sqlalchemy.select(func.count()).select_from(tensor_table)
    with dataengine.connect() as dataconnection:
        # Read the single value directly. Attribute access such as `row.count`
        # resolves to the Row's inherited sequence method `count`, not to the
        # selected column.
        return dataconnection.execute(stmt).scalar_one()

# Make sure the database exists, then make sure the extension and tables exist.
# init_tables() uses CREATE EXTENSION IF NOT EXISTS and create_all(), which
# skips tables that are already present, so it is safe to run every time.
# Running it unconditionally covers a database that exists but has no tables
# yet, which would otherwise make table_size() fail.
init_database()
init_tables()
nrows = table_size()
print(f'Database {dbname} #rows: {nrows}.')

    

In [None]:
# Write data into the table 'textdata' in PostgreSQL database
# df.to_sql(name='textdata', con=dataengine, if_exists='fail')
# df = pd.read_sql(sql='textdata', con=dataengine)


In [None]:
# Random demo payload: 10,000 rows with 5 uniformly distributed values each.
a = torch.rand(10_000, 5)
print(a.shape)


In [None]:
# Build one insert record per row of `a`. tolist() converts the whole tensor
# to nested Python lists in a single call, so the database layer receives
# plain floats instead of one torch tensor object per row.
item_dicts = [{'key': i, 'embedding': e} for i, e in enumerate(a.tolist())]
print(len(item_dicts))

In [None]:
# Display the first record to check the structure of the insert payload.
item_dicts[0]

In [None]:
# Bulk-insert all records. Running this cell again violates the primary key on
# "key"; only that case is reported as "already added". Any other failure
# (connection problems, missing table, interrupt, ...) is allowed to surface
# instead of being hidden behind the same message.
try:
    with dataengine.begin() as dataconnection:
        dataconnection.execute(tensor_table.insert(), item_dicts)
except sqlalchemy.exc.IntegrityError:
    print('Data already added.')

In [None]:
# Retrieve the embeddings for a handful of keys; keys without a row are simply
# missing from the result.
stmt = (
    sqlalchemy.select(tensor_table.c.embedding)
    .where(tensor_table.c.key.in_([1, 2, 7, 8, 12, 241231]))
    .order_by(tensor_table.c.key)  # make the row order deterministic
)

with dataengine.connect() as dataconnection:
    rows = dataconnection.execute(stmt)
    # Unpack the single selected column of each row. Unlike transposing with
    # list(zip(*rows))[0], this does not raise when no key matches.
    embeddings = [embedding for (embedding,) in rows]
    arr = np.array(embeddings)
    tensors = torch.tensor(arr, dtype=torch.float32)
    print(tensors.shape)

In [None]:
# df = pandas.read_sql(sql='textdata', con=dataengine)

In [None]:
# Dispose of both engines, in the same order as they were created.
for _engine in (rootengine, dataengine):
    _engine.dispose()