In [1]:
%%capture
# !uv pip install -r ../requirements.txt # if using uv
!pip install -r ../requirements.txt 

In [2]:
# Housekeeping before the walkthrough: terminate every other backend session
# that is connected to 'test_db'.

import os

import psycopg

# Connect to the 'postgres' database rather than to 'test_db' itself;
# user and password are read from the environment.
admin_conn_kwargs = dict(
    dbname='postgres',
    user=os.getenv('POSTGRES_USER'),
    password=os.getenv('POSTGRES_PASSWORD'),
    host='localhost',
)

with psycopg.connect(**admin_conn_kwargs) as conn:
    conn.autocommit = True
    with conn.cursor() as cur:
        # pg_backend_pid() is this session's own pid, so it is excluded.
        cur.execute("""
            SELECT pg_terminate_backend(pid)
            FROM pg_stat_activity
            WHERE datname = 'test_db'
              AND pid <> pg_backend_pid();
        """)

In [3]:
import os

# Move the working directory two levels up so that `src` can be imported.
# The starting directory is recorded only the first time this cell runs and the
# chdir is anchored to it: a bare relative os.chdir('../..') would climb two
# more levels on every re-execution of the cell.
if '_NOTEBOOK_START_DIR' not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, '..', '..')) # to be able to import
import pandas as pd

from src.db_utils import (create_db, create_embeddings_table,
                      create_pgvector_extension, delete_db,
                      insert_data_into_table, pg_connection)

In [4]:
# Name of the database this walkthrough works against; every later cell reuses DB_NAME.
DB_NAME = 'test_db'
# Create that database through the project helper imported from src.db_utils.
create_db(db_name=DB_NAME)



In [5]:
# Create the pgvector extension for the database named by DB_NAME
# (project helper imported from src.db_utils).
create_pgvector_extension(db_name=DB_NAME)

INFO:root:pgvector extension created


In [6]:
# Create the embeddings table in DB_NAME. No table name is passed here; the
# helper's log output reports the table as 'pg_embeddings_test'.
create_embeddings_table(DB_NAME)

INFO:root:Embeddings table 'pg_embeddings_test' created.


In [7]:
# Inspect the embeddings table: open a connection to DB_NAME and load every
# row into a DataFrame, which is displayed as the cell's output.
TB_NAME = 'pg_embeddings_test'
CONN = pg_connection(db_name=DB_NAME)  # kept open; reused and closed by later cells

select_all_sql = f'SELECT * FROM {TB_NAME}'
pd.read_sql_query(select_all_sql, CONN)

  pd.read_sql_query(f'SELECT * FROM {TB_NAME}', CONN)


Unnamed: 0,id,chunk,embedding
0,07b101d8-108c-47e3-92fa-98b2a3a4d6a7,I'm a physicist and a Data Scientist,"[-0.048952606,-0.057101876,0.028381784,0.09913..."
1,15b34629-043c-4a46-805e-bf85d12a179a,I don't linke the Copenhagen interpretation,"[-0.0031696414,0.07755055,0.009189781,0.029925..."


In [8]:
# Insert a few toy rows into the embeddings table, then re-read the whole
# table to show its contents after the insert.
text_chunks = ['hello', 'world', 'how', 'are', 'you']

# Fake two-component embeddings, one per entry of text_chunks.
text_embeddings = [
    [0.1, 0.2],
    [0.3, 0.4],
    [0.5, 0.6],
    [0.7, 0.8],
    [0.9, 1.0],
]

insert_data_into_table(DB_NAME, text_chunks, text_embeddings, TB_NAME)
pd.read_sql_query(f'SELECT * FROM {TB_NAME}', CONN)

INFO:root:Data inserted into table 'pg_embeddings_test'. Failed chunks: 0
  pd.read_sql_query(f'SELECT * FROM {TB_NAME}', CONN)


Unnamed: 0,id,chunk,embedding
0,07b101d8-108c-47e3-92fa-98b2a3a4d6a7,I'm a physicist and a Data Scientist,"[-0.048952606,-0.057101876,0.028381784,0.09913..."
1,15b34629-043c-4a46-805e-bf85d12a179a,I don't linke the Copenhagen interpretation,"[-0.0031696414,0.07755055,0.009189781,0.029925..."
2,22913834-2aac-4487-92be-a0a5cd024d12,hello,"[0.1,0.2]"
3,e05101d4-c306-45ed-92dc-37eb96c9b608,world,"[0.3,0.4]"
4,606d1749-513b-4457-82e0-7ee178b3a271,how,"[0.5,0.6]"
5,6e34f431-f042-4915-9ec9-bb6a7097eea9,are,"[0.7,0.8]"
6,c8d7bb98-9d84-4e68-92be-6fd6d3e7b1c6,you,"[0.9,1]"


In [9]:
# Tear down: close the notebook's open connection first, then delete the database.
# NOTE(review): presumably the delete would fail while CONN is still attached to
# DB_NAME — confirm against delete_db in src.db_utils.
CONN.close()
delete_db(DB_NAME)

INFO:root:Database test_db deleted
