In [1]:
from sqlalchemy import create_engine, Column, Integer, String, MetaData, Table, text
import pandas as pd

In [None]:
from dotenv import load_dotenv
import os

# Load environment variables from the .env file into the process environment
load_dotenv()

# Read the database password from the environment instead of hardcoding it,
# so the credential never ends up in the notebook or in version control.
# NOTE(review): the variable name DB_PASSWORD is an assumption -- align it
# with the key actually used in your .env file.
password = os.getenv("DB_PASSWORD")
if not password:
    # Fail early with an actionable message rather than with an opaque
    # authentication error when the first query is executed.
    raise RuntimeError(
        "DB_PASSWORD is not set; define it in the .env file or the environment."
    )



In [3]:

# Define database connection
from urllib.parse import quote

# Percent-encode the password before embedding it in the URL: characters such
# as '@', ':' or '/' would otherwise be parsed as URL delimiters and produce a
# wrong host/credential split. safe='' makes sure '/' is encoded as well.
db_uri = f"postgresql+psycopg2://user:{quote(password, safe='')}@localhost:4321/mydb"
engine = create_engine(db_uri)


In [4]:
# Load the reference dataset (path is relative to the notebook's directory)
csv_path = "../data/test_ref.csv"
df = pd.read_csv(csv_path)

In [5]:
# Preview the first five rows of the loaded frame
df.head(n=5)

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10
0,-3.267453,8.607125,3.476517,-0.056344,-8.178182,-9.32799,-8.770161,5.783576,2.489872,1.04966
1,4.529118,-8.764866,-1.039529,-5.733686,2.380684,4.958168,-6.491922,-3.495332,0.938702,-11.150605
2,-10.066174,8.664548,5.865337,-7.598038,-3.132749,-6.976551,-1.480838,3.537761,0.635522,-5.038658
3,-1.832205,8.18371,5.905443,6.514555,-6.515895,-6.383668,-9.75705,5.623834,3.682972,2.449284
4,-1.793623,10.135855,6.805981,4.080774,-9.634966,-8.75576,-7.808257,8.351095,3.052396,11.866915


In [6]:
from sqlalchemy import inspect
from pandas.io.sql import get_schema

# Inspector bound to the engine; handy for ad-hoc reflection of tables that
# already exist, e.g. inspector.get_columns('test_ref').
inspector = inspect(engine)

# Ask pandas for the CREATE TABLE statement it would generate for `df`
# under the name 'test_ref' on this engine, and show it.
schema_sql = get_schema(
    df,
    name='test_ref',
    con=engine,
)
print(schema_sql)


CREATE TABLE test_ref (
	x1 FLOAT(53), 
	x2 FLOAT(53), 
	x3 FLOAT(53), 
	x4 FLOAT(53), 
	x5 FLOAT(53), 
	x6 FLOAT(53), 
	x7 FLOAT(53), 
	x8 FLOAT(53), 
	x9 FLOAT(53), 
	x10 FLOAT(53)
)




In [7]:
# Write the DataFrame to SQL using the schema definition.
# engine.begin() opens a transaction that is committed when the block exits
# normally and rolled back if an exception is raised, so no explicit commit
# is needed.
with engine.begin() as conn:
    # Create the table only when it is missing; running CREATE TABLE
    # unconditionally makes this cell fail on every re-run once the table exists.
    if not inspect(conn).has_table('test_ref'):
        conn.execute(text(schema_sql))  # Create the table with the defined schema
    # NOTE: if_exists='append' inserts the rows again each time this cell runs.
    df.to_sql('test_ref', con=conn, if_exists='append', index=False)

In [8]:
# Read every row of 'test_ref' back into a fresh DataFrame and preview it
select_all_query = "SELECT * FROM test_ref"
df_sql = pd.read_sql(select_all_query, con=engine)
df_sql.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10
0,-3.267453,8.607125,3.476517,-0.056344,-8.178182,-9.32799,-8.770161,5.783576,2.489872,1.04966
1,4.529118,-8.764866,-1.039529,-5.733686,2.380684,4.958168,-6.491922,-3.495332,0.938702,-11.150605
2,-10.066174,8.664548,5.865337,-7.598038,-3.132749,-6.976551,-1.480838,3.537761,0.635522,-5.038658
3,-1.832205,8.18371,5.905443,6.514555,-6.515895,-6.383668,-9.75705,5.623834,3.682972,2.449284
4,-1.793623,10.135855,6.805981,4.080774,-9.634966,-8.75576,-7.808257,8.351095,3.052396,11.866915


In [None]:
## Duplicate of the create/append cell above -- improved version, prefer this one

from sqlalchemy import create_engine, inspect, text
import pandas as pd

# Assume 'engine' and 'df' are already created
# Assume 'schema_sql' is your CREATE TABLE string

# Single source of truth for the target table, so the existence check, the
# log messages and the insert cannot drift apart (the insert previously went
# to 'test_ref2' while everything else referred to 'test_ref').
table_name = 'test_ref'

# 1. Create an inspector object from the engine
inspector = inspect(engine)

# 2. Create the table from schema_sql only if it does not exist yet
if not inspector.has_table(table_name):
    print(f"Table '{table_name}' does not exist. Creating it now...")
    # engine.begin() commits on success and rolls back on error
    with engine.begin() as conn:
        conn.execute(text(schema_sql))
    print("Table created successfully.")
else:
    print(f"Table '{table_name}' already exists.")

# 3. Append the DataFrame rows to the table that was checked/created above
print(f"Appending data to '{table_name}' table...")
with engine.begin() as conn:
    # NOTE: if_exists='append' inserts the rows again each time this cell runs.
    df.to_sql(table_name, con=conn, if_exists='append', index=False)

print("✅ Data successfully written to the database.")


Table 'test_ref' already exists.
Appending data to 'test_ref' table...
✅ Data successfully written to the database.
