In [12]:
from sqlalchemy import create_engine, Column, Integer, String, MetaData, Table, text, inspect
from pandas.io.sql import get_schema

import pandas as pd

In [None]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Get the password from the environment variable
password = os.getenv("password")

table_name = 'customer_features'
file_name = 'test_new3'


In [4]:

# Define database connection
db_uri = f"postgresql+psycopg2://user:{password}@localhost:4321/mydb"
engine = create_engine(db_uri)


In [None]:
df = pd.read_csv(f'../data/{file_name}.csv')

In [35]:
# experimental code to insert data into the database
# def get_date_by_month(month: int, year: int = 2025, day: int = 1):
#     return pd.to_datetime(f"{year}-{month:02d}-{day:02d}")

# # Example usage:
# print(get_date_by_month(10))

In [8]:
df.head()

Unnamed: 0,date,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10
0,2025-03-01,-5.92992,9.044346,3.233961,1.941851,-3.105652,-8.121441,-6.100952,2.002023,4.483771,-2.441135
1,2025-03-01,5.042946,-10.798738,-8.088171,-6.636371,-6.470309,3.306359,-2.932694,4.491168,0.556842,-2.572778
2,2025-03-01,-1.710014,7.024047,0.160007,5.218641,-10.405809,-12.471499,-11.25492,11.533326,2.557083,2.294851
3,2025-03-01,-0.580756,7.261483,9.216444,0.711256,-10.377081,-10.451276,-5.537499,2.652932,11.30556,1.877472
4,2025-03-01,-12.551642,14.319912,5.067693,-10.191314,-9.106948,-0.345551,0.365073,-3.487628,-3.600268,-2.465096


In [9]:
df['date'] = pd.to_datetime(df['date'], utc=True)

In [10]:
df.date.dtype

datetime64[ns, UTC]

In [14]:


# Get the schema of the dataframe as it would be created in SQL

inspector = inspect(engine)
# Example: get columns for a table named 'table_name' (change as needed)
# columns = inspector.get_columns('table_name')
# print(columns)

# Or, get the SQL schema for the DataFrame
schema_sql = get_schema(df, name='table_name', con=engine)
schema_sql = schema_sql.replace(
    'CREATE TABLE',
    'CREATE TABLE IF NOT EXISTS',
    1  # The '1' ensures we only replace the first instance
)
print(schema_sql)


CREATE TABLE IF NOT EXISTS table_name (
	date TIMESTAMP WITH TIME ZONE, 
	x1 FLOAT(53), 
	x2 FLOAT(53), 
	x3 FLOAT(53), 
	x4 FLOAT(53), 
	x5 FLOAT(53), 
	x6 FLOAT(53), 
	x7 FLOAT(53), 
	x8 FLOAT(53), 
	x9 FLOAT(53), 
	x10 FLOAT(53)
)




In [40]:
# # Write the DataFrame to SQL using the schema definition
# with engine.connect() as conn:
#     conn.execute(text(schema_sql))  # Create the table with the defined schema
#     df.to_sql('table_name', con=conn, if_exists='append', index=False)
#     conn.commit()

In [41]:
# Query all data from the 'table_name' table and load into a new DataFrame
df_sql = pd.read_sql("SELECT * FROM table_name", con=engine)
df_sql.head()

Unnamed: 0,date,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10


In [15]:
# Assume 'engine' and 'df' are already created
# Assume 'schema_sql' is your CREATE TABLE string

# 1. Create an inspector object from the engine
inspector = inspect(engine)

# 2. Check if the table already exists in the database
if not inspector.has_table(table_name):
    print(f'Table {table_name} does not exist. Creating it now...')
    with engine.connect() as conn:
        conn.execute(text(schema_sql))
        conn.commit() # Commit the table creation
    print(f"Table {table_name} created successfully.")
else:
    print(f"Table {table_name} already exists.")

# 3. Now, you can safely append data
print(f'Appending data to {table_name} table...')
with engine.connect() as conn:
    df.to_sql(table_name, con=conn, if_exists='append', index=False)
    # The to_sql method in pandas often uses its own transaction handling,
    # but an explicit commit here is safe and good practice in SQLAlchemy 2.0.
    conn.commit()

print("Data successfully written to the database.")


Table test_new3 does not exist. Creating it now...
Table test_new3 created successfully.
Appending data to test_new3 table...
Data successfully written to the database.
