In [1]:
from sqlalchemy import create_engine, Column, Integer, String, MetaData, Table, text
import pandas as pd

In [2]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Get the database password from the environment and fail fast if it is
# absent: os.getenv returns None for a missing variable, and None would
# otherwise be formatted into the connection string as the text "None".
password = os.getenv("password")
if password is None:
    raise RuntimeError(
        'Environment variable "password" is not set; '
        "define it in the .env file or in the shell before running this notebook."
    )

# Name used both for the source CSV file and for the target database table
table_name = 'customer_features'


In [4]:

# Define database connection
# Build the URL from its components instead of an f-string so that special
# characters in the password (e.g. '@', ':', '/', '%') are escaped correctly.
from sqlalchemy.engine import URL

db_url = URL.create(
    drivername="postgresql+psycopg2",
    username="user",
    password=password,
    host="localhost",
    port=4321,
    database="mydb",
)
# Keep `db_uri` available as a plain string (password included, now escaped)
# for any code that still expects the string form.
db_uri = db_url.render_as_string(hide_password=False)
engine = create_engine(db_url)


In [6]:
# Load the CSV whose file name matches `table_name` from the ../data directory
df = pd.read_csv(filepath_or_buffer=f'../data/{table_name}.csv')

In [None]:
# Experimental (disabled) helper for the database-insert work: builds a Timestamp from a month/year/day
# def get_date_by_month(month: int, year: int = 2025, day: int = 1):
#     return pd.to_datetime(f"{year}-{month:02d}-{day:02d}")

# # Example usage:
# print(get_date_by_month(10))

In [7]:
# Preview the first five rows of the loaded frame
df.head(n=5)

Unnamed: 0,date,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10
0,2025-01-01,-3.267453,8.607125,3.476517,-0.056344,-8.178182,-9.32799,-8.770161,5.783576,2.489872,1.04966
1,2025-01-01,4.529118,-8.764866,-1.039529,-5.733686,2.380684,4.958168,-6.491922,-3.495332,0.938702,-11.150605
2,2025-01-01,-10.066174,8.664548,5.865337,-7.598038,-3.132749,-6.976551,-1.480838,3.537761,0.635522,-5.038658
3,2025-01-01,-1.832205,8.18371,5.905443,6.514555,-6.515895,-6.383668,-9.75705,5.623834,3.682972,2.449284
4,2025-01-01,-1.793623,10.135855,6.805981,4.080774,-9.634966,-8.75576,-7.808257,8.351095,3.052396,11.866915


In [12]:
# Convert the 'date' column to pandas datetimes
df['date'] = df['date'].pipe(pd.to_datetime)

In [13]:
# Confirm the dtype of the 'date' column after conversion
df['date'].dtype

dtype('<M8[ns]')

In [14]:
from sqlalchemy import inspect
from pandas.io.sql import get_schema

# Render the CREATE TABLE statement that corresponds to the DataFrame

# Inspector handle for examining what already exists in the database
inspector = inspect(engine)
# e.g. to list the columns of an existing table (swap in the real table name):
# columns = inspector.get_columns('table_name')
# print(columns)

# DDL for the DataFrame's columns, generated against the connected engine.
# NOTE(review): the name passed here is the literal string 'table_name',
# not the `table_name` variable defined earlier in the notebook.
schema_sql = get_schema(frame=df, name='table_name', con=engine)
print(schema_sql)


CREATE TABLE table_name (
	date TIMESTAMP WITHOUT TIME ZONE, 
	x1 FLOAT(53), 
	x2 FLOAT(53), 
	x3 FLOAT(53), 
	x4 FLOAT(53), 
	x5 FLOAT(53), 
	x6 FLOAT(53), 
	x7 FLOAT(53), 
	x8 FLOAT(53), 
	x9 FLOAT(53), 
	x10 FLOAT(53)
)




In [None]:
# Write the DataFrame to SQL using the schema definition.
# The CREATE TABLE statement is only executed when the table is missing, so
# re-running this cell no longer fails because the table already exists.
# Note: every run appends all rows of `df` again.
table_missing = not inspect(engine).has_table('table_name')
with engine.connect() as conn:
    if table_missing:
        conn.execute(text(schema_sql))  # Create the table with the defined schema
    df.to_sql('table_name', con=conn, if_exists='append', index=False)
    conn.commit()  # Persist both the (optional) table creation and the inserted rows

In [None]:
# Read every row of the 'table_name' table back into a DataFrame and preview it
df_sql = pd.read_sql(sql="SELECT * FROM table_name", con=engine)
df_sql.head(n=5)

In [None]:
# Assumes `engine`, `df` and `table_name` were created by earlier cells, and
# that `inspect`, `text` and `get_schema` have already been imported.

# 1. Create an inspector object from the engine
inspector = inspect(engine)

# 2. Check if the table already exists in the database
if not inspector.has_table(table_name):
    print(f'Table {table_name} does not exist. Creating it now...')
    # Generate the DDL for `table_name` itself. The previously printed
    # `schema_sql` was built for the literal name 'table_name', so executing
    # it here would create a different table from the one checked above.
    create_table_sql = get_schema(df, name=table_name, con=engine)
    with engine.connect() as conn:
        conn.execute(text(create_table_sql))
        conn.commit() # Commit the table creation
    print(f"Table {table_name} created successfully.")
else:
    print(f"Table {table_name} already exists.")

# 3. Append the data. Note that each run of this cell appends every row of
#    `df` again; it does not replace or de-duplicate existing rows.
print(f'Appending data to {table_name} table...')
with engine.connect() as conn:
    df.to_sql(table_name, con=conn, if_exists='append', index=False)
    # Explicit commit so the inserted rows are persisted before the
    # connection is closed.
    conn.commit()

print("Data successfully written to the database.")


Table 'table_name' already exists.
Appending data to 'table_name' table...
Data successfully written to the database.
