In [None]:
import os
import time

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

load_dotenv()
start = time.time()  # wall-clock reference for the elapsed-time printout

# Fail fast with a readable message when the environment is incomplete.
# Interpolating a missing variable would otherwise put the literal text
# "None" into the connection URL and fail later with a confusing error.
# PG_PASSWORD is deliberately not required so password-less setups still work.
required_env = ("PG_USER", "PG_HOST", "PG_PORT", "PG_DATABASE")
missing_env = [name for name in required_env if not os.getenv(name)]
if missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_env)
    )

# Build the URL from its components instead of string interpolation so that
# special characters in the credentials (e.g. '@', '/', ':') are escaped
# correctly rather than corrupting the URL.
db_url = URL.create(
    "postgresql",
    username=os.environ["PG_USER"],
    password=os.getenv("PG_PASSWORD"),
    host=os.environ["PG_HOST"],
    port=int(os.environ["PG_PORT"]),
    database=os.environ["PG_DATABASE"],
)

# Connection string, kept as a plain string under its original name.
# NOTE: it contains the clear-text password; do not display or log it.
conn_str = db_url.render_as_string(hide_password=False)

# connect_args is forwarded to the DBAPI driver's connect() call.
engine = create_engine(db_url, connect_args={"connect_timeout": 5})

# ---- Run SQL ----
# The connection only lives for the duration of this `with` block; it is
# closed on exit, so later cells must open their own from `engine`.
with engine.connect() as conn:
    # Ping: a trivial round trip to confirm the database is reachable.
    conn.execute(text("SELECT 1"))
    print("DB ping OK")

    # List every table in the `public` schema, sorted by name.
    df_tables = pd.read_sql("""
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'public'
        ORDER BY table_name
    """, conn)

df_tables

In [None]:
# Fetch (table, column) pairs for the `public` schema, ordered by table and
# then by each column's position within its table.
with engine.connect() as conn:
    df_cols = pd.read_sql(con=conn, sql="""
        SELECT table_name, column_name
        FROM information_schema.columns
        WHERE table_schema = 'public'
        ORDER BY table_name, ordinal_position
    """)

# One table per pair of lines: the table name, then its columns indented
# and comma-separated.
for table, group in df_cols.groupby("table_name"):
    column_list = ", ".join(group["column_name"])
    print(table, "  " + column_list, sep="\n")

# Release the engine's pooled connections, then report the time elapsed
# since `start` was recorded.
engine.dispose()
elapsed = round(time.time() - start, 2)
print("took", elapsed, "s")

In [None]:
# Compare average lifetime spend between customers who bought at least one
# Jazz track and customers who never did.
#   jazz_customers  - distinct customers with any invoice line whose track
#                     belongs to the genre named 'Jazz'
#   customer_totals - sum of invoice totals per customer (all genres)
query1 = """
WITH jazz_customers AS (
  SELECT DISTINCT i.customer_id
  FROM invoice i
  JOIN invoice_line il ON il.invoice_id = i.invoice_id
  JOIN track t ON t.track_id = il.track_id
  JOIN genre g ON g.genre_id = t.genre_id
  WHERE g.name = 'Jazz'
),
customer_totals AS (
  SELECT
        customer_id, 
        SUM(total) AS total_spent
  FROM invoice
  GROUP BY customer_id
)
SELECT
    CASE
        WHEN ct.customer_id IN (SELECT customer_id FROM jazz_customers)
        THEN 'jazz' ELSE 'non_jazz'
    END AS customer_segment,
    AVG(ct.total_spent) AS avg_total_spent
FROM customer_totals ct
GROUP BY customer_segment;
"""

# Open a fresh connection for this query. A `conn` left over from an earlier
# `with engine.connect() as conn:` block is already closed once that block
# exits, so reusing it here would fail.
with engine.connect() as conn:
    df1 = pd.read_sql(query1, conn)

print("Jazz vs Non-Jazz Customers Analysis:")
print(df1)

In [None]:
# Expression index on LOWER(name) of the track table; IF NOT EXISTS makes
# the statement safe to re-run.
query2 = """
CREATE INDEX IF NOT EXISTS idx_track_lower_name
ON track (LOWER(name));
"""

# SQLAlchemy Connection objects have no DBAPI-style .cursor() method, so the
# statement is executed through SQLAlchemy itself. engine.begin() opens a
# connection plus a transaction, commits when the block exits normally and
# rolls back if an exception is raised.
with engine.begin() as conn:
    conn.execute(text(query2))

print("Index created successfully!")

In [None]:
# Case-insensitive lookup of the album title(s) containing a track named
# 'Enter Sandman'. Both sides of the comparison are lower-cased.
query3 = """
SELECT album.title
FROM track
JOIN album ON track.album_id = album.album_id
WHERE LOWER(track.name) = LOWER('Enter Sandman');
"""

# Open a fresh connection for this query. A `conn` left over from an earlier
# `with` block is already closed once that block exits, so reusing it here
# would fail.
with engine.connect() as conn:
    df3 = pd.read_sql(query3, conn)

print("Search results for 'Enter Sandman':")
print(df3)

In [None]:
# Release database resources when done. Connections opened with
# `with engine.connect() as conn:` are already closed when their block exits,
# so what remains to clean up is the engine's connection pool.
engine.dispose()
print("Connection closed.")