### Setup and Imports

In [11]:
import duckdb
import pandas as pd
from pathlib import Path

# Location of the DuckDB database file generated by dbt.
# The path is relative to the 'analytics' directory, so it only resolves
# when the kernel's working directory is that directory.
DB_PATH = '../pipelines/data_lake/prod.duckdb'

# Fail fast with the fully resolved location: a wrong working directory is
# the usual reason a relative path does not resolve, and the absolute path
# in the message makes that obvious.
db_file = Path(DB_PATH)
if not db_file.is_file():
    raise FileNotFoundError(
        f"DuckDB database not found at '{db_file.resolve()}'. "
        "Check that the notebook is running from the 'analytics' directory "
        "and that the database file has been generated."
    )

# Open in read-only mode so exploratory queries cannot modify the warehouse.
con = duckdb.connect(database=str(db_file), read_only=True)
print("Successfully connected to the dbt data warehouse.")

Successfully connected to the dbt data warehouse.


### List Available Models

In [12]:
# Print the section banner, then ask DuckDB which tables it exposes and
# show the listing as a pandas DataFrame.
print("\n--- Available Tables/Models in 'main' schema ---")
available_tables = con.execute("SHOW TABLES").df()
print(available_tables)


--- Available Tables/Models in 'main' schema ---
                           name
0  all_transactions_by_customer
1                    statements


### Load the all_transactions_by_customer Model

In [13]:
# Pull every row of the model into pandas for interactive exploration.
print("\n--- Loading 'all_transactions_by_customer' model ---")
transactions_df = con.execute(
    "SELECT * FROM main.all_transactions_by_customer"
).df()

# Column names, non-null counts and dtypes; info() writes its report itself,
# so it is not wrapped in print().
print("Schema:")
transactions_df.info()

# Preview of the leading rows.
print("\nFirst 5 rows:")
print(transactions_df.head(5))


--- Loading 'all_transactions_by_customer' model ---
Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1217 entries, 0 to 1216
Data columns (total 25 columns):
 #   Column                                     Non-Null Count  Dtype         
---  ------                                     --------------  -----         
 0   username                                   1217 non-null   object        
 1   email                                      1217 non-null   object        
 2   address                                    1217 non-null   object        
 3   financial_institution                      1217 non-null   object        
 4   employer_name                              1217 non-null   object        
 5   login_id                                   1217 non-null   object        
 6   request_id                                 1217 non-null   object        
 7   request_datetime                           1217 non-null   object        
 8   request_status                      

### Ad-Hoc Analysis & Prototyping

In [14]:
print("\n--- Ad-Hoc Query: Total Withdrawals per Customer ---")

# Sum the withdrawals column per email address, largest total first.
query = """
SELECT 
    email, 
    SUM(withdrawals) as total_withdrawals
FROM main.all_transactions_by_customer
GROUP BY email
ORDER BY total_withdrawals DESC
"""

# Run the aggregation in DuckDB and show the result as a DataFrame.
withdrawals_by_customer = con.execute(query).df()
print(withdrawals_by_customer)


--- Ad-Hoc Query: Total Withdrawals per Customer ---
                    email  total_withdrawals
0  JOELSCHAUBEL@GMAIL.COM          265788.41


### Closing the Connection

In [None]:
# Release the DuckDB connection now that the analysis is finished.
con.close()
print("\nDatabase connection closed.")