### Setup and Imports

In [40]:
import duckdb
import pandas as pd
from pathlib import Path

def run_query(sql_query: str, db_path: Path = Path('../data_lake/prod.duckdb')) -> pd.DataFrame:
    """
    Run a SQL query against the dbt DuckDB warehouse and return the result.

    A read-only connection is opened for each call and closed again when the
    ``with`` block exits, so no connection object outlives the call. Nothing
    is printed on success; display the returned frame from the calling cell.

    Args:
        sql_query: SQL text to execute.
        db_path: Location of the DuckDB database file. Defaults to
            ``../data_lake/prod.duckdb``; a relative path is interpreted
            against the kernel's current working directory.

    Returns:
        The query result as a DataFrame. If the database file is missing or
        the query raises, an error message is printed and an empty DataFrame
        is returned instead, so the notebook keeps running.
    """
    db_path = Path(db_path)
    if not db_path.exists():
        # Report the resolved path: the default is relative, so a wrong
        # working directory is the most likely cause of this failure.
        print(f"An error occurred: database file not found: {db_path.resolve()}")
        return pd.DataFrame()
    try:
        with duckdb.connect(database=str(db_path), read_only=True) as con:
            return con.execute(sql_query).df()
    except Exception as e:
        # Deliberate best-effort: surface the problem but hand back an empty
        # frame rather than aborting the cell.
        print(f"An error occurred: {e}")
        return pd.DataFrame()

print("Helper function `run_query` is defined.")

Helper function `run_query` is defined.


### List Available Models

In [41]:
# Banner first, then the table listing as the cell's displayed value.
print("\n--- Available Tables/Models in 'main' schema ---")
run_query(sql_query="SHOW TABLES")


--- Available Tables/Models in 'main' schema ---


Unnamed: 0,name
0,all_transactions_by_customer
1,dim_calendar
2,statements


### Load the all_transactions_by_customer Model

In [42]:
# Pull every column of the model into a DataFrame for inspection.
print("\n--- Loading 'all_transactions_by_customer' model ---")
transactions_df = run_query(
    sql_query="SELECT * FROM main.all_transactions_by_customer"
)

# Column names, non-null counts and dtypes.
print("Schema:")
transactions_df.info()

# The bare last expression renders the preview as the cell output.
print("\nFirst 5 rows:")
transactions_df.head(5)


--- Loading 'all_transactions_by_customer' model ---
Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1217 entries, 0 to 1216
Data columns (total 25 columns):
 #   Column                                     Non-Null Count  Dtype         
---  ------                                     --------------  -----         
 0   username                                   1217 non-null   object        
 1   email                                      1217 non-null   object        
 2   address                                    1217 non-null   object        
 3   financial_institution                      1217 non-null   object        
 4   employer_name                              1217 non-null   object        
 5   login_id                                   1217 non-null   object        
 6   request_id                                 1217 non-null   object        
 7   request_datetime                           1217 non-null   object        
 8   request_status                      

Unnamed: 0,username,email,address,financial_institution,employer_name,login_id,request_id,request_datetime,request_status,days_detected,...,date,description,category,subcategory,withdrawals,deposits,balance,most_recent_statement_date,most_recent_statement_date_minus_90_days,most_recent_statement_date_minus_180_days
0,Joel Schaubel,JOELSCHAUBEL@GMAIL.COM,"36 HOLKHAM AVE, ANCASTER, ON, L9K1P1",Simplii,,5eff116b-d0d9-4924-4b37-08dc29c779f9,727DAE61-63E9-4121-801E-F11CA8FF32FD,2024-02-11 19:26:39,Get Statements Completed,,...,2024-02-08,NSF FEE NSF S/C,debit,,45.0,,1482.55,2024-02-09,2023-11-11,2023-08-13
1,Joel Schaubel,JOELSCHAUBEL@GMAIL.COM,"36 HOLKHAM AVE, ANCASTER, ON, L9K1P1",Simplii,,5eff116b-d0d9-4924-4b37-08dc29c779f9,727DAE61-63E9-4121-801E-F11CA8FF32FD,2024-02-11 19:26:39,Get Statements Completed,,...,2023-03-29,POS MERCHANDISE AIR-SERV MAC S/,debit,,2.0,,3043.4,2024-02-09,2023-11-11,2023-08-13
2,Joel Schaubel,JOELSCHAUBEL@GMAIL.COM,"36 HOLKHAM AVE, ANCASTER, ON, L9K1P1",Simplii,,5eff116b-d0d9-4924-4b37-08dc29c779f9,727DAE61-63E9-4121-801E-F11CA8FF32FD,2024-02-11 19:26:39,Get Statements Completed,,...,2023-10-18,POS MERCHANDISE DOLLARAMA 898,debit,,5.34,,4939.63,2024-02-09,2023-11-11,2023-08-13
3,Joel Schaubel,JOELSCHAUBEL@GMAIL.COM,"36 HOLKHAM AVE, ANCASTER, ON, L9K1P1",Simplii,,5eff116b-d0d9-4924-4b37-08dc29c779f9,727DAE61-63E9-4121-801E-F11CA8FF32FD,2024-02-11 19:26:39,Get Statements Completed,,...,2023-10-18,POS MERCHANDISE FRESHCO 9825,debit,,11.8,,4944.97,2024-02-09,2023-11-11,2023-08-13
4,Joel Schaubel,JOELSCHAUBEL@GMAIL.COM,"36 HOLKHAM AVE, ANCASTER, ON, L9K1P1",Simplii,,5eff116b-d0d9-4924-4b37-08dc29c779f9,727DAE61-63E9-4121-801E-F11CA8FF32FD,2024-02-11 19:26:39,Get Statements Completed,,...,2024-02-06,INTERAC E-TRANSFER SEND Jason Schaubel,debit,,50.0,,1727.55,2024-02-09,2023-11-11,2023-08-13


### Ad-Hoc Analysis & Prototyping

Copy the example query below into a cell under "Modify Below" and adapt it to your development needs.

#### Example

In [43]:
# Example aggregate: sum the withdrawals column per email, largest total first.
query = """
    SELECT 
        email, 
        SUM(withdrawals) as total_withdrawals
    FROM main.all_transactions_by_customer
    GROUP BY email
    ORDER BY total_withdrawals DESC
"""

print("\n--- Ad-Hoc Query: Total Withdrawals per Customer ---")
run_query(sql_query=query)


--- Ad-Hoc Query: Total Withdrawals per Customer ---


Unnamed: 0,email,total_withdrawals
0,JOELSCHAUBEL@GMAIL.COM,265788.41


#### Modify Below

In [50]:
# Preview the calendar dimension. The frame is named after the model it is
# read from so it cannot be mistaken for transaction data.
query = """
    SELECT *
    FROM main.dim_calendar
"""
dim_calendar_df = run_query(query)
dim_calendar_df.head()

Unnamed: 0,date_day,prior_date_day,next_date_day,prior_year_date_day,prior_year_over_year_date_day,day_of_week,day_of_week_iso,day_of_week_name,day_of_week_name_short,day_of_month,...,month_start_date,month_end_date,prior_year_month_start_date,prior_year_month_end_date,quarter_of_year,quarter_start_date,quarter_end_date,year_number,year_start_date,year_end_date
0,2023-01-01,2022-12-31,2023-01-02,2022-01-01,2022-01-02,1,7,Sunday,Sun,1,...,2023-01-01,2023-01-31,2022-01-01,2022-01-31,1,2023-01-01,2023-03-31,2023,2023-01-01,2023-12-31
1,2023-01-02,2023-01-01,2023-01-03,2022-01-02,2022-01-03,2,1,Monday,Mon,2,...,2023-01-01,2023-01-31,2022-01-01,2022-01-31,1,2023-01-01,2023-03-31,2023,2023-01-01,2023-12-31
2,2023-01-03,2023-01-02,2023-01-04,2022-01-03,2022-01-04,3,2,Tuesday,Tue,3,...,2023-01-01,2023-01-31,2022-01-01,2022-01-31,1,2023-01-01,2023-03-31,2023,2023-01-01,2023-12-31
3,2023-01-04,2023-01-03,2023-01-05,2022-01-04,2022-01-05,4,3,Wednesday,Wed,4,...,2023-01-01,2023-01-31,2022-01-01,2022-01-31,1,2023-01-01,2023-03-31,2023,2023-01-01,2023-12-31
4,2023-01-05,2023-01-04,2023-01-06,2022-01-05,2022-01-06,5,4,Thursday,Thu,5,...,2023-01-01,2023-01-31,2022-01-01,2022-01-31,1,2023-01-01,2023-03-31,2023,2023-01-01,2023-12-31


In [47]:
# Select only the identifying, date and amount columns of the transactions
# model instead of all of its columns.
query = """
    SELECT
        username,
        email,
        request_id,
        request_datetime,
        date,
        withdrawals,
        deposits,
        balance
    FROM main.all_transactions_by_customer
"""

all_transactions_by_customer = run_query(sql_query=query)

# Show the first five rows as the cell output.
all_transactions_by_customer.head(5)

Unnamed: 0,username,email,request_id,request_datetime,date,withdrawals,deposits,balance
0,Joel Schaubel,JOELSCHAUBEL@GMAIL.COM,727DAE61-63E9-4121-801E-F11CA8FF32FD,2024-02-11 19:26:39,2024-02-08,45.00,,1482.55
1,Joel Schaubel,JOELSCHAUBEL@GMAIL.COM,727DAE61-63E9-4121-801E-F11CA8FF32FD,2024-02-11 19:26:39,2023-03-29,2.00,,3043.40
2,Joel Schaubel,JOELSCHAUBEL@GMAIL.COM,727DAE61-63E9-4121-801E-F11CA8FF32FD,2024-02-11 19:26:39,2023-10-18,5.34,,4939.63
3,Joel Schaubel,JOELSCHAUBEL@GMAIL.COM,727DAE61-63E9-4121-801E-F11CA8FF32FD,2024-02-11 19:26:39,2023-10-18,11.80,,4944.97
4,Joel Schaubel,JOELSCHAUBEL@GMAIL.COM,727DAE61-63E9-4121-801E-F11CA8FF32FD,2024-02-11 19:26:39,2024-02-06,50.00,,1727.55
...,...,...,...,...,...,...,...,...
1212,Joel Schaubel,JOELSCHAUBEL@GMAIL.COM,727DAE61-63E9-4121-801E-F11CA8FF32FD,2024-02-11 19:26:39,2023-02-17,1500.00,,4048.40
1213,Joel Schaubel,JOELSCHAUBEL@GMAIL.COM,727DAE61-63E9-4121-801E-F11CA8FF32FD,2024-02-11 19:26:39,2023-02-16,,411.26,5548.40
1214,Joel Schaubel,JOELSCHAUBEL@GMAIL.COM,727DAE61-63E9-4121-801E-F11CA8FF32FD,2024-02-11 19:26:39,2023-03-29,27.77,,2946.96
1215,Joel Schaubel,JOELSCHAUBEL@GMAIL.COM,727DAE61-63E9-4121-801E-F11CA8FF32FD,2024-02-11 19:26:39,2023-03-29,68.67,,2974.73


### Closing the Connection

In [33]:
# `run_query` opens its connection inside a `with` block and closes it before
# returning, and no cell in this notebook defines a top-level `con`. Calling
# `con.close()` here therefore raised NameError on a fresh kernel; there is
# nothing left to close.
print("\nDatabase connection closed.")


Database connection closed.
