In [None]:
# Declare the list of upstream tasks whose products this notebook uses as inputs.
# It is left as None here, i.e. no upstream tasks are declared.
# NOTE(review): presumably read by the pipeline runner in this exact form - keep it a plain assignment.
upstream = None


In [None]:
# Load the SQL magic extension (reloading it if it is already loaded)
%reload_ext sql
# Open a connection to the DuckDB database file 'bank_data.duck.db'; the SQL cells below run against it
%sql duckdb:///bank_data.duck.db

In [None]:
%%sql
-- Reset step. Remove every object this notebook creates so the cells below
-- can rebuild them from scratch.

-- Raw tables inside the bank schema
DROP TABLE IF EXISTS bank.account;
DROP TABLE IF EXISTS bank.card;
DROP TABLE IF EXISTS bank.client;
DROP TABLE IF EXISTS bank.disp;
DROP TABLE IF EXISTS bank.district;
DROP TABLE IF EXISTS bank.loan;
DROP TABLE IF EXISTS bank.order;
DROP TABLE IF EXISTS bank.trans;

-- The schema goes after the tables it holds
DROP SCHEMA IF EXISTS bank;

-- Derived tables created without a schema qualifier
DROP TABLE IF EXISTS client_account_district;
DROP TABLE IF EXISTS account_trans_order;

In [None]:
%%sql
-- Load each raw CSV file under etl/expanded_data into its own table in the bank schema.
-- IF NOT EXISTS and OR REPLACE make this cell safe to run more than once.
-- Every run rebuilds the eight tables from the current CSV files instead of
-- failing because the schema or a table is already there.
CREATE SCHEMA IF NOT EXISTS bank;
CREATE OR REPLACE TABLE bank.account AS
FROM read_csv_auto('etl/expanded_data/account.csv', header=True, sep=',');
CREATE OR REPLACE TABLE bank.card AS
FROM read_csv_auto('etl/expanded_data/card.csv', header=True, sep=',');
CREATE OR REPLACE TABLE bank.client AS
FROM read_csv_auto('etl/expanded_data/client.csv', header=True, sep=',');
CREATE OR REPLACE TABLE bank.disp AS
FROM read_csv_auto('etl/expanded_data/disp.csv', header=True, sep=',');
CREATE OR REPLACE TABLE bank.district AS
FROM read_csv_auto('etl/expanded_data/district.csv', header=True, sep=',');
CREATE OR REPLACE TABLE bank.loan AS
FROM read_csv_auto('etl/expanded_data/loan.csv', header=True, sep=',');
CREATE OR REPLACE TABLE bank.order AS
FROM read_csv_auto('etl/expanded_data/order.csv', header=True, sep=',');
CREATE OR REPLACE TABLE bank.trans AS
FROM read_csv_auto('etl/expanded_data/trans.csv', header=True, sep=',');

Merge `client`, `account`, and `district` tables: This will give us a view of each client's demographic information (from `bank.client`), their account details (from `bank.account`), and the district's economic indicators (from `bank.district`).

In [None]:
%%sql
-- One row per client and account that the client holds a disposition on,
-- enriched with the indicators of the district the client belongs to.
--
-- Clients are linked to accounts through bank.disp. Joining client and account
-- directly on district_id would pair every client with every account opened in
-- the same district, not only with the accounts that client can use.
-- NOTE(review) assumes bank.disp exposes client_id and account_id columns. Confirm against disp.csv.
--
-- IF NOT EXISTS leaves an already built table untouched. Drop it first to rebuild.
CREATE TABLE IF NOT EXISTS client_account_district AS
SELECT
    c.client_id,
    c.birth_number,
    a.account_id,
    a.frequency,
    a.date AS account_creation_date,
    d.district_name,
    d.region,
    d.no_of_inhabitants,
    d.average_salary,
    d.unemployment_rate_95,
    d.unemployment_rate_96,
    d.no_of_entrepreneurs_per_1000_inhabitants
FROM bank.client c
JOIN bank.disp dp ON c.client_id = dp.client_id
JOIN bank.account a ON dp.account_id = a.account_id
JOIN bank.district d ON c.district_id = d.district_id;

Merge `account`, `trans`, and `order` tables: This will give us a view of each account's transactions and orders, which might indicate the client's financial activity.

In [None]:
%%sql
-- Combine accounts with their transactions and their orders.
-- Both joins key on account_id, so the result holds one row per
-- (transaction, order) pair of an account. Accounts without a transaction or
-- without an order do not appear because both joins are inner joins.
-- IF NOT EXISTS leaves an already built table untouched.
CREATE TABLE IF NOT EXISTS account_trans_order AS
SELECT
    -- account attributes
    acc.account_id,
    acc.frequency,
    acc.date        AS account_creation_date,
    -- transaction attributes
    tx.trans_id,
    tx.date         AS transaction_date,
    tx.type         AS transaction_type,
    tx.operation,
    tx.amount       AS transaction_amount,
    tx.balance,
    -- order attributes
    ordr.order_id,
    ordr.bank_to,
    ordr.account_to,
    ordr.amount     AS order_amount
FROM bank.account AS acc
INNER JOIN bank.trans AS tx
    ON acc.account_id = tx.account_id
INNER JOIN bank.order AS ordr
    ON acc.account_id = ordr.account_id;
