In [2]:
import getpass  # To get the password without showing the input
from urllib.parse import quote  # To percent-encode the password inside the connection URL

import pandas as pd
import pymysql
from sqlalchemy import create_engine

In [3]:
password = getpass.getpass()
connection_string = 'mysql+pymysql://root:' + password + '@localhost/bank'
engine = create_engine(connection_string)
%load_ext sql
%sql {connection_string}

 ·········


'Connected: root@bank'

### Lesson 1 key concepts

> :clock10: 20 min

- What common table expressions (CTEs) are
- Writing CTEs


<summary>Description: CTEs </summary>

A **common table expression** (CTE) is a named, temporary result set defined by a query; it exists only within the execution scope of a single SQL statement. Here are some of the reasons for using CTEs:

- Improves readability of the query and, in some cases, its performance
- Helps in simplifying the queries
- Recursive CTEs can be used for hierarchical data (this is outside the scope of this class, but worth mentioning)


<summary> Code Sample </summary>

- A very simple example to show the general syntax
- The query after the `AS` keyword can be any query (from very simple to very complex)

In [10]:
%%sql
WITH cte_loan AS (
    -- The CTE body can be any query, here it simply exposes the whole loan table.
    SELECT *
    FROM bank.loan
)
SELECT *
FROM cte_loan
WHERE status = 'B'
LIMIT 5;

 * mysql+pymysql://root:***@localhost/bank
5 rows affected.


loan_id,account_id,date,amount,duration,payments,status
5314,1787,930705,96396,12,8033.0,B
6111,5428,930924,174744,24,7281.0,B
6228,6034,931201,464520,60,7742.0,B
7097,10266,931223,75624,24,3151.0,B
6650,8073,940217,49320,12,4110.0,B


In this query, we want to find the total amount and total balance for each account in the `trans` table and then pull more information on those accounts by joining the CTE with the `account` table:

In [6]:
%%sql
WITH cte_transactions AS (
    -- One row per account with its summed transaction amount and balance.
    -- The aggregates are left unaliased so the result headers stay unchanged.
    SELECT
        account_id,
        sum(amount),
        sum(balance)
    FROM bank.trans
    GROUP BY account_id
)
SELECT *
FROM cte_transactions AS txn
INNER JOIN account AS acc
    ON txn.account_id = acc.account_id
LIMIT 5;

 * mysql+pymysql://root:***@localhost/bank
5 rows affected.


account_id,sum(amount),sum(balance),account_id_1,district_id,frequency,date
1,276676.89999961853,3213242.1962890625,1,18,POPLATEK MESICNE,950324
2,2041870.69919014,13581073.080078123,2,1,POPLATEK MESICNE,930226
3,213256.20001220703,2591502.9140625,3,5,POPLATEK MESICNE,970707
4,268801.4000110626,3302279.50390625,4,12,POPLATEK MESICNE,960221
5,111093.39999771118,1744284.1015625,5,15,POPLATEK MESICNE,970530


# 3.06 Activity 1

Keep working on the `bank` database.

Use a CTE to display the first account opened in each district.

### Solution:

In [20]:
%%sql
SELECT
    account_id,
    district_id,
    -- Position of the account within its district by opening date, ties share a rank.
    RANK() OVER (PARTITION BY district_id ORDER BY date) AS open_order
FROM bank.account
LIMIT 5;

 * mysql+pymysql://root:***@localhost/bank
5 rows affected.


account_id,district_id,open_order
1539,1,1
764,1,2
1637,1,2
1730,1,4
2925,1,5


In [9]:
%%sql
WITH ordered_bank_accounts AS (
    -- Rank the accounts of every district by opening date, earliest first.
    SELECT
        account_id,
        district_id,
        RANK() OVER (PARTITION BY district_id ORDER BY date) AS open_order
    FROM bank.account
)
SELECT
    dist.A3,
    dist.A2,
    acc.account_id
FROM ordered_bank_accounts AS acc
INNER JOIN bank.district AS dist
    ON dist.A1 = acc.district_id
-- Keep only the account or accounts ranked first in each district.
WHERE open_order = 1
ORDER BY dist.A3, dist.A2
LIMIT 5;

 * mysql+pymysql://root:***@localhost/bank
5 rows affected.


A3,A2,account_id
central Bohemia,Benesov,128
central Bohemia,Beroun,2099
central Bohemia,Kladno,2519
central Bohemia,Kolin,187
central Bohemia,Kutna Hora,485
