## 데이터 분석을 위한 SQL 레시피

Data source : https://hanbit.co.kr/support/supplement_survey.html?pcode=B8585882565

System : PostgreSQL

In [1]:
import pandas as pd
import psycopg2 as pg2
from sqlalchemy import create_engine

# Open a psycopg2 connection to the local PostgreSQL database used by this notebook.
# NOTE(review): credentials are hard-coded here and in the engine URL below.
con = pg2.connect(
    host='localhost',
    user='testuser',
    password='testpass',
    database='postgresql_analysis',
)
con.autocommit = True  # each statement is committed as soon as it runs
cur = con.cursor()

# SQLAlchemy engine for the same database.
engine = create_engine('postgresql://testuser:testpass@localhost:5432/postgresql_analysis')

In [2]:
def select(query):
    """Run a SQL query and return its result set as a pandas DataFrame.

    Args:
        query: SQL text to execute.

    Returns:
        A pandas.DataFrame holding the rows returned by the query.
    """
    # pandas.read_sql officially supports SQLAlchemy connectables (plus sqlite3);
    # passing the raw psycopg2 connection raises a UserWarning on pandas >= 1.4,
    # so read through the module-level SQLAlchemy engine instead.
    return pd.read_sql(query, engine)

In [3]:
# Limit DataFrame display to 10 rows; longer results are shown truncated.
pd.set_option('display.max_rows', 10)

## 7. 하나의 테이블에 대한 조작

### [7-1] 집약 함수를 사용해서 테이블 전체의 특징량을 계산하는 쿼리

In [4]:
# Show every row of the review table.
select('SELECT * FROM review;')

Unnamed: 0,user_id,product_id,score
0,U001,A001,4.0
1,U001,A002,5.0
2,U001,A003,5.0
3,U002,A001,3.0
4,U002,A002,3.0
5,U002,A003,4.0
6,U003,A001,5.0
7,U003,A002,4.0
8,U003,A003,4.0


In [5]:
# Whole-table aggregates over review: total row count, distinct user and
# product counts, and the sum / average / maximum / minimum of score.
query_71 = """
        SELECT 
           COUNT(*) AS total_count
         , COUNT(DISTINCT user_id) AS user_count
         , COUNT(DISTINCT product_id) AS product_count
         , SUM(score) AS sum
         , AVG(score) AS avg
         , MAX(score) AS max
         , MIN(score) AS min
        FROM
         review
        ;
        """

select(query_71)

Unnamed: 0,total_count,user_count,product_count,sum,avg,max,min
0,9,3,3,37.0,4.111111,5.0,3.0


### [7-2] 사용자 기반으로 데이터를 분할하고 집약 함수를 적용하는 쿼리

In [6]:
# Same aggregates as the whole-table query, but computed per user via
# GROUP BY user_id. No ORDER BY is given, so the row order is whatever the
# server returns.
query_72 = """
        SELECT
           user_id
         , COUNT(*) AS total_count
         , COUNT(DISTINCT product_id) AS product_count
         , SUM(score) AS sum
         , AVG(score) AS avg
         , MAX(score) AS max
         , MIN(score) AS min
        FROM
         review
        GROUP BY
         user_id
        ;
        """

select(query_72)

Unnamed: 0,user_id,total_count,product_count,sum,avg,max,min
0,U001,3,3,14.0,4.666667,5.0,4.0
1,U002,3,3,10.0,3.333333,4.0,3.0
2,U003,3,3,13.0,4.333333,5.0,4.0


### [7-3] 윈도 함수를 사용해 집약 함수의 결과와 원래 값을 동시에 다루는 쿼리

- OVER 구문에 매개 변수를 지정하지 않으면 테이블 전체에 집약 함수를 적용한 값이 리턴됩니다.
- 매개 변수에 PARTITION BY <컬럼 이름>을 지정하면 해당 컬럼 값을 기반으로 그룹화하고 집약 함수를 적용합니다.

In [7]:
# Keep every review row and attach aggregate values with window functions:
#   avg_score           - AVG(score) OVER() over the whole table
#   user_avg_score      - AVG(score) partitioned by user_id
#   user_avg_score_diff - the row's score minus that per-user average
query_73 = """
        SELECT
           user_id
         , product_id
         , score
         -- 전체 평균 리뷰 점수
         , AVG(score) OVER() AS avg_score
         -- 사용자의 평균 리뷰 점수
         , AVG(score) OVER(PARTITION BY user_id) AS user_avg_score
         , score - AVG(score) OVER(PARTITION BY user_id) AS user_avg_score_diff
        FROM
         review
        ;
        """

select(query_73)

Unnamed: 0,user_id,product_id,score,avg_score,user_avg_score,user_avg_score_diff
0,U001,A001,4.0,4.111111,4.666667,-0.666667
1,U001,A002,5.0,4.111111,4.666667,0.333333
2,U001,A003,5.0,4.111111,4.666667,0.333333
3,U002,A001,3.0,4.111111,3.333333,-0.333333
4,U002,A002,3.0,4.111111,3.333333,-0.333333
5,U002,A003,4.0,4.111111,3.333333,0.666667
6,U003,A001,5.0,4.111111,4.333333,0.666667
7,U003,A002,4.0,4.111111,4.333333,-0.333333
8,U003,A003,4.0,4.111111,4.333333,-0.333333


### [7-4] 윈도 함수의 ORDER BY 구문을 사용해 테이블 내부의 순서를 다루는 쿼리

In [8]:
# Show every row of the popular_products table.
select('SELECT * FROM popular_products;')

Unnamed: 0,product_id,category,score
0,A001,action,94.0
1,A002,action,81.0
2,A003,action,78.0
3,A004,action,64.0
4,D001,drama,90.0
5,D002,drama,82.0
6,D003,drama,78.0
7,D004,drama,58.0


- ROW_NUMBER() : 점수 순서로 유일한 순위를 붙임
- RANK() : 같은 순위를 허용해서 순위를 붙임 (같은 순위 다음에 있는 순위는 건너 뜀)
- DENSE_RANK() : 같은 순위를 허용하되, 같은 순위 다음에 있는 순위를 건너 뛰지 않고 순위를 붙임
- LAG(): 현재 행보다 앞에 있는 행의 값 추출하기
- LEAD() : 현재 행보다 뒤에 있는 행의 값 추출하기

In [9]:
# Order popular_products by score (descending) and show, per row:
#   row / rank / dense_rank - ROW_NUMBER, RANK and DENSE_RANK positions
#   lag1 / lag2             - product_id one and two rows earlier
#   lead1 / lead2           - product_id one and two rows later
# popular_products contains tied scores, so the windows that must produce a
# single well-defined row order (ROW_NUMBER, LAG, LEAD) use product_id as a
# tie-breaker. RANK and DENSE_RANK deliberately order by score alone so that
# tied products still share a rank.
query_74 = """
        SELECT
           product_id
         , score
         -- product_id breaks score ties so the row order is deterministic
         , ROW_NUMBER()         OVER(ORDER BY score DESC, product_id) AS row
         , RANK()               OVER(ORDER BY score DESC) AS rank
         , DENSE_RANK()         OVER(ORDER BY score DESC) AS dense_rank
         , LAG(product_id)      OVER(ORDER BY score DESC, product_id) AS lag1
         , LAG(product_id, 2)   OVER(ORDER BY score DESC, product_id) AS lag2
         , LEAD(product_id)     OVER(ORDER BY score DESC, product_id) AS lead1
         , LEAD(product_id, 2)  OVER(ORDER BY score DESC, product_id) AS lead2
        FROM
         popular_products
        ORDER BY
         row
        ;
        """

select(query_74)

Unnamed: 0,product_id,score,row,rank,dense_rank,lag1,lag2,lead1,lead2
0,A001,94.0,1,1,1,,,D001,D002
1,D001,90.0,2,2,2,A001,,D002,A002
2,D002,82.0,3,3,3,D001,A001,A002,A003
3,A002,81.0,4,4,4,D002,D001,A003,D003
4,A003,78.0,5,5,5,A002,D002,D003,A004
5,D003,78.0,6,5,5,A003,A002,A004,D004
6,A004,64.0,7,7,6,D003,A003,D004,
7,D004,58.0,8,8,7,A004,D003,,


#### | 윈도 프레임 지정에 대해서 |

ROWS BETWEEN 'start' AND 'end'

'start' / 'end'

- CURRENT ROW : 현재의 행
- n PRECEDING : n행의 앞
- n FOLLOWING : n행의 뒤
- UNBOUNDED PRECEDING : 이전 행의 전부
- UNBOUNDED FOLLOWING : 이후 행의 전부

### [7-5] ORDER BY 구문과 집약 함수를 조합해서 계산하는 쿼리

In [10]:
# Combine ORDER BY with aggregate / value window functions over
# popular_products ordered by score (descending):
#   row         - unique position
#   cum_score   - running total from the top row to the current row
#   local_avg   - average of the previous, current and next row
#   first_value - product_id of the first row of the whole ordering
#   last_value  - product_id of the last row of the whole ordering
# All frames are ROWS-based, so their results depend on the exact row order.
# popular_products contains tied scores, so product_id is added as a
# tie-breaker to every window to make that order deterministic.
query_75 = """
        SELECT
           product_id
         , score
         -- 점수 순서로 유일한 순서를 붙임
         , ROW_NUMBER() OVER(ORDER BY score DESC, product_id) AS row
         -- 순위 상위부터의 누계 점수 계산하기
         , SUM(score)
            OVER(ORDER BY score DESC, product_id
             ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
           AS cum_score
         -- 현재 행과 앞 뒤의 행이 가진 값을 기반으로 평균 점수 계산하기
         , AVG(score)
            OVER(ORDER BY score DESC, product_id
             ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
           AS local_avg
         , FIRST_VALUE(product_id)
            OVER(ORDER BY score DESC, product_id
             ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
           AS first_value
         , LAST_VALUE(product_id)
            OVER(ORDER BY score DESC, product_id
             ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
           AS last_value
        FROM
         popular_products
        ORDER BY
         row
        ;
        """

select(query_75)

Unnamed: 0,product_id,score,row,cum_score,local_avg,first_value,last_value
0,A001,94.0,1,94.0,92.0,A001,D004
1,D001,90.0,2,184.0,88.666667,A001,D004
2,D002,82.0,3,266.0,84.333333,A001,D004
3,A002,81.0,4,347.0,80.333333,A001,D004
4,A003,78.0,5,425.0,79.0,A001,D004
5,D003,78.0,6,503.0,73.333333,A001,D004
6,A004,64.0,7,567.0,66.666667,A001,D004
7,D004,58.0,8,625.0,61.0,A001,D004


### [7-6] 윈도 프레임 지정별 상품 ID를 집약하는 쿼리

In [11]:
# Illustrate three window frames by collecting product_ids with array_agg,
# restricted to the 'action' category and ordered by score (descending):
#   whole_agg - UNBOUNDED PRECEDING .. UNBOUNDED FOLLOWING (every row)
#   cum_agg   - UNBOUNDED PRECEDING .. CURRENT ROW (top row up to this row)
#   local_agg - 1 PRECEDING .. 1 FOLLOWING (previous, current, next row)
query_76 = """
        SELECT
           product_id
         -- 점수 순서로 유일한 순서를 붙임
         , ROW_NUMBER() OVER(ORDER BY score DESC) AS row
         -- 가장 앞 순위부터 가장 뒷 순위까지의 범위를 대상으로 상품 ID 집약하기
         , array_agg(product_id)
            OVER(ORDER BY score DESC
             ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
           AS whole_agg
         -- 가장 앞 순위부터 현재 순위까지의 범위를 대상으로 상품 ID 집약하기
         , array_agg(product_id)
            OVER(ORDER BY score DESC
             ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
           AS cum_agg
         -- 순위 하나 앞과 하나 뒤까지의 범위를 대상으로 상품 ID 집약하기
         , array_agg(product_id)
            OVER(ORDER BY score DESC
             ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
           AS local_agg
        FROM
         popular_products
        WHERE
         category = 'action'
        ORDER BY
         row
        ;
        """

select(query_76)

Unnamed: 0,product_id,row,whole_agg,cum_agg,local_agg
0,A001,1,"[A001, A002, A003, A004]",[A001],"[A001, A002]"
1,A002,2,"[A001, A002, A003, A004]","[A001, A002]","[A001, A002, A003]"
2,A003,3,"[A001, A002, A003, A004]","[A001, A002, A003]","[A002, A003, A004]"
3,A004,4,"[A001, A002, A003, A004]","[A001, A002, A003, A004]","[A003, A004]"


### [7-7] 윈도 함수를 사용해 카테고리들의 순위를 계산하는 쿼리

In [12]:
# Rank products inside each category (PARTITION BY category) by score,
# descending, using ROW_NUMBER, RANK and DENSE_RANK.
# Changes from the previous version:
#   * the embedded DENSE_RANK comment claimed it skips the rank after a tie;
#     DENSE_RANK leaves no gaps (RANK is the one that skips), so it is fixed
#   * ROW_NUMBER uses product_id as a tie-breaker so the final
#     ORDER BY category, row is deterministic when scores tie
query_77 = """
        SELECT
           category
         , product_id
         -- 카테고리별로 점수 순서로 정렬하고 유일한 순서를 붙임
         , ROW_NUMBER()
            OVER(PARTITION BY category ORDER BY score DESC, product_id)
           AS row
         -- 카테고리별로 같은 순위를 허가하고 순위를 붙임
         , RANK()
            OVER(PARTITION BY category ORDER BY score DESC)
           AS rank
         -- Per category: tied rows share a rank and the following rank
         -- is NOT skipped (no gaps), unlike RANK()
         , DENSE_RANK()
            OVER(PARTITION BY category ORDER BY score DESC)
           AS dense_rank
        FROM
         popular_products
        ORDER BY
         category, row
        ;
        """

select(query_77)

Unnamed: 0,category,product_id,row,rank,dense_rank
0,action,A001,1,1,1
1,action,A002,2,2,2
2,action,A003,3,3,3
3,action,A004,4,4,4
4,drama,D001,1,1,1
5,drama,D002,2,2,2
6,drama,D003,3,3,3
7,drama,D004,4,4,4


### [7-8] 카테고리들의 순위 상위 2개까지의 상품을 추출하는 쿼리

윈도 함수를 WHERE 구문에 작성할 수 없으므로, SELECT 구문에서 윈도 함수를 사용한 결과를 서브 쿼리로 만들고 외부에서 WHERE 구문을 적용해야 합니다.

In [13]:
# Top two products per category. A window function cannot be used in WHERE,
# so the position is computed in a subquery and filtered in the outer query.
# ROW_NUMBER now uses product_id as a tie-breaker: with score alone, which of
# two equally scored products lands inside the top two would be arbitrary.
query_78 = """
        SELECT *
        FROM
         -- 서브 쿼리 내부에서 순위 계산하기
         (
          SELECT
              category
            , product_id
            , score
            , ROW_NUMBER()
               OVER(PARTITION BY category ORDER BY score DESC, product_id)
              AS rank
           FROM
              popular_products
          ) AS popular_products_with_rank
        -- 외부 쿼리에서 순위 활요해 압축하기
        WHERE
         rank <=2
        ORDER BY
         category, rank
        ;
        """

select(query_78)

Unnamed: 0,category,product_id,score,rank
0,action,A001,94.0,1
1,action,A002,81.0,2
2,drama,D001,90.0,1
3,drama,D002,82.0,2


### [7-9] 카테고리별 순위 최상위 상품을 추출하는 쿼리

In [14]:
# Highest-scored product per category: FIRST_VALUE over the full partition
# frame gives the same product_id on every row of a category, and
# SELECT DISTINCT collapses those rows to one per category.
# No ORDER BY is applied to the final result, so row order is not fixed.
query_79 = """
        -- DISTINCT 구문을 사용해 중복 제거하기
        SELECT DISTINCT
           category
           -- 카테고리별로 순위 최상위 상품 ID 추출하기
         , FIRST_VALUE(product_id)
            OVER(PARTITION BY category ORDER BY score DESC
             ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
           AS product_id
        FROM
           popular_products
        ;
        """

select(query_79)

Unnamed: 0,category,product_id
0,drama,D001
1,action,A001


### [7-10] 행으로 지정된 지표 값을 열로 변환하는 쿼리

In [15]:
# Show every row of the daily_kpi table.
select('SELECT * FROM daily_kpi;')

Unnamed: 0,dt,indicator,val
0,2017-01-01,impressions,1800
1,2017-01-01,sessions,500
2,2017-01-01,users,200
3,2017-01-02,impressions,2000
4,2017-01-02,sessions,700
5,2017-01-02,users,250


In [16]:
# Pivot rows to columns: one output row per dt, with the val of the
# 'impressions', 'sessions' and 'users' indicators as separate columns.
# Each CASE yields val only for its indicator (NULL otherwise) and MAX picks
# the single non-NULL value within the dt group.
query_710 = """
        SELECT
           dt
         -- 현재 코드에서 CASE 표현식의 결과는 리스트
         -- 여기에서 스칼라값 하나를 추출하기 위해 일반적으로 MAX/MIN 등을 사용
         , MAX(CASE WHEN indicator = 'impressions' THEN val END) AS impressions
         , MAX(CASE WHEN indicator = 'sessions' THEN val END) AS sessions
         , MAX(CASE WHEN indicator = 'users' THEN val END) AS users        
        FROM
           daily_kpi
        GROUP BY
           dt
        ORDER BY
           dt
        ;
        """

select(query_710)

Unnamed: 0,dt,impressions,sessions,users
0,2017-01-01,1800,500,200
1,2017-01-02,2000,700,250


### [7-11] 행을 집약해서 쉼표로 구분된 문자열로 변환하기

In [17]:
# Show every row of the purchase_detail_log table.
select('SELECT * FROM purchase_detail_log;')

Unnamed: 0,purchase_id,product_id,price
0,100001,A001,300
1,100001,A002,400
2,100001,A003,200
3,100002,D001,500
4,100002,D002,300
5,100003,A001,300


In [18]:
# Collapse purchase_detail_log to one row per purchase_id:
#   product_ids - the purchase's product_ids joined with ', '
#   amount      - the sum of price
# string_agg concatenates in whatever order rows reach the aggregate unless
# an ORDER BY is given inside the call, so one is added to make the
# product_ids string deterministic.
query_711 = """
        SELECT
           purchase_id
         , string_agg(product_id, ', ' ORDER BY product_id) AS product_ids
         , SUM(price) AS amount
        FROM
           purchase_detail_log
        GROUP BY
           purchase_id
        ORDER BY
           purchase_id
        ;
        """

select(query_711)

Unnamed: 0,purchase_id,product_ids,amount
0,100001,"A001, A002, A003",900
1,100002,"D001, D002",800
2,100003,A001,300


### [7-12] 일련 번호를 가진 피벗 테이블을 사용해 행으로 변환하는 쿼리

In [19]:
# Show every row of the quarterly_sales table.
select('SELECT * FROM quarterly_sales;')

Unnamed: 0,year,q1,q2,q3,q4
0,2015,82000,83000,78000.0,83000.0
1,2016,85000,85000,80000.0,81000.0
2,2017,92000,81000,,


In [20]:
# Unpivot columns to rows: cross join quarterly_sales with an inline
# four-row table (idx 1..4) so each year appears four times, then use CASE
# on idx to emit the quarter label ('q1'..'q4') and the matching q1..q4 value.
# No ORDER BY is given, so the row order is whatever the server returns.
query_712 = """
        SELECT
           q.year
         -- q1에서 q4까지의 레이블 이름 출력하기
         , CASE
            WHEN p.idx = 1 THEN 'q1'
            WHEN p.idx = 2 THEN 'q2'
            WHEN p.idx = 3 THEN 'q3'
            WHEN p.idx = 4 THEN 'q4'
           END AS quarter
        -- q1에서 q4까지의 매출 출력하기
         , CASE
            WHEN p.idx = 1 THEN q.q1
            WHEN p.idx = 2 THEN q.q2
            WHEN p.idx = 3 THEN q.q3
            WHEN p.idx = 4 THEN q.q4
           END AS sales
        FROM
           quarterly_sales AS q
        CROSS JOIN
         (          SELECT 1 AS idx
          UNION ALL SELECT 2 AS idx
          UNION ALL SELECT 3 AS idx
          UNION ALL SELECT 4 AS idx
         ) AS p
        ;
        """

select(query_712)

Unnamed: 0,year,quarter,sales
0,2015,q1,82000.0
1,2015,q2,83000.0
2,2015,q3,78000.0
3,2015,q4,83000.0
4,2016,q1,85000.0
...,...,...,...
7,2016,q4,81000.0
8,2017,q1,92000.0
9,2017,q2,81000.0
10,2017,q3,
11,2017,q4,


### [7-13] 테이블 함수를 사용해 배열을 행으로 전개하는 쿼리

In [21]:
# Show every row of the purchase_log table.
select('SELECT * FROM purchase_log;')

Unnamed: 0,purchase_id,product_ids
0,100001,"A001,A002,A003"
1,100002,"D001,D002"
2,100003,A001


In [22]:
# Minimal unnest example: expand a three-element array literal into three
# rows, one product_id per row.
query_713 = """
        -- unnest함수는 매개 변수로 배열을 받고, 배열을 레코드 분할해서 테이블로 리턴
        SELECT unnest(ARRAY['A001', 'A002', 'A003']) AS product_id;
        """

select(query_713)

Unnamed: 0,product_id
0,A001
1,A002
2,A003


### [7-14] 테이블 함수를 사용해 쉼표로 구분된 문자열 데이터를 행으로 전개하는 쿼리

In [23]:
# Expand the comma-separated product_ids column of purchase_log into one row
# per product: string_to_array splits the string on ',' and unnest, used as
# a table function in the CROSS JOIN, turns the array into rows.
query_714 = """
        SELECT
           purchase_id
         , product_id
        FROM
           purchase_log AS p
        -- string_to_array 함수로 문자열을 배열로 변환하고, unnest 함수로 테이블로 변환하기
        CROSS JOIN unnest(string_to_array(product_ids, ',')) AS product_id
        """

select(query_714)

Unnamed: 0,purchase_id,product_id
0,100001,A001
1,100001,A002
2,100001,A003
3,100002,D001
4,100002,D002
5,100003,A001


### [7-15] PostgreSQL에서 쉼표로 구분된 데이터를 행으로 전개하는 쿼리

In [24]:
# Same expansion as the unnest/string_to_array version, done in one step:
# regexp_split_to_table splits product_ids on ',' and returns one row per
# piece.
query_715 = """
        SELECT
           purchase_id
         , regexp_split_to_table(product_ids, ',') AS product_id
        FROM
           purchase_log
        ;
        """

select(query_715)

Unnamed: 0,purchase_id,product_id
0,100001,A001
1,100001,A002
2,100001,A003
3,100002,D001
4,100002,D002
5,100003,A001


### [7-16] 일련 번호를 가진 피벗 테이블을 만드는 쿼리

In [25]:
# Build a small sequence table (idx = 1, 2, 3) inline with UNION ALL, for
# use as a pivot table.
query_716 = """
        SELECT *
        FROM (
                   SELECT 1 AS idx  
         UNION ALL SELECT 2 AS idx
         UNION ALL SELECT 3 AS idx
        ) AS pivot 
        ;
        """

select(query_716)

Unnamed: 0,idx
0,1
1,2
2,3


### [7-17] split_part 함수의 사용 예

In [26]:
# split_part example: split 'A001,A002,A003' on ',' and return the 1st, 2nd
# and 3rd elements as separate columns.
query_717 = """
        SELECT
           -- split_part 함수는 문자열을 쉼표 등의 구분자로 분할해 n번째 요소를 추출
           split_part('A001,A002,A003', ',', 1) AS part_1
         , split_part('A001,A002,A003', ',', 2) AS part_2
         , split_part('A001,A002,A003', ',', 3) AS part_3           
        ;
        """

select(query_717)

Unnamed: 0,part_1,part_2,part_3
0,A001,A002,A003


### [7-18] 문자 수의 차이를 사용해 상품 수를 계산하는 쿼리

In [27]:
# Count the products in each comma-separated product_ids string:
# the number of commas (length with commas minus length without) plus one.
query_718 = """
        SELECT
           purchase_id
           -- char_length 함수는 문자 수를 세는 함수
           -- ','(콤마) 갯수 차이를 구하여 상품 수를 계산
         , product_ids
         , 1 + char_length(product_ids)
             - char_length(replace(product_ids, ',', ''))
           AS product_num
        FROM
           purchase_log
        ;
        """

select(query_718)

Unnamed: 0,purchase_id,product_ids,product_num
0,100001,"A001,A002,A003",3
1,100002,"D001,D002",2
2,100003,A001,1


### [7-19] 피벗 테이블을 사용해 문자열을 행으로 전개하는 쿼리

In [28]:
# Expand product_ids into rows with a pivot table instead of unnest:
# join each purchase to the inline idx table for every idx up to its product
# count (commas + 1), then take the idx-th element with split_part.
# The inline pivot only contains idx 1..3, so at most three products per
# purchase are expanded.
query_719 = """
        SELECT
           l.purchase_id
         , l.product_ids
           -- 삼품 수만큼 순번 붙이기
         , p.idx
           -- 문자열을 쉼표로 구분해서 분할하고, idx번째 요소 추출하기
         , split_part(l.product_ids, ',', p.idx) AS product_id
        FROM
           purchase_log AS l
        JOIN
          (          SELECT 1 AS idx
           UNION ALL SELECT 2 AS idx
           UNION ALL SELECT 3 AS idx
          ) AS p
          -- 피벗 테이블의 id가 상품 수 이하의 경우 결합하기
          ON p.idx <=
            (1 + char_length(l.product_ids)
               - char_length(replace(l.product_ids, ',', '')))
        ;
        """

select(query_719)

Unnamed: 0,purchase_id,product_ids,idx,product_id
0,100001,"A001,A002,A003",1,A001
1,100001,"A001,A002,A003",2,A002
2,100001,"A001,A002,A003",3,A003
3,100002,"D001,D002",1,D001
4,100002,"D001,D002",2,D002
5,100003,A001,1,A001


## 8. 여러 개의 테이블 조작하기

### [8-1] UNION ALL 구문을 사용해 테이블을 세로로 결합하는 쿼리

In [29]:
# Show every row of the app1_mst_users table.
select('SELECT * FROM app1_mst_users;')

Unnamed: 0,user_id,name,email
0,U001,Sato,sato@example.com
1,U002,Suzuki,suzuki@example.com


In [30]:
# Show every row of the app2_mst_users table.
select('SELECT * FROM app2_mst_users;')

Unnamed: 0,user_id,name,phone
0,U001,Ito,080-xxxx-xxxx
1,U002,Tanaka,070-xxxx-xxxx


In [31]:
# Stack the two user master tables vertically with UNION ALL, tagging each
# row with a literal app_name. The app2 branch selects NULL AS email so both
# branches expose the same column list.
query_81 = """
         SELECT 'app1' AS app_name, user_id, name, email FROM app1_mst_users
        UNION ALL
         SELECT 'app2' AS app_name, user_id, name, NULL AS email FROM app2_mst_users
        ;
        """

select(query_81)

Unnamed: 0,app_name,user_id,name,email
0,app1,U001,Sato,sato@example.com
1,app1,U002,Suzuki,suzuki@example.com
2,app2,U001,Ito,
3,app2,U002,Tanaka,


### [8-2] 여러 개의 테이블을 결합해서 가로로 정렬하는 쿼리

In [32]:
# Show every row of the mst_categories table.
select('SELECT * FROM mst_categories;')

Unnamed: 0,category_id,name
0,1,dvd
1,2,cd
2,3,book


In [33]:
# Show every row of the category_sales table.
select('SELECT * FROM category_sales;')

Unnamed: 0,category_id,sales
0,1,850000
1,2,500000


In [34]:
# Show every row of the product_sale_ranking table.
select('SELECT * FROM product_sale_ranking;')

Unnamed: 0,category_id,rank,product_id,sales
0,1,1,D001,50000
1,1,2,D002,20000
2,1,3,D003,10000
3,2,1,C001,30000
4,2,2,C002,20000
5,2,3,C003,10000


In [35]:
# Join the category master to category_sales and product_sale_ranking on
# category_id with plain (inner) JOINs. Categories with no match in either
# joined table are dropped, and a category appears once per matching
# ranking row.
query_82 = """
        SELECT
           m.category_id
         , m.name
         , s.sales
         , r.product_id AS sale_product
        FROM
           mst_categories AS m
         JOIN
           -- 카테고리별로 매출액 결합하기
           category_sales AS s
           ON m.category_id = s.category_id
         JOIN
           -- 카테고리별로 상품 결합하기
           product_sale_ranking AS r
           ON m.category_id = r.category_id
        ;
        """

select(query_82)

Unnamed: 0,category_id,name,sales,sale_product
0,1,dvd,850000,D001
1,1,dvd,850000,D002
2,1,dvd,850000,D003
3,2,cd,500000,C001
4,2,cd,500000,C002
5,2,cd,500000,C003


### [8-3] 마스터 테이블의 행 수를 변경하지 않고 여러 개의 테이블을 가로로 정렬하는 쿼리

In [36]:
# Same join as the inner-join version, but with LEFT JOINs so every
# mst_categories row is kept even without a match. The r.rank = 1 condition
# sits in the ON clause, so only the rank-1 ranking row is joined per
# category and unmatched categories still survive with NULLs.
query_83 = """
        SELECT
           m.category_id
         , m.name
         , s.sales
         , r.product_id AS top_sale_product
        FROM
           mst_categories AS m
         -- LEFT JOIN을 사용해서 결합한 레코드를 남김
         LEFT JOIN
           -- 카테고리별로 매출액 결합하기
           category_sales AS s
           ON m.category_id = s.category_id
         -- LEFT JOIN을 사용해서 결합하지 못한 레코드를 남김  
         LEFT JOIN
           -- 카테고리별로 최고 매출 상품 하나만 추출해서 결합하기
           product_sale_ranking AS r
           ON m.category_id = r.category_id
           AND r.rank = 1
        ;
        """

select(query_83)

Unnamed: 0,category_id,name,sales,top_sale_product
0,1,dvd,850000.0,D001
1,2,cd,500000.0,C001
2,3,book,,


### [8-4] 상관 서브쿼리로 여러 개의 테이블을 가로로 정렬하는 쿼리

In [37]:
# Produce one row per category using correlated scalar subqueries instead of
# joins:
#   sales            - the category's row in category_sales
#   top_sale_product - the product with the highest sales in
#                      product_sale_ranking for that category
# Changes from the previous version:
#   * the inner ORDER BY referenced a bare "sales", which sits next to an
#     outer output alias of the same name; it is now qualified as r.sales
#   * r.product_id is added as a tie-breaker so LIMIT 1 picks a deterministic
#     product when two products have equal sales
query_84 = """
        SELECT
           m.category_id
         , m.name
           -- 상관 서브쿼리를 사용해 카테고리별로 매출액 추출하기
         , (SELECT s.sales
            FROM category_sales AS s
            WHERE m.category_id = s.category_id
            ) AS sales
           -- 상관 서브쿼리를 사용해 카테고리별로 최고 매출 상품을
           -- 하나 추출하기 (순위로 따로 압축하지 않아도 됨)
         , (SELECT r.product_id
            FROM product_sale_ranking AS r
            WHERE m.category_id = r.category_id
            ORDER BY r.sales DESC, r.product_id
            LIMIT 1
            ) AS top_sale_product
        FROM
           mst_categories AS m
        ;
        """

select(query_84)

Unnamed: 0,category_id,name,sales,top_sale_product
0,1,dvd,850000.0,D001
1,2,cd,500000.0,C001
2,3,book,,


### [8-5] 신용 카드 등록과 구매 이력 유무를 0과 1이라는 플래그로 나타내는 쿼리

In [40]:
# Show every row of the mst_users_with_card_number table.
select('SELECT * FROM mst_users_with_card_number;')

Unnamed: 0,user_id,card_number
0,U001,1234-xxxx-xxxx-xxxx
1,U002,
2,U003,5678-xxxx-xxxx-xxxx


In [41]:
# Show every row of the purchase_log table.
# NOTE(review): the recorded output here has different columns from the
# purchase_log preview in section 7-13 - presumably the table was recreated
# for chapter 8; confirm before running both chapters against one database.
select('SELECT * FROM purchase_log;')

Unnamed: 0,purchase_id,user_id,amount,stamp
0,100001,U001,200,2017-01-30 10:00:00
1,100002,U001,500,2017-02-10 10:00:00
2,100003,U001,200,2017-02-12 10:00:00
3,100004,U002,800,2017-03-01 10:00:00
4,100005,U002,400,2017-03-02 10:00:00


In [42]:
# Per user, report the number of purchases plus two 0/1 style flags:
#   has_card      - 1 when card_number IS NOT NULL, else 0
#   has_purchhsed - SIGN of the purchase count (0 stays 0, 1 or more gives 1)
# The LEFT JOIN keeps users without purchases; COUNT(p.user_id) counts only
# matched purchase rows, so those users get 0.
# NOTE(review): the alias "has_purchhsed" is misspelled (has_purchased); it
# is left as is because it is the result column name and appears in the
# recorded output.
query_85 = """
        SELECT
           m.user_id
         , m.card_number
         , COUNT(p.user_id) AS purchase_count
           -- 신용 카드 번호를 등록한 경우 1, 등록하지 않은 경우 0으로 표현
         , CASE WHEN m.card_number IS NOT NULL THEN 1 ELSE 0 
            END AS has_card
           -- 구매 이력이 있는 경우 1, 없는 경우 0으로 표현
           -- SIGN 함수는 0은 0으로, 1이상은 1로 변환
         , SIGN(COUNT(p.user_id)) AS has_purchhsed
        FROM
           mst_users_with_card_number AS m
         LEFT JOIN
           purchase_log AS p
           ON m.user_id = p.user_id
        GROUP BY
           m.user_id, m.card_number
        ;
        """

select(query_85)

Unnamed: 0,user_id,card_number,purchase_count,has_card,has_purchhsed
0,U002,,2,0,1.0
1,U003,5678-xxxx-xxxx-xxxx,0,1,0.0
2,U001,1234-xxxx-xxxx-xxxx,3,1,1.0


### [8-6] 카테고리별 순위를 추가한 테이블에 이름 붙이기

In [43]:
# Show every row of the product_sales table.
select('SELECT * FROM product_sales;')

Unnamed: 0,category_name,product_id,sales
0,dvd,D001,50000
1,dvd,D002,20000
2,dvd,D003,10000
3,cd,C001,30000
4,cd,C002,20000
5,cd,C003,10000
6,book,B001,20000
7,book,B002,15000
8,book,B003,10000
9,book,B003,5000


In [44]:
# Name an intermediate result with a CTE (WITH): product_sale_ranking adds a
# per-category position (ROW_NUMBER by sales, descending, aliased as rank) to
# product_sales, and the main query simply selects from it.
query_86 = """
        WITH
        product_sale_ranking AS (
         SELECT
            category_name
          , product_id
          , sales
          , ROW_NUMBER() OVER(PARTITION BY category_name 
             ORDER BY sales DESC) AS rank
         FROM
            product_sales
        )
        SELECT *
        FROM product_sale_ranking
        ;
        """

select(query_86)

Unnamed: 0,category_name,product_id,sales,rank
0,book,B001,20000,1
1,book,B002,15000,2
2,book,B003,10000,3
3,book,B003,5000,4
4,cd,C001,30000,1
5,cd,C002,20000,2
6,cd,C003,10000,3
7,dvd,D001,50000,1
8,dvd,D002,20000,2
9,dvd,D003,10000,3


### [8-7] 카테고리들의 순위에서 유니크한 순위 목록을 계산하는 쿼리

In [46]:
# Chain a second CTE: mst_rank holds the distinct rank values found in
# product_sale_ranking. The final SELECT has no ORDER BY, so the ranks come
# back in no particular order.
query_87 = """
        WITH
        product_sale_ranking AS (
         SELECT
            category_name
          , product_id
          , sales
          , ROW_NUMBER() OVER(PARTITION BY category_name 
             ORDER BY sales DESC) AS rank
         FROM
            product_sales
        )
        , mst_rank AS (
          SELECT DISTINCT rank
          FROM product_sale_ranking
        )
        SELECT *
        FROM mst_rank
        ;
        """

select(query_87)

Unnamed: 0,rank
0,4
1,2
2,3
3,1


### [8-8] 카테고리들의 순위를 횡단적으로 출력하는 쿼리

In [49]:
# Lay the per-category rankings side by side: start from the distinct rank
# list (mst_rank) and LEFT JOIN product_sale_ranking three times, once each
# for 'dvd', 'cd' and 'book', matching on rank. The category filter is in
# each ON clause, so ranks missing from a category yield NULLs instead of
# dropping the row.
query_88 = """
        WITH
        product_sale_ranking AS (
         SELECT
            category_name
          , product_id
          , sales
          , ROW_NUMBER() OVER(PARTITION BY category_name 
             ORDER BY sales DESC) AS rank
         FROM
            product_sales
        )
        , mst_rank AS (
          SELECT DISTINCT rank
          FROM product_sale_ranking
        )
        SELECT
           m.rank
         , r1.product_id AS dvd
         , r1.sales AS dvd_sales
         , r2.product_id AS cd
         , r2.sales AS cd_sales
         , r3.product_id AS book
         , r3.sales AS book_sales
        FROM
           mst_rank AS m
         LEFT JOIN
           product_sale_ranking AS r1
           ON m.rank = r1.rank
           AND r1.category_name = 'dvd'
         LEFT JOIN
           product_sale_ranking AS r2
           ON m.rank = r2.rank
           AND r2.category_name = 'cd'
         LEFT JOIN
           product_sale_ranking AS r3
           ON m.rank = r3.rank
           AND r3.category_name = 'book'
        ORDER BY 
           m.rank
        ;
        """

select(query_88)

Unnamed: 0,rank,dvd,dvd_sales,cd,cd_sales,book,book_sales
0,1,D001,50000.0,C001,30000.0,B001,20000
1,2,D002,20000.0,C002,20000.0,B002,15000
2,3,D003,10000.0,C003,10000.0,B003,10000
3,4,,,,,B004,5000


### [8-9] 디바이스 ID와 이름의 마스터 테이블을 만드는 쿼리

In [58]:
# Build a three-row device master table (device_id, device_name) inline with
# SELECT ... UNION ALL inside a CTE, then return it.
query_89 = """
        WITH
        mst_devices AS (
                   SELECT 1 AS device_id, '데스크톱' AS device_name
         UNION ALL SELECT 2 AS device_id, '스마트폰' AS device_name
         UNION ALL SELECT 3 AS device_id, '애플리케이션' AS device_name             
        )
        SELECT *
        FROM mst_devices
        ;
        """

select(query_89)

Unnamed: 0,device_id,device_name
0,1,데스크톱
1,2,스마트폰
2,3,애플리케이션


### [8-10] VALUES 구문을 사용해 동적으로 테이블을 만드는 쿼리

In [57]:
# Build the same three-row device table with a VALUES list; the column names
# come from the CTE's column list.
# NOTE(review): the second column is spelled "dvice_name" here, whereas the
# UNION ALL version of this table uses "device_name". It is left unchanged
# because it is the result column name and appears in the recorded output.
query_810 = """
        WITH
        mst_devices(device_id, dvice_name) AS (
         VALUES
            (1, '데스크톱')
          , (2, '스마트폰')
          , (3, '애플리케이션')
        )
        SELECT *
        FROM mst_devices
        ;
        """

select(query_810)

Unnamed: 0,device_id,dvice_name
0,1,데스크톱
1,2,스마트폰
2,3,애플리케이션


### [8-11] 순번을 가진 유사 테이블을 작성하는 쿼리

In [60]:
# Generate a sequence table with generate_series: one row per integer from
# 1 to 5, exposed as idx.
query_811 = """
        WITH
        series AS (
         -- 1부터 5까지의 순번 생성하기
         SELECT generate_series(1, 5) AS idx
        )
        SELECT *
        FROM series
        ;
        """

select(query_811)

Unnamed: 0,idx
0,1
1,2
2,3
3,4
4,5
