## 데이터 분석을 위한 SQL 레시피

Data soruce : https://hanbit.co.kr/support/supplement_survey.html?pcode=B8585882565

System : PostgreSQL

In [1]:
import pandas as pd
import psycopg2 as pg2
from sqlalchemy import create_engine

engine = create_engine('postgresql://testuser:testpass@localhost:5432/postgresql_analysis')

con = pg2.connect(host='localhost',
                  user='testuser',
                  password='testpass',
                  database='postgresql_analysis')
con.autocommit = True
cur = con.cursor()

In [2]:
def select(query):
    return pd.read_sql(query, con)

In [3]:
pd.options.display.max_rows = 10

## 12. 시계열에 따른 사용자 전체의 상태 변화 찾기

### [12-1] 날짜별 등록 수의 추이를 집계하는 쿼리

In [4]:
select('SELECT * FROM mst_users;')

Unnamed: 0,user_id,sex,birth_date,register_date,register_device,withdraw_date
0,U001,M,1977-06-17,2016-10-01,pc,
1,U002,F,1953-06-12,2016-10-01,sp,2016-10-10
2,U003,M,1965-01-06,2016-10-01,pc,
3,U004,F,1954-05-21,2016-10-05,pc,
4,U005,M,1987-11-23,2016-10-05,sp,
...,...,...,...,...,...,...
25,U026,M,1969-02-21,2016-11-10,sp,
26,U027,F,2001-07-10,2016-11-10,pc,
27,U028,M,1976-05-26,2016-11-15,app,
28,U029,M,1964-04-06,2016-11-28,pc,


In [5]:
select('SELECT * FROM action_log;')

Unnamed: 0,session,user_id,action,stamp
0,989004ea,U001,view,2016-10-01 18:00:00
1,989004ea,U001,view,2016-10-01 18:01:00
2,989004ea,U001,view,2016-10-01 18:10:00
3,47db0370,U001,follow,2016-10-05 19:00:00
4,47db0370,U001,view,2016-10-05 19:10:00
...,...,...,...,...
9,87b5725f,U002,follow,2016-10-01 12:00:00
10,87b5725f,U002,follow,2016-10-01 12:01:00
11,87b5725f,U002,follow,2016-10-01 12:02:00
12,9afaf87c,U002,view,2016-10-02 13:00:00


In [6]:
query_121 = """
        SELECT
           register_date
         , COUNT(DISTINCT user_id) AS register_count
        FROM
           mst_users
        GROUP BY
           register_date
        ORDER BY
           register_date
        ;
        """

select(query_121)

Unnamed: 0,register_date,register_count
0,2016-10-01,3
1,2016-10-05,2
2,2016-10-10,3
3,2016-10-15,1
4,2016-10-16,1
...,...,...
10,2016-11-04,1
11,2016-11-05,2
12,2016-11-10,2
13,2016-11-15,1


### [12-2] 매달 등록 수와 전월비를 계산하는 쿼리

In [7]:
query_122 = """
        WITH
        mst_users_with_year_month AS (
         SELECT
            *
          , substring(register_date, 1, 7) AS year_month
         FROM
            mst_users
        )
        SELECT
           year_month
         , COUNT(DISTINCT user_id) AS register_count
         , LAG(COUNT(DISTINCT user_id)) OVER (ORDER BY year_month)
           AS last_month_count
         , 1.0
           * COUNT(DISTINCT user_id)
           / LAG(COUNT(DISTINCT user_id)) OVER (ORDER BY year_month)
           AS month_over_month_ratio
        FROM
           mst_users_with_year_month
        GROUP BY
           year_month
        ;
        """

select(query_122)

Unnamed: 0,year_month,register_count,last_month_count,month_over_month_ratio
0,2016-10,14,,
1,2016-11,16,14.0,1.142857


### [12-3] 디바이스들의 등록 수를 집계하는 쿼리 

In [8]:
query_123 = """
        WITH
        mst_users_with_year_month AS (
         SELECT
            *
          , substring(register_date, 1, 7) AS year_month
         FROM
            mst_users
        )
        SELECT
           year_month
         , COUNT(DISTINCT user_id) AS register_count
         , COUNT(DISTINCT CASE WHEN register_device = 'pc' THEN user_id END) AS register_pc
         , COUNT(DISTINCT CASE WHEN register_device = 'sp' THEN user_id END) AS register_sp
         , COUNT(DISTINCT CASE WHEN register_device = 'app' THEN user_id END) AS register_app
        FROM
           mst_users_with_year_month
        GROUP BY
           year_month
        ;
        """

select(query_123)

Unnamed: 0,year_month,register_count,register_pc,register_sp,register_app
0,2016-10,14,7,4,3
1,2016-11,16,4,4,8


### [12-4] '로그 최근 일자'와 '사용자별 등록일의 다음날'을 계산하는 쿼리

- 지속률 : 등록일 기준으로 이후 지정일 동안 사용자가 서비스를 얼마나 이용했는지 나타내는 지표
    - 지속률(Repeat) -> 사용자가 매일 사용했으면 하는 서비스
- 정착률 : 등록일 기준으로 이후 지정한 7일 동안 사용자가 서비스를 사용했는지 나타내는 지표
    - 정착률(Retention) -> 사용자에게 어떤 목적이 생겼을 때 사용했으면 하는 서비스

In [9]:
query_124 = """
        WITH
        action_log_with_mst_users AS (
         SELECT
            u.user_id
          , u.register_date
            -- 액션 날짜와 로그 전체의 최신 날짜를 날짜 자료형으로 변환하기
          , CAST(a.stamp AS date) AS action_date
          , MAX(CAST(a.stamp AS date)) OVER() AS latest_date
            -- 등록일 다음날의 날짜 계산하기
          , CAST(u.register_date::date + '1 day'::interval AS date)
            AS next_day_1
         FROM
            mst_users AS u
         LEFT OUTER JOIN
            action_log AS a
         ON u.user_id = a.user_id
        )
        SELECT *
        FROM
           action_log_with_mst_users
        ORDER BY
           register_date
        ;
        """

select(query_124)

Unnamed: 0,user_id,register_date,action_date,latest_date,next_day_1
0,U003,2016-10-01,,2016-10-20,2016-10-02
1,U001,2016-10-01,2016-10-20,2016-10-20,2016-10-02
2,U001,2016-10-01,2016-10-20,2016-10-20,2016-10-02
3,U001,2016-10-01,2016-10-20,2016-10-20,2016-10-02
4,U002,2016-10-01,2016-10-01,2016-10-20,2016-10-02
...,...,...,...,...,...
37,U026,2016-11-10,,2016-10-20,2016-11-11
38,U027,2016-11-10,,2016-10-20,2016-11-11
39,U028,2016-11-15,,2016-10-20,2016-11-16
40,U030,2016-11-28,,2016-10-20,2016-11-29


### [12-5] 사용자의 액션 플래그를 계산하는 쿼리

In [10]:
query_125 = """
        WITH
        action_log_with_mst_users AS (
         SELECT
            u.user_id
          , u.register_date
            -- 액션 날짜와 로그 전체의 최신 날짜를 날짜 자료형으로 변환하기
          , CAST(a.stamp AS date) AS action_date
          , MAX(CAST(a.stamp AS date)) OVER() AS latest_date
            -- 등록일 다음날의 날짜 계산하기
          , CAST(u.register_date::date + '1 day'::interval AS date)
            AS next_day_1
         FROM
            mst_users AS u
         LEFT OUTER JOIN
            action_log AS a
         ON u.user_id = a.user_id
        )
        , user_action_flag AS (
          SELECT
             user_id
           , register_date
             -- 4. 등록일 다음날에 액션을 했는지 안 했는지를 플래그로 나타내기
           , SIGN(
              -- 3. 사용자별로 등록일 다음날에 한 액션의 합계 구하기
              SUM(
               -- 2. 등록일 다음날이 로그의 최신 날짜 이전인지 확인하기
               CASE WHEN next_day_1 <= latest_date THEN
                -- 1. 등록일 다음날의 날짜에 액션을 했다면 1, 안 했다면 0 지정하기
                CASE WHEN next_day_1 = action_date THEN 1 ELSE 0 END
               END 
              )
           ) AS next_1_day_action
          FROM
             action_log_with_mst_users
          GROUP BY
             user_id, register_date
        )
        SELECT *
        FROM
           user_action_flag
        ORDER BY
           register_date, user_id
        ;
        """

select(query_125)

Unnamed: 0,user_id,register_date,next_1_day_action
0,U001,2016-10-01,0.0
1,U002,2016-10-01,1.0
2,U003,2016-10-01,0.0
3,U004,2016-10-05,0.0
4,U005,2016-10-05,0.0
...,...,...,...
25,U026,2016-11-10,
26,U027,2016-11-10,
27,U028,2016-11-15,
28,U029,2016-11-28,


### [12-6] 다음날 지속률을 계산하는 쿼리

In [11]:
query_126 = """
        WITH
        action_log_with_mst_users AS (
         SELECT
            u.user_id
          , u.register_date
            -- 액션 날짜와 로그 전체의 최신 날짜를 날짜 자료형으로 변환하기
          , CAST(a.stamp AS date) AS action_date
          , MAX(CAST(a.stamp AS date)) OVER() AS latest_date
            -- 등록일 다음날의 날짜 계산하기
          , CAST(u.register_date::date + '1 day'::interval AS date)
            AS next_day_1
         FROM
            mst_users AS u
         LEFT OUTER JOIN
            action_log AS a
         ON u.user_id = a.user_id
        )
        , user_action_flag AS (
          SELECT
             user_id
           , register_date
             -- 4. 등록일 다음날에 액션을 했는지 안 했는지를 플래그로 나타내기
           , SIGN(
              -- 3. 사용자별로 등록일 다음날에 한 액션의 합계 구하기
              SUM(
               -- 2. 등록일 다음날이 로그의 최신 날짜 이전인지 확인하기
               CASE WHEN next_day_1 <= latest_date THEN
                -- 1. 등록일 다음날의 날짜에 액션을 했다면 1, 안 했다면 0 지정하기
                CASE WHEN next_day_1 = action_date THEN 1 ELSE 0 END
               END 
              )
           ) AS next_1_day_action
          FROM
             action_log_with_mst_users
          GROUP BY
             user_id, register_date
        )
        SELECT
           register_date
         , AVG(100.0 * next_1_day_action) AS repeat_rate_1_day
        FROM
           user_action_flag
        GROUP BY
           register_date
        ORDER BY
           register_date
        ;
        """

select(query_126)

Unnamed: 0,register_date,repeat_rate_1_day
0,2016-10-01,33.333333
1,2016-10-05,0.000000
2,2016-10-10,0.000000
3,2016-10-15,0.000000
4,2016-10-16,0.000000
...,...,...
10,2016-11-04,
11,2016-11-05,
12,2016-11-10,
13,2016-11-15,


### [12-7] 지속률 지표를 관리하는 마스터 테이블을 작성하는 쿼리

In [12]:
query_127 = """
        WITH
        repeat_interval(index_name, interval_date) AS (
         VALUES
            ('01 day repeat', 1)
          , ('02 day repeat', 2)
          , ('03 day repeat', 3)
          , ('04 day repeat', 4)
          , ('05 day repeat', 5)
          , ('06 day repeat', 6)
          , ('07 day repeat', 7)           
        )
        SELECT *
        FROM repeat_interval
        ORDER BY index_name
        ;
        """

select(query_127)

Unnamed: 0,index_name,interval_date
0,01 day repeat,1
1,02 day repeat,2
2,03 day repeat,3
3,04 day repeat,4
4,05 day repeat,5
5,06 day repeat,6
6,07 day repeat,7


### [12-8] 지속률을 세로 기반으로 집계하는 쿼리

In [13]:
query_128 = """
        WITH
        repeat_interval(index_name, interval_date) AS (
          VALUES
             ('01 day repeat', 1)
           , ('02 day repeat', 2)
           , ('03 day repeat', 3)
           , ('04 day repeat', 4)
           , ('05 day repeat', 5)
           , ('06 day repeat', 6)
           , ('07 day repeat', 7)           
        )
        , action_log_with_index_date AS (
          SELECT
             u.user_id
           , u.register_date
           , CAST(a.stamp AS date) AS action_date
           , MAX(CAST(a.stamp AS date)) OVER() AS latest_date
             -- 등록일로부터 n일 후의 날짜 계산하기
           , r.index_name
           , CAST(CAST(u.register_date AS date)+ interval '1 day' * r.interval_date AS date)
             AS index_date
          FROM
             mst_users AS u
          LEFT OUTER JOIN
             action_log AS a
          ON u.user_id = a.user_id
          CROSS JOIN
             repeat_interval AS r
        )
        , user_action_flag AS (
          SELECT
             user_id
           , register_date
           , index_name
             -- 4. 등록일 다음날에 액션을 했는지 안 했는지를 플래그로 나타내기
           , SIGN(
              -- 3. 사용자별로 등록일 다음날에 한 액션의 합계 구하기
              SUM(
               -- 2. 등록일 다음날이 로그의 최신 날짜 이전인지 확인하기
               CASE WHEN index_date <= latest_date THEN
                -- 1. 등록일 다음날의 날짜에 액션을 했다면 1, 안 했다면 0 지정하기
                CASE WHEN index_date = action_date THEN 1 ELSE 0 END
               END 
              )
           ) AS index_date_action
          FROM
             action_log_with_index_date
          GROUP BY
             user_id, register_date, index_name, index_date
        )
        SELECT
           register_date
         , index_name
         , AVG(100.0 * index_date_action) AS repeat_rate
        FROM
           user_action_flag
        GROUP BY
           register_date, index_name
        ORDER BY
           register_date, index_name
        ;
        """

select(query_128)

Unnamed: 0,register_date,index_name,repeat_rate
0,2016-10-01,01 day repeat,33.333333
1,2016-10-01,02 day repeat,0.000000
2,2016-10-01,03 day repeat,0.000000
3,2016-10-01,04 day repeat,33.333333
4,2016-10-01,05 day repeat,0.000000
...,...,...,...
100,2016-11-28,03 day repeat,
101,2016-11-28,04 day repeat,
102,2016-11-28,05 day repeat,
103,2016-11-28,06 day repeat,


### [12-9] 정착률 지표를 관리하는 마스터 테이블을 작성하는 쿼리

In [14]:
query_129 = """
        WITH
        repeat_interval(index_name, interval_begin_date, interval_end_date) AS (
         VALUES
            ('07 day retention', 1, 7)
          , ('14 day retention', 8, 14)
          , ('21 day retention', 15, 21)
          , ('28 day retention', 12, 28)
        )
        SELECT *
        FROM repeat_interval
        ORDER BY index_name
        ;
        """

select(query_129)

Unnamed: 0,index_name,interval_begin_date,interval_end_date
0,07 day retention,1,7
1,14 day retention,8,14
2,21 day retention,15,21
3,28 day retention,12,28


### [12-10] 정착률을 계산하는 쿼리

In [15]:
query_1210 = """
        WITH
        repeat_interval(index_name, interval_begin_date, interval_end_date) AS (
         VALUES
            ('07 day retention', 1, 7)
          , ('14 day retention', 8, 14)
          , ('21 day retention', 15, 21)
          , ('28 day retention', 12, 28)
        )
        , action_log_with_index_date AS (
          SELECT
             u.user_id
           , u.register_date
           , CAST(a.stamp AS date) AS action_date
           , MAX(CAST(a.stamp AS date)) OVER() AS latest_date
           , r.index_name
             -- 지표의 대상 기간 시작일과 종류일 계산하기
           , CAST(u.register_date::date + '1 day'::interval * r.interval_begin_date AS date)
             AS index_begin_date
           , CAST(u.register_date::date + '1 day'::interval * r.interval_end_date AS date)
             AS index_end_date             
          FROM
             mst_users AS u
          LEFT OUTER JOIN
             action_log AS a
          ON u.user_id = a.user_id
          CROSS JOIN
             repeat_interval AS r
        )
        , user_action_flag AS (
          SELECT
             user_id
           , register_date
           , index_name
             -- 4. 지표의 대상 기간에 액션을 했는지 플래그로 나타내기
           , SIGN(
              -- 3. 사용자별로 대상 기간에 한 액션의 합계 구하기
              SUM(
               -- 2. 대상 기간의 종료일이 로그의 최신 날짜 이전인지 확인하기
               CASE WHEN index_end_date <= latest_date THEN
                -- 1. 지표의 대상 기간에 액션을 했다면 1, 안 했다면 0 지정하기
                CASE WHEN action_date BETWEEN index_begin_date AND index_end_date
                 THEN 1 ELSE 0 END
               END
              )
           ) AS index_date_action
          FROM
             action_log_with_index_date
          GROUP BY
             user_id, register_date, index_name, index_begin_date, index_end_date
        )
        SELECT
           register_date
         , index_name
         , AVG(100.0 * index_date_action) AS repeat_rate
        FROM
           user_action_flag
        GROUP BY
           register_date, index_name
        ORDER BY
           register_date, index_name
        ;
        """

select(query_1210)

Unnamed: 0,register_date,index_name,repeat_rate
0,2016-10-01,07 day retention,66.666667
1,2016-10-01,14 day retention,0.000000
2,2016-10-01,21 day retention,
3,2016-10-01,28 day retention,
4,2016-10-05,07 day retention,0.000000
...,...,...,...
55,2016-11-15,28 day retention,
56,2016-11-28,07 day retention,
57,2016-11-28,14 day retention,
58,2016-11-28,21 day retention,


### [12-11] 지속률 지표를 관리하는 마스터 테이블을 정착률 형식으로 수정한 쿼리

In [16]:
query_1211 = """
        WITH
        repeat_interval(index_name, interval_begin_date, interval_end_date) AS (
         VALUES
            ('01 day repeat'   , 1, 1)
          , ('02 day repeat'   , 2, 2)
          , ('03 day repeat'   , 3, 3)
          , ('04 day repeat'   , 4, 4)
          , ('05 day repeat'   , 5, 5)
          , ('06 day repeat'   , 6, 6)
          , ('07 day repeat'   , 7, 7)
          , ('07 day retention', 1, 7)
          , ('14 day retention', 8, 14)
          , ('21 day retention', 15, 21)
          , ('28 day retention', 12, 28)
        )
        SELECT *
        FROM repeat_interval
        ORDER BY index_name
        ;
        """

select(query_1211)

Unnamed: 0,index_name,interval_begin_date,interval_end_date
0,01 day repeat,1,1
1,02 day repeat,2,2
2,03 day repeat,3,3
3,04 day repeat,4,4
4,05 day repeat,5,5
...,...,...,...
6,07 day repeat,7,7
7,07 day retention,1,7
8,14 day retention,8,14
9,21 day retention,15,21


### [12-12] n일 지속률들을 집계하는 쿼리

In [17]:
query_1212 = """
        WITH
        repeat_interval(index_name, interval_begin_date, interval_end_date) AS (
         VALUES
            ('01 day repeat'   , 1, 1)
          , ('02 day repeat'   , 2, 2)
          , ('03 day repeat'   , 3, 3)
          , ('04 day repeat'   , 4, 4)
          , ('05 day repeat'   , 5, 5)
          , ('06 day repeat'   , 6, 6)
          , ('07 day repeat'   , 7, 7)
          , ('07 day retention', 1, 7)
          , ('14 day retention', 8, 14)
          , ('21 day retention', 15, 21)
          , ('28 day retention', 12, 28)
        )
        , action_log_with_index_date AS (
          SELECT
             u.user_id
           , u.register_date
           , CAST(a.stamp AS date) AS action_date
           , MAX(CAST(a.stamp AS date)) OVER() AS latest_date
           , r.index_name
             -- 지표의 대상 기간 시작일과 종류일 계산하기
           , CAST(u.register_date::date + '1 day'::interval * r.interval_begin_date AS date)
             AS index_begin_date
           , CAST(u.register_date::date + '1 day'::interval * r.interval_end_date AS date)
             AS index_end_date             
          FROM
             mst_users AS u
          LEFT OUTER JOIN
             action_log AS a
          ON u.user_id = a.user_id
          CROSS JOIN
             repeat_interval AS r
        )
        , user_action_flag AS (
          SELECT
             user_id
           , register_date
           , index_name
             -- 4. 지표의 대상 기간에 액션을 했는지 플래그로 나타내기
           , SIGN(
              -- 3. 사용자별로 대상 기간에 한 액션의 합계 구하기
              SUM(
               -- 2. 대상 기간의 종료일이 로그의 최신 날짜 이전인지 확인하기
               CASE WHEN index_end_date <= latest_date THEN
                -- 1. 지표의 대상 기간에 액션을 했다면 1, 안 했다면 0 지정하기
                CASE WHEN action_date BETWEEN index_begin_date AND index_end_date
                 THEN 1 ELSE 0 END
               END
              )
           ) AS index_date_action
          FROM
             action_log_with_index_date
          GROUP BY
             user_id, register_date, index_name, index_begin_date, index_end_date
        )
        SELECT
           index_name
         , AVG(100.0 * index_date_action) AS repeat_rate
        FROM
           user_action_flag
        GROUP BY
           index_name
        ORDER BY
           index_name
        ;
        """

select(query_1212)

Unnamed: 0,index_name,repeat_rate
0,01 day repeat,8.333333
1,02 day repeat,0.000000
2,03 day repeat,0.000000
3,04 day repeat,10.000000
4,05 day repeat,0.000000
...,...,...
6,07 day repeat,0.000000
7,07 day retention,25.000000
8,14 day retention,0.000000
9,21 day retention,


### [12-13] 모든 사용자와 액션의 조합을 도출하는 쿼리

In [27]:
query_1213 = """
        WITH
        repeat_interval(index_name, interval_begin_date, interval_end_date) AS (
         VALUES
            ('01 day repeat', 1, 1)
        )
        , action_log_with_index_date AS (
          SELECT
             u.user_id
           , u.register_date
           , CAST(a.stamp AS date) AS action_date
           , MAX(CAST(a.stamp AS date)) OVER() AS latest_date
           , r.index_name
             -- 지표의 대상 기간 시작일과 종류일 계산하기
           , CAST(u.register_date::date + '1 day'::interval * r.interval_begin_date AS date)
             AS index_begin_date
           , CAST(u.register_date::date + '1 day'::interval * r.interval_end_date AS date)
             AS index_end_date             
          FROM
             mst_users AS u
          LEFT OUTER JOIN
             action_log AS a
          ON u.user_id = a.user_id
          CROSS JOIN
             repeat_interval AS r
        )
        , user_action_flag AS (
          SELECT
             user_id
           , register_date
           , index_name
             -- 4. 지표의 대상 기간에 액션을 했는지 플래그로 나타내기
           , SIGN(
              -- 3. 사용자별로 대상 기간에 한 액션의 합계 구하기
              SUM(
               -- 2. 대상 기간의 종료일이 로그의 최신 날짜 이전인지 확인하기
               CASE WHEN index_end_date <= latest_date THEN
                -- 1. 지표의 대상 기간에 액션을 했다면 1, 안 했다면 0 지정하기
                CASE WHEN action_date BETWEEN index_begin_date AND index_end_date
                 THEN 1 ELSE 0 END
               END
              )
           ) AS index_date_action
          FROM
             action_log_with_index_date
          GROUP BY
             user_id, register_date, index_name, index_begin_date, index_end_date
        )
        , mst_actions AS (
                    SELECT 'view'    AS action        
          UNION ALL SELECT 'comment' AS action
          UNION ALL SELECT 'follow'  AS action          
        )
        , mst_user_actions AS (
          SELECT
             u.user_id
           , u.register_date
           , a.action
          FROM
             mst_users AS u
          CROSS JOIN
             mst_actions AS a
        )
        SELECT *
        FROM
           mst_user_actions
        ORDER BY
           user_id, action
        ;
        """

select(query_1213)

Unnamed: 0,user_id,register_date,action
0,U001,2016-10-01,comment
1,U001,2016-10-01,follow
2,U001,2016-10-01,view
3,U002,2016-10-01,comment
4,U002,2016-10-01,follow
...,...,...,...
85,U029,2016-11-28,follow
86,U029,2016-11-28,view
87,U030,2016-11-28,comment
88,U030,2016-11-28,follow


### [12-14] 사용자의 액션 로그를 0, 1의 플래그로 표현하는 쿼리

In [28]:
query_1214 = """
        WITH
        repeat_interval(index_name, interval_begin_date, interval_end_date) AS (
         VALUES
            ('01 day repeat', 1, 1)
        )
        , action_log_with_index_date AS (
          SELECT
             u.user_id
           , u.register_date
           , CAST(a.stamp AS date) AS action_date
           , MAX(CAST(a.stamp AS date)) OVER() AS latest_date
           , r.index_name
             -- 지표의 대상 기간 시작일과 종류일 계산하기
           , CAST(u.register_date::date + '1 day'::interval * r.interval_begin_date AS date)
             AS index_begin_date
           , CAST(u.register_date::date + '1 day'::interval * r.interval_end_date AS date)
             AS index_end_date             
          FROM
             mst_users AS u
          LEFT OUTER JOIN
             action_log AS a
          ON u.user_id = a.user_id
          CROSS JOIN
             repeat_interval AS r
        )
        , user_action_flag AS (
          SELECT
             user_id
           , register_date
           , index_name
             -- 4. 지표의 대상 기간에 액션을 했는지 플래그로 나타내기
           , SIGN(
              -- 3. 사용자별로 대상 기간에 한 액션의 합계 구하기
              SUM(
               -- 2. 대상 기간의 종료일이 로그의 최신 날짜 이전인지 확인하기
               CASE WHEN index_end_date <= latest_date THEN
                -- 1. 지표의 대상 기간에 액션을 했다면 1, 안 했다면 0 지정하기
                CASE WHEN action_date BETWEEN index_begin_date AND index_end_date
                 THEN 1 ELSE 0 END
               END
              )
           ) AS index_date_action
          FROM
             action_log_with_index_date
          GROUP BY
             user_id, register_date, index_name, index_begin_date, index_end_date
        )
        , mst_actions AS (
                    SELECT 'view'    AS action        
          UNION ALL SELECT 'comment' AS action
          UNION ALL SELECT 'follow'  AS action          
        )
        , mst_user_actions AS (
          SELECT
             u.user_id
           , u.register_date
           , a.action
          FROM
             mst_users AS u
          CROSS JOIN
             mst_actions AS a
        )
        , register_action_flag AS (
          SELECT DISTINCT
             m.user_id
           , m.register_date
           , m.action
           , CASE
              WHEN a.action IS NOT NULL THEN 1
              ELSE 0
             END AS do_action
           , index_name
           , index_date_action
          FROM
             mst_user_actions AS m
          LEFT JOIN
             action_log AS a
           ON m.user_id = a.user_id
           AND CAST(m.register_date AS date) = CAST(a.stamp AS date)
           AND m.action = a.action
          LEFT JOIN
             user_action_flag AS f
           ON m.user_id = f.user_id
          WHERE
             f.index_date_action IS NOT NULL
        )
        SELECT *
        FROM
           register_action_flag
        ORDER BY
           user_id, index_name, action
        ;
        """

select(query_1214)

Unnamed: 0,user_id,register_date,action,do_action,index_name,index_date_action
0,U001,2016-10-01,comment,0,01 day repeat,0.0
1,U001,2016-10-01,follow,0,01 day repeat,0.0
2,U001,2016-10-01,view,1,01 day repeat,0.0
3,U002,2016-10-01,comment,0,01 day repeat,1.0
4,U002,2016-10-01,follow,1,01 day repeat,1.0
...,...,...,...,...,...,...
31,U011,2016-10-18,follow,0,01 day repeat,0.0
32,U011,2016-10-18,view,0,01 day repeat,0.0
33,U012,2016-10-18,comment,0,01 day repeat,0.0
34,U012,2016-10-18,follow,0,01 day repeat,0.0


### [12-15] 액션에 따른 지속률과 정착률을 집계하는 쿼리

In [29]:
query_1215 = """
        WITH
        repeat_interval(index_name, interval_begin_date, interval_end_date) AS (
         VALUES
            ('01 day repeat', 1, 1)
        )
        , action_log_with_index_date AS (
          SELECT
             u.user_id
           , u.register_date
           , CAST(a.stamp AS date) AS action_date
           , MAX(CAST(a.stamp AS date)) OVER() AS latest_date
           , r.index_name
             -- 지표의 대상 기간 시작일과 종류일 계산하기
           , CAST(u.register_date::date + '1 day'::interval * r.interval_begin_date AS date)
             AS index_begin_date
           , CAST(u.register_date::date + '1 day'::interval * r.interval_end_date AS date)
             AS index_end_date             
          FROM
             mst_users AS u
          LEFT OUTER JOIN
             action_log AS a
          ON u.user_id = a.user_id
          CROSS JOIN
             repeat_interval AS r
        )
        , user_action_flag AS (
          SELECT
             user_id
           , register_date
           , index_name
             -- 4. 지표의 대상 기간에 액션을 했는지 플래그로 나타내기
           , SIGN(
              -- 3. 사용자별로 대상 기간에 한 액션의 합계 구하기
              SUM(
               -- 2. 대상 기간의 종료일이 로그의 최신 날짜 이전인지 확인하기
               CASE WHEN index_end_date <= latest_date THEN
                -- 1. 지표의 대상 기간에 액션을 했다면 1, 안 했다면 0 지정하기
                CASE WHEN action_date BETWEEN index_begin_date AND index_end_date
                 THEN 1 ELSE 0 END
               END
              )
           ) AS index_date_action
          FROM
             action_log_with_index_date
          GROUP BY
             user_id, register_date, index_name, index_begin_date, index_end_date
        )
        , mst_actions AS (
                    SELECT 'view'    AS action        
          UNION ALL SELECT 'comment' AS action
          UNION ALL SELECT 'follow'  AS action          
        )
        , mst_user_actions AS (
          SELECT
             u.user_id
           , u.register_date
           , a.action
          FROM
             mst_users AS u
          CROSS JOIN
             mst_actions AS a
        )
        , register_action_flag AS (
          SELECT DISTINCT
             m.user_id
           , m.register_date
           , m.action
           , CASE
              WHEN a.action IS NOT NULL THEN 1
              ELSE 0
             END AS do_action
           , index_name
           , index_date_action
          FROM
             mst_user_actions AS m
          LEFT JOIN
             action_log AS a
           ON m.user_id = a.user_id
           AND CAST(m.register_date AS date) = CAST(a.stamp AS date)
           AND m.action = a.action
          LEFT JOIN
             user_action_flag AS f
           ON m.user_id = f.user_id
          WHERE
             f.index_date_action IS NOT NULL
        )
        SELECT
           action
         , COUNT(*) AS users
         , AVG(100.0 * do_action) AS usage_rate
         , index_name
         , AVG(CASE do_action WHEN 1 THEN 100.0 * index_date_action END) AS idx_rate
         , AVG(CASE do_action WHEN 0 THEN 100.0 * index_date_action END) AS no_action_idx_rate         
        FROM
           register_action_flag
        GROUP BY
           index_name, action
        ORDER BY
           index_name, action
        ;
        """

select(query_1215)

Unnamed: 0,action,users,usage_rate,index_name,idx_rate,no_action_idx_rate
0,comment,12,0.0,01 day repeat,,8.333333
1,follow,12,8.333333,01 day repeat,100.0,0.0
2,view,12,8.333333,01 day repeat,0.0,9.090909


### [12-16] 액션의 계급 마스터와 사용자 액션 플래그의 조합을 산출하는 쿼리

In [31]:
query_1216 = """
        WITH
        repeat_interval(index_name, interval_begin_date, interval_end_date) AS (
         VALUES
            ('14 day retention', 8, 14)
        )
        , action_log_with_index_date AS (
          SELECT
             u.user_id
           , u.register_date
           , CAST(a.stamp AS date) AS action_date
           , MAX(CAST(a.stamp AS date)) OVER() AS latest_date
           , r.index_name
             -- 지표의 대상 기간 시작일과 종류일 계산하기
           , CAST(u.register_date::date + '1 day'::interval * r.interval_begin_date AS date)
             AS index_begin_date
           , CAST(u.register_date::date + '1 day'::interval * r.interval_end_date AS date)
             AS index_end_date             
          FROM
             mst_users AS u
          LEFT OUTER JOIN
             action_log AS a
          ON u.user_id = a.user_id
          CROSS JOIN
             repeat_interval AS r
        )
        , user_action_flag AS (
          SELECT
             user_id
           , register_date
           , index_name
             -- 4. 지표의 대상 기간에 액션을 했는지 플래그로 나타내기
           , SIGN(
              -- 3. 사용자별로 대상 기간에 한 액션의 합계 구하기
              SUM(
               -- 2. 대상 기간의 종료일이 로그의 최신 날짜 이전인지 확인하기
               CASE WHEN index_end_date <= latest_date THEN
                -- 1. 지표의 대상 기간에 액션을 했다면 1, 안 했다면 0 지정하기
                CASE WHEN action_date BETWEEN index_begin_date AND index_end_date
                 THEN 1 ELSE 0 END
               END
              )
           ) AS index_date_action
          FROM
             action_log_with_index_date
          GROUP BY
             user_id, register_date, index_name, index_begin_date, index_end_date
        )
        , mst_action_bucket(action, min_count, max_count) AS (
          VALUES
            ('comment',  0,    0)
          , ('comment',  1,    5)
          , ('comment',  6,   10)
          , ('comment', 11, 9999) -- 최대값으로 간단하게 9999 입력
          , ('follow' ,  0,    0)
          , ('follow' ,  1,    5)          
          , ('follow' ,  6,   10)          
          , ('follow' , 11, 9999)
        )
        , mst_user_action_bucket AS (
          SELECT
             u.user_id
           , u.register_date
           , a.action
           , a.min_count
           , a.max_count
          FROM
             mst_users AS u
          CROSS JOIN
             mst_action_bucket AS a
        )
        SELECT *
        FROM
           mst_user_action_bucket
        ORDER BY
           user_id, action, min_count
        ;
        """

select(query_1216)

Unnamed: 0,user_id,register_date,action,min_count,max_count
0,U001,2016-10-01,comment,0,0
1,U001,2016-10-01,comment,1,5
2,U001,2016-10-01,comment,6,10
3,U001,2016-10-01,comment,11,9999
4,U001,2016-10-01,follow,0,0
...,...,...,...,...,...
235,U030,2016-11-28,comment,11,9999
236,U030,2016-11-28,follow,0,0
237,U030,2016-11-28,follow,1,5
238,U030,2016-11-28,follow,6,10


### [12-17] 등록 후 7일 동안의 액션 수를 집계하는 쿼리

In [32]:
query_1217 = """
        WITH
        repeat_interval(index_name, interval_begin_date, interval_end_date) AS (
         VALUES
            ('14 day retention', 8, 14)
        )
        , action_log_with_index_date AS (
          SELECT
             u.user_id
           , u.register_date
           , CAST(a.stamp AS date) AS action_date
           , MAX(CAST(a.stamp AS date)) OVER() AS latest_date
           , r.index_name
             -- 지표의 대상 기간 시작일과 종류일 계산하기
           , CAST(u.register_date::date + '1 day'::interval * r.interval_begin_date AS date)
             AS index_begin_date
           , CAST(u.register_date::date + '1 day'::interval * r.interval_end_date AS date)
             AS index_end_date             
          FROM
             mst_users AS u
          LEFT OUTER JOIN
             action_log AS a
          ON u.user_id = a.user_id
          CROSS JOIN
             repeat_interval AS r
        )
        , user_action_flag AS (
          SELECT
             user_id
           , register_date
           , index_name
             -- 4. 지표의 대상 기간에 액션을 했는지 플래그로 나타내기
           , SIGN(
              -- 3. 사용자별로 대상 기간에 한 액션의 합계 구하기
              SUM(
               -- 2. 대상 기간의 종료일이 로그의 최신 날짜 이전인지 확인하기
               CASE WHEN index_end_date <= latest_date THEN
                -- 1. 지표의 대상 기간에 액션을 했다면 1, 안 했다면 0 지정하기
                CASE WHEN action_date BETWEEN index_begin_date AND index_end_date
                 THEN 1 ELSE 0 END
               END
              )
           ) AS index_date_action
          FROM
             action_log_with_index_date
          GROUP BY
             user_id, register_date, index_name, index_begin_date, index_end_date
        )
        , mst_action_bucket(action, min_count, max_count) AS (
          VALUES
            ('comment',  0,    0)
          , ('comment',  1,    5)
          , ('comment',  6,   10)
          , ('comment', 11, 9999) -- 최대값으로 간단하게 9999 입력
          , ('follow' ,  0,    0)
          , ('follow' ,  1,    5)          
          , ('follow' ,  6,   10)          
          , ('follow' , 11, 9999)
        )
        , mst_user_action_bucket AS (
          SELECT
             u.user_id
           , u.register_date
           , a.action
           , a.min_count
           , a.max_count
          FROM
             mst_users AS u
          CROSS JOIN
             mst_action_bucket AS a
        )
        , register_action_flag AS (
           -- 등록일에서 7일 후까지의 액션 수를 세고,
           -- 액션 단계와 14일 정착 달성 플래그 계산하기
           SELECT
             m.user_id
           , m.action
           , m.min_count
           , m.max_count
           , COUNT(a.action) AS action_count
           , CASE
              WHEN COUNT(a.action) BETWEEN m.min_count AND m.max_count THEN 1
              ELSE 0
             END AS achieve
           , index_name
           , index_date_action
          FROM
             mst_user_action_bucket AS m
          LEFT JOIN
             action_log AS a
           ON m.user_id = a.user_id
           -- 등록일 당일부터 7일 후까지의 액션 로그 결합하기
           AND CAST(a.stamp AS date)
                BETWEEN CAST(m.register_date AS date)
                    AND CAST(m.register_date AS date) + interval '7 days'
           AND m.action = a.action
          LEFT JOIN
             user_action_flag AS f
           ON m.user_id = f.user_id
          WHERE
             f.index_date_action IS NOT NULL
          GROUP BY
             m.user_id
           , m.action
           , m.min_count
           , m.max_count
           , f.index_name
           , f.index_date_action
        )
        SELECT *
        FROM
           register_action_flag
        ORDER BY
           user_id, action, min_count
        ;
        """

select(query_1217)

Unnamed: 0,user_id,action,min_count,max_count,action_count,achieve,index_name,index_date_action
0,U001,comment,0,0,0,1,14 day retention,0.0
1,U001,comment,1,5,0,0,14 day retention,0.0
2,U001,comment,6,10,0,0,14 day retention,0.0
3,U001,comment,11,9999,0,0,14 day retention,0.0
4,U001,follow,0,0,2,0,14 day retention,0.0
...,...,...,...,...,...,...,...,...
35,U005,comment,11,9999,0,0,14 day retention,0.0
36,U005,follow,0,0,0,1,14 day retention,0.0
37,U005,follow,1,5,0,0,14 day retention,0.0
38,U005,follow,6,10,0,0,14 day retention,0.0


### [12-18] 등록 후 7일 동안의 액션 횟수별로 14일 정착률을 집계하는 쿼리

In [33]:
query_1218 = """
        WITH
        repeat_interval(index_name, interval_begin_date, interval_end_date) AS (
         VALUES
            ('14 day retention', 8, 14)
        )
        , action_log_with_index_date AS (
          SELECT
             u.user_id
           , u.register_date
           , CAST(a.stamp AS date) AS action_date
           , MAX(CAST(a.stamp AS date)) OVER() AS latest_date
           , r.index_name
             -- 지표의 대상 기간 시작일과 종류일 계산하기
           , CAST(u.register_date::date + '1 day'::interval * r.interval_begin_date AS date)
             AS index_begin_date
           , CAST(u.register_date::date + '1 day'::interval * r.interval_end_date AS date)
             AS index_end_date             
          FROM
             mst_users AS u
          LEFT OUTER JOIN
             action_log AS a
          ON u.user_id = a.user_id
          CROSS JOIN
             repeat_interval AS r
        )
        , user_action_flag AS (
          SELECT
             user_id
           , register_date
           , index_name
             -- 4. 지표의 대상 기간에 액션을 했는지 플래그로 나타내기
           , SIGN(
              -- 3. 사용자별로 대상 기간에 한 액션의 합계 구하기
              SUM(
               -- 2. 대상 기간의 종료일이 로그의 최신 날짜 이전인지 확인하기
               CASE WHEN index_end_date <= latest_date THEN
                -- 1. 지표의 대상 기간에 액션을 했다면 1, 안 했다면 0 지정하기
                CASE WHEN action_date BETWEEN index_begin_date AND index_end_date
                 THEN 1 ELSE 0 END
               END
              )
           ) AS index_date_action
          FROM
             action_log_with_index_date
          GROUP BY
             user_id, register_date, index_name, index_begin_date, index_end_date
        )
        , mst_action_bucket(action, min_count, max_count) AS (
          VALUES
            ('comment',  0,    0)
          , ('comment',  1,    5)
          , ('comment',  6,   10)
          , ('comment', 11, 9999) -- 최대값으로 간단하게 9999 입력
          , ('follow' ,  0,    0)
          , ('follow' ,  1,    5)          
          , ('follow' ,  6,   10)          
          , ('follow' , 11, 9999)
        )
        , mst_user_action_bucket AS (
          SELECT
             u.user_id
           , u.register_date
           , a.action
           , a.min_count
           , a.max_count
          FROM
             mst_users AS u
          CROSS JOIN
             mst_action_bucket AS a
        )
        , register_action_flag AS (
           -- 등록일에서 7일 후까지의 액션 수를 세고,
           -- 액션 단계와 14일 정착 달성 플래그 계산하기
           SELECT
             m.user_id
           , m.action
           , m.min_count
           , m.max_count
           , COUNT(a.action) AS action_count
           , CASE
              WHEN COUNT(a.action) BETWEEN m.min_count AND m.max_count THEN 1
              ELSE 0
             END AS achieve
           , index_name
           , index_date_action
          FROM
             mst_user_action_bucket AS m
          LEFT JOIN
             action_log AS a
           ON m.user_id = a.user_id
           -- 등록일 당일부터 7일 후까지의 액션 로그 결합하기
           AND CAST(a.stamp AS date)
                BETWEEN CAST(m.register_date AS date)
                    AND CAST(m.register_date AS date) + interval '7 days'
           AND m.action = a.action
          LEFT JOIN
             user_action_flag AS f
           ON m.user_id = f.user_id
          WHERE
             f.index_date_action IS NOT NULL
          GROUP BY
             m.user_id
           , m.action
           , m.min_count
           , m.max_count
           , f.index_name
           , f.index_date_action
        )
        SELECT
           action
         , min_count || ' ~ ' || max_count AS count_range
         , SUM(CASE achieve WHEN 1 THEN 1 ELSE 0 END) AS achieve
         , index_name
         , AVG(CASE achieve WHEN 1 THEN 100.0 * index_date_action END) AS achieve_index_rate
        FROM
           register_action_flag
        GROUP BY
           index_name, action, min_count, max_count
        ORDER BY
           index_name, action, min_count
        ;
        """

select(query_1218)

Unnamed: 0,action,count_range,achieve,index_name,achieve_index_rate
0,comment,0 ~ 0,4,14 day retention,0.0
1,comment,1 ~ 5,1,14 day retention,0.0
2,comment,6 ~ 10,0,14 day retention,
3,comment,11 ~ 9999,0,14 day retention,
4,follow,0 ~ 0,3,14 day retention,0.0
5,follow,1 ~ 5,2,14 day retention,0.0
6,follow,6 ~ 10,0,14 day retention,
7,follow,11 ~ 9999,0,14 day retention,


### [12-19] 등록일 다음날부터 7일 동안의 사용 일수와 28일 정착 플래그를 생성하는 쿼리

In [35]:
query_1219 = """
        WITH
        repeat_interval(index_name, interval_begin_date, interval_end_date) AS (
         VALUES
            ('28 day retention', 22, 28)
        )
        , action_log_with_index_date AS (
          SELECT
             u.user_id
           , u.register_date
           , CAST(a.stamp AS date) AS action_date
           , MAX(CAST(a.stamp AS date)) OVER() AS latest_date
           , r.index_name
             -- 지표의 대상 기간 시작일과 종류일 계산하기
           , CAST(u.register_date::date + '1 day'::interval * r.interval_begin_date AS date)
             AS index_begin_date
           , CAST(u.register_date::date + '1 day'::interval * r.interval_end_date AS date)
             AS index_end_date             
          FROM
             mst_users AS u
          LEFT OUTER JOIN
             action_log AS a
          ON u.user_id = a.user_id
          CROSS JOIN
             repeat_interval AS r
        )
        , user_action_flag AS (
          SELECT
             user_id
           , register_date
           , index_name
             -- 4. 지표의 대상 기간에 액션을 했는지 플래그로 나타내기
           , SIGN(
              -- 3. 사용자별로 대상 기간에 한 액션의 합계 구하기
              SUM(
               -- 2. 대상 기간의 종료일이 로그의 최신 날짜 이전인지 확인하기
               CASE WHEN index_end_date <= latest_date THEN
                -- 1. 지표의 대상 기간에 액션을 했다면 1, 안 했다면 0 지정하기
                CASE WHEN action_date BETWEEN index_begin_date AND index_end_date
                 THEN 1 ELSE 0 END
               END
              )
           ) AS index_date_action
          FROM
             action_log_with_index_date
          GROUP BY
             user_id, register_date, index_name, index_begin_date, index_end_date
        )
        , register_action_flag AS (
          SELECT
             m.user_id
           , COUNT(DISTINCT CAST(a.stamp AS date)) AS dt_count
           , index_name
           , index_date_action
          FROM
             mst_users AS m
          LEFT JOIN
             action_log AS a
           ON m.user_id = a.user_id
           -- 등록 다음날부터 7일 이내의 액션 로그 결합하기
           AND CAST(a.stamp AS date)
               BETWEEN CAST(m.register_date AS date) + interval '1 day'
                   AND CAST(m.register_date AS date) + interval '8 days'
          LEFT JOIN
             user_action_flag AS f
           ON m.user_id = f.user_id
          WHERE
             f.index_date_action IS NOT NULL
          GROUP BY
             m.user_id
           , f.index_name
           , f.index_date_action
        )
        SELECT *
        FROM register_action_flag
        ;
        """

select(query_1219)

Unnamed: 0,user_id,dt_count,index_name,index_date_action


### [12-20] 사용 일수에 따른 정착률을 집계하는 쿼리

In [36]:
query_1220 = """
        WITH
        repeat_interval(index_name, interval_begin_date, interval_end_date) AS (
         VALUES
            ('28 day retention', 22, 28)
        )
        , action_log_with_index_date AS (
          SELECT
             u.user_id
           , u.register_date
           , CAST(a.stamp AS date) AS action_date
           , MAX(CAST(a.stamp AS date)) OVER() AS latest_date
           , r.index_name
             -- 지표의 대상 기간 시작일과 종류일 계산하기
           , CAST(u.register_date::date + '1 day'::interval * r.interval_begin_date AS date)
             AS index_begin_date
           , CAST(u.register_date::date + '1 day'::interval * r.interval_end_date AS date)
             AS index_end_date             
          FROM
             mst_users AS u
          LEFT OUTER JOIN
             action_log AS a
          ON u.user_id = a.user_id
          CROSS JOIN
             repeat_interval AS r
        )
        , user_action_flag AS (
          SELECT
             user_id
           , register_date
           , index_name
             -- 4. 지표의 대상 기간에 액션을 했는지 플래그로 나타내기
           , SIGN(
              -- 3. 사용자별로 대상 기간에 한 액션의 합계 구하기
              SUM(
               -- 2. 대상 기간의 종료일이 로그의 최신 날짜 이전인지 확인하기
               CASE WHEN index_end_date <= latest_date THEN
                -- 1. 지표의 대상 기간에 액션을 했다면 1, 안 했다면 0 지정하기
                CASE WHEN action_date BETWEEN index_begin_date AND index_end_date
                 THEN 1 ELSE 0 END
               END
              )
           ) AS index_date_action
          FROM
             action_log_with_index_date
          GROUP BY
             user_id, register_date, index_name, index_begin_date, index_end_date
        )
        , register_action_flag AS (
          SELECT
             m.user_id
           , COUNT(DISTINCT CAST(a.stamp AS date)) AS dt_count
           , index_name
           , index_date_action
          FROM
             mst_users AS m
          LEFT JOIN
             action_log AS a
           ON m.user_id = a.user_id
           -- 등록 다음날부터 7일 이내의 액션 로그 결합하기
           AND CAST(a.stamp AS date)
               BETWEEN CAST(m.register_date AS date) + interval '1 day'
                   AND CAST(m.register_date AS date) + interval '8 days'
          LEFT JOIN
             user_action_flag AS f
           ON m.user_id = f.user_id
          WHERE
             f.index_date_action IS NOT NULL
          GROUP BY
             m.user_id
           , f.index_name
           , f.index_date_action
        )
        SELECT
           dt_count AS dates
         , COUNT(user_id) AS users
         , 100.0 * COUNT(user_id) / SUM(COUNT(user_id)) OVER() AS user_ratio
         , 100.0
           * SUM(COUNT(user_id))
              OVER(ORDER BY index_name, dt_count
               ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
           / SUM(COUNT(user_id)) OVER() AS cum_ratio
         , SUM(index_date_action) AS achieve_users
         , AVG(100.0 * index_date_action) AS achieve_ratio
        FROM 
           register_action_flag
        GROUP BY
           index_name, dt_count
        ORDER BY
           index_name, dt_count
        ;
        """

select(query_1220)

Unnamed: 0,dates,users,user_ratio,cum_ratio,achieve_users,achieve_ratio
