# Когортный анализ

In [1]:
import pandas as pd
from settings import db_config
from sqlalchemy import create_engine
pd.set_option("display.precision", 2)

In [2]:
# устанавливаем параметры
connection_string = 'postgresql://{}:{}@{}:{}/{}'.format(db_config.USER, 
                                                         db_config.PWD, 
                                                         db_config.HOST, 
                                                         db_config.PORT,
                                                         db_config.DB)

In [3]:
# сохраняем коннектор
engine = create_engine(connection_string)

In [4]:
def sql_request(query):
    """
    Функция запроса к базе данных и возврата ответа в виде pandas датафрейма

    query - запрос
    """

    return pd.read_sql(query, con = engine)

## Расчёт Retention Rate

In [5]:
query = """
WITH profile AS
  (SELECT u.user_id,
          DATE_TRUNC('month', MIN(event_time))::date AS cohort_start
   FROM tools_shop.users u
   JOIN tools_shop.orders o ON u.user_id = o.user_id
   JOIN tools_shop.events e ON u.user_id = e.user_id
   GROUP BY 1), 
sessions AS
  (SELECT p.user_id,
          DATE_TRUNC('month', event_time)::date AS session_dt
   FROM tools_shop.events e
   JOIN profile p ON p.user_id = e.user_id
   GROUP BY 1,
            2),
cohort_users_cnt AS
  (SELECT cohort_start,
          COUNT(user_id) AS cohort_users_cnt
   FROM profile
   GROUP BY 1)
SELECT  p.cohort_start, 
        session_dt,
        cohort_users_cnt,
        COUNT(p.user_id) AS users_cnt,
        ROUND(COUNT(p.user_id) * 100.0 / cohort_users_cnt, 2) AS retention_rate
FROM profile p
JOIN sessions s ON p.user_id = s.user_id
JOIN cohort_users_cnt cnt ON cnt.cohort_start = s.session_dt
GROUP BY 1,2,3
ORDER BY p.cohort_start;
"""

In [6]:
sql_request(query)

Unnamed: 0,cohort_start,session_dt,cohort_users_cnt,users_cnt,retention_rate
0,2016-03-01,2016-03-01,4,4,100.00
1,2016-03-01,2016-07-01,60,2,3.33
2,2016-03-01,2016-08-01,61,1,1.64
3,2016-03-01,2016-09-01,82,2,2.44
4,2016-04-01,2016-04-01,12,12,100.00
...,...,...,...,...,...
611,2021-04-01,2021-05-01,313,73,23.32
612,2021-04-01,2021-06-01,7,4,57.14
613,2021-05-01,2021-05-01,313,313,100.00
614,2021-05-01,2021-06-01,7,6,85.71


## Расчёт LTV

In [7]:
# LTV в первые шесть месяцев с момента регистрации пользователей, которые зарегистрировались в 2019 году и совершили хотя бы одну покупку
query = """
WITH a AS (
    SELECT u.user_id,
           DATE_TRUNC('month', u.created_at)::date AS reg_month,
           EXTRACT(MONTH FROM AGE(o.created_at, u.created_at)) AS lifetime,
           SUM(total_amt) OVER (PARTITION BY u.user_id ORDER BY o.created_at) AS ltv
    FROM tools_shop.users AS u
    JOIN tools_shop.orders AS o ON u.user_id = o.user_id
    WHERE EXTRACT(YEAR FROM u.created_at) = 2019
)
SELECT reg_month,
       lifetime,
       ROUND(AVG(ltv), 2) AS avg_ltv
FROM a
WHERE lifetime <= 5
GROUP BY reg_month, lifetime;
"""

In [8]:
sql_request(query)

Unnamed: 0,reg_month,lifetime,avg_ltv
0,2019-01-01,0.0,196.02
1,2019-01-01,1.0,373.55
2,2019-01-01,2.0,254.40
3,2019-01-01,3.0,167.58
4,2019-01-01,4.0,320.04
...,...,...,...
67,2019-12-01,1.0,250.11
68,2019-12-01,2.0,383.20
69,2019-12-01,3.0,369.93
70,2019-12-01,4.0,324.07


## Расчёт Churn Rate

In [9]:
query = """
WITH start AS (
    SELECT u.user_id,
           DATE_TRUNC('month', MIN(e.event_time))::date AS start_dt 
    FROM tools_shop.users AS u
    JOIN tools_shop.orders AS o ON u.user_id = o.user_id
    JOIN tools_shop.events AS e ON u.user_id = e.user_id
    GROUP BY u.user_id
),
cohorts AS (
    SELECT s.start_dt,
           DATE_TRUNC('month', e.event_time)::date AS event_month,
           COUNT(DISTINCT s.user_id) AS users_cnt
    FROM start AS s
    JOIN tools_shop.events AS e ON s.user_id = e.user_id
    GROUP BY 1,
             2   
)
SELECT *,
       LAG(users_cnt) OVER (PARTITION BY start_dt ORDER BY event_month) AS previous_day_users_cnt,
       ROUND((1 - (users_cnt::numeric/ LAG(users_cnt) OVER 
                   (PARTITION BY start_dt ORDER BY event_month))) * 100, 2) AS churn_rate
FROM cohorts; 
"""

In [10]:
sql_request(query)

Unnamed: 0,start_dt,event_month,users_cnt,previous_day_users_cnt,churn_rate
0,2016-03-01,2016-03-01,4,,
1,2016-03-01,2016-07-01,2,4.0,50.00
2,2016-03-01,2016-08-01,1,2.0,50.00
3,2016-03-01,2016-09-01,2,1.0,-100.00
4,2016-04-01,2016-04-01,12,,
...,...,...,...,...,...
611,2021-04-01,2021-05-01,73,372.0,80.38
612,2021-04-01,2021-06-01,4,73.0,94.52
613,2021-05-01,2021-05-01,313,,
614,2021-05-01,2021-06-01,6,313.0,98.08


In [11]:
engine.dispose()