In [3]:
import pandas as pd
import psycopg2
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import seaborn as sns 
import statsmodels.stats.proportion as proportion
from scipy.stats import ttest_ind,mannwhitneyu,shapiro,norm
from statsmodels.stats.weightstats import ztest
from tqdm import tqdm
import timeit
from scipy import stats
import math
from datetime import date, datetime, timedelta
import time
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
import warnings
warnings.filterwarnings("ignore")
import clickhouse_connect  



from credential import postgres_secret,clickhouse_dwh_secret

def get_engine(user):
    if user == postgres_secret['user']:
        db_name = postgres_secret['db_name']
        password = postgres_secret['password']
        host = postgres_secret['host']
        engine = create_engine(f'postgresql://{user}:{password}@{host}:6432/{db_name}')
    elif user == clickhouse_dwh_secret['user']:
            db_name = clickhouse_dwh_secret['db_name'] 
            password = clickhouse_dwh_secret['password']
            host = clickhouse_dwh_secret['host']
            engine = create_engine(f'clickhouse://{user}:{password}@{host}:8123/{db_name}')
    return engine
    
connection_clickhouse = clickhouse_connect.get_client(
    host = clickhouse_dwh_secret['host'],
    port= '8123',
    username = clickhouse_dwh_secret['user'],
    password = clickhouse_dwh_secret['password'],
    database='datamarts'
    )

    
def execute(SQL, user):
    start_time = time.time()  # запоминаем время начала выполнения функции
    engine = get_engine(user)
    Session = sessionmaker(bind=engine)  # sessions factory ()
    with Session() as session: # open session
        result = session.execute(text(SQL))
        df = pd.DataFrame(result.fetchall(), columns=result.keys())
        
    end_time = time.time()  # запоминаем время окончания выполнения функции
    execution_time = round(end_time - start_time,4) # вычисляем время выполнения   
    
    print(f"Время выполнения функции: {execution_time} секунд")
    print()
    return df

In [11]:
# Новые Посетители
query = '''SELECT 
toStartOfMonth(min_date_visitor) AS date_month,
uniq(visitor_id) AS cnt_user
FROM
(SELECT min(date) AS min_date_visitor,visitor_id
FROM datamarts.clean_event
WHERE client_type in ('ios','android')
GROUP BY 2)
WHERE date_month BETWEEN '2025-01-01' AND '2025-06-01'
GROUP BY 1
'''

pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='date_month',aggfunc='sum')

Время выполнения функции: 2.6334 секунд



date_month,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01,2025-06-01
cnt_user,43941,31855,35388,32386,37146,35994


In [15]:
# Новые реги
query = '''SELECT 
toStartOfMonth(reg_date) AS reg_month,
uniq(profile_id) AS cnt_user
FROM datamarts.marketing_dash
WHERE reg_month BETWEEN '2025-01-01' AND '2025-06-01'
AND device in ('apple','android')
--WHERE reg_month BETWEEN now() - interval 7 month AND now() - interval 1 month
GROUP BY 1
'''

pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='reg_month',aggfunc='sum')

Время выполнения функции: 0.7247 секунд



reg_month,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01,2025-06-01
cnt_user,8524,7293,8539,8783,6785,7577


In [17]:
# Новые Триалы ВСЕ

query = '''SELECT 
toStartOfMonth(created_at) AS trial_month,
uniq(profile_id) AS cnt_user
FROM datamarts.marketing_dash x
WHERE trial_month BETWEEN '2025-01-01' AND '2025-06-01'
--WHERE trial_month BETWEEN now() - interval 7 month AND now() - interval 1 month
AND device in ('apple','android')
GROUP BY 1
'''


pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='trial_month',aggfunc='sum')


Время выполнения функции: 0.9124 секунд



trial_month,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01,2025-06-01
cnt_user,1545,1121,1115,1283,1269,1397


In [19]:
# Новые Триалы РАЗБИВКА ПО ПЕРИОДАМ


query = '''SELECT  
toStartOfMonth(created_at) AS trial_month,
CASE WHEN free_days=3 AND offer_duration='12 month' THEN '(1) 3_month12'
     WHEN free_days=3 AND offer_duration='3 month' THEN '(2) 3_month3'
     WHEN free_days=14 AND offer_duration='1 month' THEN '(3) 14_month1'
     WHEN free_days=30 AND offer_duration ='1 month' THEN '(4) 30_month1'
     WHEN free_days=30 AND offer_duration ='3 month' THEN '(5) 30_month3'
     WHEN free_days=35 AND offer_duration ='3 month' THEN '(6) 35_month3'
     WHEN free_days=45 AND offer_duration ='3 month' THEN '(7) 45_month3'
     ELSE '(8) other'     --toString(free_days)
END AS free_days,
uniq(profile_id) AS cnt_user
FROM datamarts.marketing_dash x
WHERE trial_month::date BETWEEN '2025-01-01' AND '2025-06-01'
AND device in ('apple','android')
GROUP BY 1,2
ORDER BY 1,2
'''

pivot_table = pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='trial_month',index='free_days',aggfunc='sum')
pivot_table = pivot_table.fillna(0)
pivot_table = pivot_table.astype(int)  

#pivot_table.iloc[:-1]
pivot_table

Время выполнения функции: 0.9094 секунд



Unnamed: 0_level_0,cnt_user,cnt_user,cnt_user,cnt_user,cnt_user,cnt_user
trial_month,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01,2025-06-01
free_days,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
(1) 3_month12,11,14,27,74,45,26
(2) 3_month3,41,22,32,44,23,27
(3) 14_month1,793,617,557,658,545,754
(4) 30_month1,48,77,92,112,109,191
(5) 30_month3,26,18,29,33,33,26
(6) 35_month3,348,283,280,318,316,217
(7) 45_month3,57,12,3,14,0,0
(8) other,221,78,95,30,198,156


In [21]:
# Новые подписчики ВСЕ
query = '''SELECT 
toStartOfMonth(first_prolong_date) AS subs_month,
uniq(profile_id) AS cnt_user
FROM datamarts.marketing_dash AS x
--WHERE subs_month BETWEEN now() - interval 7 month AND now() - interval 1 month
WHERE reg_date!='1970-01-01' AND subs_month::date BETWEEN '2025-01-01' AND '2025-06-01'
AND device in ('apple','android')
GROUP BY 1
'''

pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='subs_month',aggfunc='sum')

Время выполнения функции: 1.0107 секунд



subs_month,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01,2025-06-01
cnt_user,295,252,304,375,343,325


In [23]:
# Новые подписчики РАЗБИВКА ПО ПЕРИОДАМ
query = '''SELECT 
toStartOfMonth(first_prolong_date) AS subs_month,
CASE WHEN free_days=3 AND offer_duration='12 month' THEN '(1) 3_month12'
     WHEN free_days=3 AND offer_duration='3 month' THEN '(2) 3_month3'
     WHEN free_days=14 AND offer_duration='1 month' THEN '(3) 14_month1'
     WHEN free_days=30 AND offer_duration ='1 month' THEN '(4) 30_month1'
     WHEN free_days=30 AND offer_duration ='3 month' THEN '(5) 30_month3'
     WHEN free_days=35 AND offer_duration ='3 month' THEN '(6) 35_month3'
     WHEN free_days=45 AND offer_duration ='3 month' THEN '(7) 45_month3'
     ELSE '(8) other'    --toString(free_days)
END AS free_days,
uniq(profile_id) AS cnt_user
FROM datamarts.marketing_dash AS x
WHERE subs_month::date BETWEEN '2025-01-01' AND '2025-06-01'
--subs_month BETWEEN now() - interval 7 month AND now() - interval 1 month
AND reg_date!='1970-01-01'
AND device in ('apple','android')
GROUP BY 1,2
'''

pivot_table = pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='subs_month',index='free_days',aggfunc='sum')
pivot_table = pivot_table.fillna(0)
pivot_table = pivot_table.astype(int)  

# pivot_table.iloc[:-1]
pivot_table

Время выполнения функции: 0.905 секунд



Unnamed: 0_level_0,cnt_user,cnt_user,cnt_user,cnt_user,cnt_user,cnt_user
subs_month,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01,2025-06-01
free_days,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
(1) 3_month12,9,10,16,51,30,18
(2) 3_month3,25,14,16,30,18,13
(3) 14_month1,180,125,154,190,148,128
(4) 30_month1,3,9,16,30,21,85
(5) 30_month3,1,1,3,5,9,5
(6) 35_month3,56,60,52,62,71,51
(7) 45_month3,1,2,5,1,0,0
(8) other,20,31,42,6,46,25


In [25]:
# ARPU ВСЕ

query = '''
SELECT 
toStartOfMonth(paid_date) AS paid_month,
sum(payment)/uniq(user_id) AS ARPPU
FROM datamarts.finance 
WHERE paid_month::date BETWEEN '2025-01-01' AND '2025-06-01'
--WHERE paid_month BETWEEN now() - interval 7 month AND now() - interval 1 month
AND reg_device in ('apple','android')
GROUP BY 1
'''

pivot_df = pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='paid_month',aggfunc='sum')
pivot_df = pivot_df.astype(int) 

pivot_df

Время выполнения функции: 0.6651 секунд



paid_month,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01,2025-06-01
ARPPU,367,347,351,399,381,349


In [27]:
# ARPU ПО СЕГМЕНТАМ

query = '''
SELECT 
toStartOfMonth(paid_date) AS paid_month,
CASE WHEN t2.free_days=3 AND offer_duration='12 month' THEN '(1) 3_month12'
     WHEN t2.free_days=3 AND offer_duration='3 month' THEN '(2) 3_month3'
     WHEN t2.free_days=14 AND offer_duration='1 month' THEN '(3) 14_month1'
     WHEN t2.free_days=30 AND offer_duration ='1 month' THEN '(4) 30_month1'
     WHEN t2.free_days=30 AND offer_duration ='3 month' THEN '(5) 30_month3'
     WHEN t2.free_days=35 AND offer_duration ='3 month' THEN '(6) 35_month3'
     WHEN t2.free_days=45 AND offer_duration ='3 month' THEN '(7) 45_month3'
     ELSE '(8) other'  --toString(free_days)
END AS free_days,
sum(payment)/uniq(t1. user_id) AS ARPPU
FROM datamarts.finance AS t1
LEFT JOIN (SELECT user_id,offer_duration,free_days FROM datamarts.marketing_dash) AS t2 on t1.user_id=t2.user_id
WHERE paid_month::date BETWEEN '2025-01-01' AND '2025-06-01'
--WHERE paid_month BETWEEN now() - interval 7 month AND now() - interval 1 month
--AND free_days IN ('3_month3','3_month12','14','30','35','45')
AND reg_device in ('apple','android')
GROUP BY 1,2
'''



pivot_table = pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='paid_month',index='free_days',aggfunc='sum')
pivot_table = pivot_table.fillna(0)
pivot_table = pivot_table.astype(int) 

# pivot_table.iloc[:-1]
pivot_table

Время выполнения функции: 1.208 секунд



Unnamed: 0_level_0,ARPPU,ARPPU,ARPPU,ARPPU,ARPPU,ARPPU
paid_month,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01,2025-06-01
free_days,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
(1) 3_month12,1159,1190,1294,1331,1466,1475
(2) 3_month3,481,489,485,519,535,539
(3) 14_month1,322,299,303,308,324,343
(4) 30_month1,299,299,299,211,299,98
(5) 30_month3,399,384,399,399,399,365
(6) 35_month3,396,392,396,395,391,396
(7) 45_month3,399,399,376,199,299,399
(8) other,350,340,325,348,213,267


In [137]:
# Число платящих и ретеншн в повторную по старым пользователям

#pd.options.display.float_format = '{:.6f}'.format 

query = '''   
SELECT * FROM 
(SELECT 
paid_date,
t1.user_id AS user_id,
CASE WHEN offer_duration='1 month' THEN '1 month'
     WHEN offer_duration='3 month' THEN '3 month'
     WHEN offer_duration='12 month' THEN '12 month'
     ELSE 'other_1_month'
     END offer_type,
payment,
reg_device,
ends_at,
min(paid_date) OVER (PARTITION BY user_id) AS min_paid_date,
(EXTRACT(YEAR FROM paid_date) - EXTRACT(YEAR FROM min_paid_date)) * 12 +  
(EXTRACT(MONTH FROM paid_date) - EXTRACT(MONTH FROM min_paid_date)) AS delta_month,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY paid_date) AS num_of_payment
FROM datamarts.finance AS t1
LEFT JOIN (SELECT user_id,offer_duration,free_days,ends_at FROM datamarts.marketing_dash) AS t2 on t1.user_id=t2.user_id
)
WHERE min_paid_date::date BETWEEN '2024-01-01' AND '2024-12-31' AND ends_at>='2025-01-01'
--WHERE min_paid_date BETWEEN now() - interval 7 month AND now() - interval 1 MONTH
--AND reg_device in ('apple','android')
'''

df_payment = execute(query,user='kmekhtiev')

#pivot_table = pd.pivot_table(data=df_payment[df_payment['num_of_payment']<7],columns='num_of_payment',values=['user_id'],aggfunc={'user_id': "nunique"})
pivot_table = pd.pivot_table(data=df_payment[df_payment['num_of_payment']<7],index='offer_type',columns='num_of_payment',values=['user_id','payment'],aggfunc={'user_id': "nunique",'payment':'mean'})



pivot_table

Время выполнения функции: 4.477 секунд



Unnamed: 0_level_0,payment,payment,payment,payment,payment,payment,user_id,user_id,user_id,user_id,user_id,user_id
num_of_payment,1,2,3,4,5,6,1,2,3,4,5,6
offer_type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
1 month,274.13773,292.521981,293.888759,293.899474,294.080443,293.944992,2396,1979,1717,1522,1355,1218
12 month,1115.676728,1145.932143,792.058824,735.4,339.0,549.2,1027,280,17,10,5,5
3 month,393.488942,406.206235,411.127512,428.490617,469.0,479.434783,4567,2502,1443,373,60,23
other_1_month,553.983333,662.474227,697.207547,590.666667,599.0,599.0,120,97,53,12,2,2


In [139]:
# Число платящих и ретеншн в повторную по старым пользователям

#pd.options.display.float_format = '{:.6f}'.format 

query = '''   
SELECT * FROM 
(SELECT 
paid_date,
t1.user_id AS user_id,
CASE WHEN offer_duration='1 month' THEN '1 month'
     WHEN offer_duration='3 month' THEN '3 month'
     WHEN offer_duration='12 month' THEN '12 month'
     ELSE 'other_1_month'
     END offer_type,
payment,
reg_device,
ends_at,
payer,
min(paid_date) OVER (PARTITION BY user_id) AS min_paid_date,
(EXTRACT(YEAR FROM paid_date) - EXTRACT(YEAR FROM min_paid_date)) * 12 +  
(EXTRACT(MONTH FROM paid_date) - EXTRACT(MONTH FROM min_paid_date)) AS delta_month,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY paid_date) AS num_of_payment
FROM datamarts.finance AS t1
LEFT JOIN (SELECT user_id,offer_duration,free_days,ends_at,payer FROM datamarts.marketing_dash) AS t2 on t1.user_id=t2.user_id
)
WHERE min_paid_date::date BETWEEN '2024-01-01' AND '2024-12-31' AND ends_at>='2025-01-01' AND payer=1
--WHERE min_paid_date BETWEEN now() - interval 7 month AND now() - interval 1 MONTH
--AND reg_device in ('apple','android')
'''

df_payment = execute(query,user='kmekhtiev')

#pivot_table = pd.pivot_table(data=df_payment[df_payment['num_of_payment']<7],columns='num_of_payment',values=['user_id'],aggfunc={'user_id': "nunique"})
pivot_table = pd.pivot_table(data=df_payment[df_payment['num_of_payment']<7],index='offer_type',columns='num_of_payment',values=['user_id','payment'],aggfunc={'user_id': "nunique",'payment':'mean'})



pivot_table

Время выполнения функции: 3.5672 секунд



Unnamed: 0_level_0,payment,payment,payment,payment,payment,payment,user_id,user_id,user_id,user_id,user_id,user_id
num_of_payment,1,2,3,4,5,6,1,2,3,4,5,6
offer_type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
1 month,273.995343,292.538265,293.91701,293.934884,294.073936,293.948548,2362,1960,1699,1505,1339,1205
12 month,1117.645383,1150.960432,822.875,783.888889,349.0,611.75,1018,278,16,9,4,4
3 month,393.643787,406.258781,410.986063,428.459459,471.413793,494.454545,4531,2477,1435,370,58,22
other_1_month,558.305085,666.260417,704.865385,617.181818,599.0,599.0,118,96,52,11,2,2


In [35]:
# Число платящих и ретеншн в повторную подписку РАЗБИВКА ПО ПЕРИОДАМ

#pd.options.display.float_format = '{:.6f}'.format 

query = '''   
SELECT * FROM
(SELECT 
paid_date,
t1.user_id AS user_id,
payment,
reg_device,
CASE WHEN t2.free_days=3 AND offer_duration='12 month' THEN '(1) 3_month12'
     WHEN t2.free_days=3 AND offer_duration='3 month' THEN '(2) 3_month3'
     WHEN t2.free_days=14 AND offer_duration='1 month' THEN '(3) 14_month1'
     WHEN t2.free_days=30 AND offer_duration ='1 month' THEN '(4) 30_month1'
     WHEN t2.free_days=30 AND offer_duration ='3 month' THEN '(5) 30_month3'
     WHEN t2.free_days=35 AND offer_duration ='3 month' THEN '(6) 35_month3'
     WHEN t2.free_days=45 AND offer_duration ='3 month' THEN '(7) 45_month3'
     ELSE '(8) other'  --toString(free_days)
END AS free_days,
min(paid_date) OVER (PARTITION BY user_id) AS min_paid_date,
(EXTRACT(YEAR FROM paid_date) - EXTRACT(YEAR FROM min_paid_date)) * 12 +  
(EXTRACT(MONTH FROM paid_date) - EXTRACT(MONTH FROM min_paid_date)) AS delta_month,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY paid_date) AS num_of_payment
FROM datamarts.finance AS t1
LEFT JOIN (SELECT user_id,offer_duration,free_days FROM datamarts.marketing_dash) AS t2 on t1.user_id=t2.user_id
)
WHERE min_paid_date::date BETWEEN '2025-01-01' AND '2025-06-30'
--WHERE min_paid_date BETWEEN now() - interval 7 month AND now() - interval 1 MONTH
AND reg_device in ('apple','android')
'''

df_payment = execute(query,user='kmekhtiev')

pivot_table = pd.pivot_table(data=df_payment[df_payment['num_of_payment']<7],columns='num_of_payment',index='off',values=['user_id'],aggfunc={'user_id': "nunique"})

pivot_table = pivot_table.fillna(0)
r = pivot_table.div(pivot_table[('user_id', 1)], axis=0)

# pd.concat([pivot_table,r]).sort_values(by='free_days').iloc[:-2]
pd.concat([pivot_table,r]).sort_values(by='free_days')

Время выполнения функции: 1.4168 секунд



Unnamed: 0_level_0,user_id,user_id,user_id,user_id,user_id,user_id
num_of_payment,1,2,3,4,5,6
free_days,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
(1) 3_month12,134.0,2.0,1.0,0.0,0.0,0.0
(1) 3_month12,1.0,0.014925,0.007463,0.0,0.0,0.0
(2) 3_month3,121.0,41.0,5.0,0.0,0.0,0.0
(2) 3_month3,1.0,0.338843,0.041322,0.0,0.0,0.0
(3) 14_month1,941.0,433.0,241.0,132.0,70.0,34.0
(3) 14_month1,1.0,0.460149,0.256111,0.140276,0.074389,0.036132
(4) 30_month1,178.0,22.0,9.0,3.0,1.0,1.0
(4) 30_month1,1.0,0.123596,0.050562,0.016854,0.005618,0.005618
(5) 30_month3,25.0,4.0,1.0,0.0,0.0,0.0
(5) 30_month3,1.0,0.16,0.04,0.0,0.0,0.0


In [41]:
# Watchtime ВСЕ
query = '''SELECT 
toStartOfMonth(date) AS date_month,
sum(watchtime)/60 AS watchtime,
uniq(profile_id) AS cnt_user,
round(watchtime/cnt_user,0)::int AS watchtime_per_user
FROM datamarts.watchtime_by_day AS x
WHERE date_month BETWEEN now() - interval 7 month AND now() - interval 1 MONTH
AND client_type in ('ios','android')
GROUP BY 1
'''

pd.pivot_table(execute(query,user='kmekhtiev'),columns='date_month')

Время выполнения функции: 0.7253 секунд



date_month,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01,2025-06-01
cnt_user,20312.0,18364.0,20662.0,20776.0,18174.0,20623.0
watchtime,714515.733333,645134.9,695958.383333,1056974.0,970479.533333,922543.55
watchtime_per_user,35.0,35.0,34.0,51.0,53.0,45.0


In [49]:
# Watchtime РАЗБИВКА ПО СЕГМЕНТАМ
query = '''SELECT
date_month,
free_days,
watchtime_per_user
FROM
(SELECT 
toStartOfMonth(date) AS date_month,
CASE WHEN free_days='3' AND offer_duration='12 month' THEN '(1) 3_month12'
     WHEN free_days='3' AND offer_duration='3 month' THEN '(2) 3_month3'
     WHEN free_days='14' AND offer_duration='1 month' THEN '(3) 14_month1'
     WHEN free_days='30' AND offer_duration ='1 month' THEN '(4) 30_month1'
     WHEN free_days='30' AND offer_duration ='3 month' THEN '(5) 30_month3'
     WHEN free_days='35' AND offer_duration ='3 month' THEN '(6) 35_month3'
     WHEN free_days='45' AND offer_duration ='3 month' THEN '(7) 45_month3'
     ELSE '(8) other'  --toString(free_days)
END AS free_days,
sum(watchtime)/60 AS watchtime,
uniq(profile_id) AS cnt_user,
round(watchtime/cnt_user,0)::int AS watchtime_per_user
FROM datamarts.watchtime_by_day AS x
LEFT JOIN datamarts.marketing_dash AS t on x.profile_id=t.profile_id
WHERE date_month BETWEEN now() - interval 7 month AND now() - interval 1 MONTH
AND client_type in ('ios','android')
GROUP BY 1,2
)
'''

pd.pivot_table(execute(query,user='kmekhtiev'),columns='date_month',index='free_days')

Время выполнения функции: 1.1462 секунд



Unnamed: 0_level_0,watchtime_per_user,watchtime_per_user,watchtime_per_user,watchtime_per_user,watchtime_per_user,watchtime_per_user
date_month,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01,2025-06-01
free_days,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
(1) 3_month12,64.0,74.0,101.0,92.0,73.0,76.0
(2) 3_month3,151.0,168.0,197.0,222.0,257.0,142.0
(3) 14_month1,107.0,106.0,102.0,221.0,217.0,156.0
(4) 30_month1,86.0,69.0,125.0,229.0,166.0,165.0
(5) 30_month3,43.0,65.0,29.0,68.0,76.0,124.0
(6) 35_month3,48.0,39.0,39.0,52.0,44.0,41.0
(7) 45_month3,30.0,42.0,19.0,17.0,5.0,6.0
(8) other,18.0,20.0,18.0,18.0,23.0,19.0


# Фактическая выручка от новых пользователей

In [180]:
query = '''SELECT * FROM
(SELECT 
paid_date,
t1.user_id AS user_id,
payment,
CASE WHEN t2.free_days=3 AND offer_duration='12 month' THEN '(1) 3_month12'
     WHEN t2.free_days=3 AND offer_duration='3 month' THEN '(2) 3_month3'
     WHEN t2.free_days=14 AND offer_duration='1 month' THEN '(3) 14_month1'
     WHEN t2.free_days=30 AND offer_duration ='1 month' THEN '(4) 30_month1'
     WHEN t2.free_days=30 AND offer_duration ='3 month' THEN '(5) 30_month3'
     WHEN t2.free_days=35 AND offer_duration ='3 month' THEN '(6) 35_month3'
     WHEN t2.free_days=45 AND offer_duration ='3 month' THEN '(7) 45_month3'
     ELSE '(8) other'  --toString(free_days)
END AS free_days_new,
free_days AS free_days_old,
offer_duration,
payer,
min(paid_date) OVER (PARTITION BY user_id) AS min_paid_date,
(EXTRACT(YEAR FROM paid_date) - EXTRACT(YEAR FROM min_paid_date)) * 12 +  
(EXTRACT(MONTH FROM paid_date) - EXTRACT(MONTH FROM min_paid_date)) AS delta_month,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY paid_date) AS num_of_payment
FROM datamarts.finance AS t1
LEFT JOIN (SELECT user_id,offer_duration,free_days,payer FROM datamarts.marketing_dash) AS t2 on t1.user_id=t2.user_id
)
WHERE min_paid_date::date BETWEEN '2025-01-01' AND '2025-05-31' AND  paid_date::date BETWEEN '2025-01-01' AND '2025-05-31' AND payer=1
'''

df_fact = execute(query,user='kmekhtiev')
df_fact['paid_date'] = df_fact['paid_date'].astype('datetime64[ns]')
df_fact['paid_date_month'] = df_fact['paid_date'].dt.to_period('M')

Время выполнения функции: 1.5991 секунд



In [181]:
df_fact[(df_fact['free_days_new']=='(2) 3_month12') & (df_fact['num_of_payment']>2)]

Unnamed: 0,paid_date,user_id,payment,free_days_new,free_days_old,offer_duration,payer,min_paid_date,delta_month,num_of_payment,paid_date_month


In [182]:
df_fact[df_fact['free_days_new']=='(3) 14_month1'].groupby('paid_date_month').agg({'payment':'sum','user_id':'nunique'}).reset_index()

Unnamed: 0,paid_date_month,payment,user_id
0,2025-01,136322,420
1,2025-02,167587,562
2,2025-03,234112,777
3,2025-04,327870,1057
4,2025-05,402017,1172


In [183]:
pivot_df = df_fact.pivot_table(
                    columns='num_of_payment', 
                    index=['free_days_new','paid_date_month'],
                    values=['user_id','payment'],
                    aggfunc={'user_id': 'count', 'payment': 'sum'}
)

pivot_df.fillna(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,payment,payment,payment,payment,payment,payment,user_id,user_id,user_id,user_id,user_id,user_id
Unnamed: 0_level_1,num_of_payment,1,2,3,4,5,6,1,2,3,4,5,6
free_days_new,paid_date_month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
(1) 3_month12,2025-01,74129.0,0.0,0.0,0.0,0.0,0.0,68.0,0.0,0.0,0.0,0.0,0.0
(1) 3_month12,2025-02,34613.0,299.0,0.0,0.0,0.0,0.0,31.0,1.0,0.0,0.0,0.0,0.0
(1) 3_month12,2025-03,69615.0,299.0,299.0,0.0,0.0,0.0,59.0,1.0,1.0,0.0,0.0,0.0
(1) 3_month12,2025-04,271139.0,2740.0,299.0,299.0,0.0,0.0,205.0,2.0,1.0,1.0,0.0,0.0
(1) 3_month12,2025-05,151183.0,3100.0,0.0,299.0,1550.0,0.0,100.0,2.0,0.0,1.0,1.0,0.0
(2) 3_month3,2025-01,30586.0,399.0,0.0,0.0,0.0,0.0,64.0,1.0,0.0,0.0,0.0,0.0
(2) 3_month3,2025-02,23451.0,0.0,0.0,0.0,0.0,0.0,49.0,0.0,0.0,0.0,0.0,0.0
(2) 3_month3,2025-03,27193.0,499.0,0.0,0.0,0.0,0.0,57.0,1.0,0.0,0.0,0.0,0.0
(2) 3_month3,2025-04,52131.0,15119.0,399.0,399.0,0.0,0.0,99.0,31.0,1.0,1.0,0.0,0.0
(2) 3_month3,2025-05,42084.0,16516.0,0.0,0.0,0.0,0.0,66.0,34.0,0.0,0.0,0.0,0.0


In [188]:
print(df_fact[df_fact['free_days_new']!='(8) other']['user_id'].nunique())
print(df_fact[df_fact['free_days_new']!='(8) other']['payment'].sum())

12499
6545094


In [190]:
r = df_fact[df_fact['free_days_new']=='(8) other'].groupby(['paid_date_month','offer_duration','free_days_old'])['user_id'].nunique().reset_index()

t = r.pivot_table(index=['offer_duration','free_days_old'],
               columns='paid_date_month',
               values='user_id'
              ).fillna(0)


pd.set_option('display.max_rows', None)
# Также можно установить display.max_columns, если нужно показать все колонки

# Фактическая выручка старых пользователей 

In [198]:
query = '''SELECT * FROM
(SELECT 
paid_date,
t1.user_id AS user_id,
payment,
CASE WHEN offer_duration='1 month' THEN '1 month'
     WHEN offer_duration='3 month' THEN '3 month'
     WHEN offer_duration='12 month' THEN '12 month'
     ELSE 'other_1_month'
     END offer_type,
free_days AS free_days_old,
offer_duration,
payer,
min(paid_date) OVER (PARTITION BY user_id) AS min_paid_date,
(EXTRACT(YEAR FROM paid_date) - EXTRACT(YEAR FROM min_paid_date)) * 12 +  
(EXTRACT(MONTH FROM paid_date) - EXTRACT(MONTH FROM min_paid_date)) AS delta_month,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY paid_date) AS num_of_payment
FROM datamarts.finance AS t1
LEFT JOIN (SELECT user_id,offer_duration,free_days,payer FROM datamarts.marketing_dash) AS t2 on t1.user_id=t2.user_id
)
WHERE min_paid_date::date BETWEEN '2024-01-01' AND '2024-12-31' AND  paid_date::date BETWEEN '2025-01-01' AND '2025-05-31' AND payer=1
'''

df_fact = execute(query,user='kmekhtiev')
df_fact['paid_date'] = df_fact['paid_date'].astype('datetime64[ns]')
df_fact['paid_date_month'] = df_fact['paid_date'].dt.to_period('M')

Время выполнения функции: 1.8035 секунд



In [204]:
pivot_df = df_fact.pivot_table(
                    columns=['paid_date_month'], 
                #   index=['offer_type','paid_date_month'],
                    index=['offer_type'],
                    values=['user_id','payment'],
                    aggfunc={'user_id': 'count', 'payment': 'mean'}
)

pivot_df.fillna(0)

Unnamed: 0_level_0,payment,payment,payment,payment,payment,user_id,user_id,user_id,user_id,user_id
paid_date_month,2025-01,2025-02,2025-03,2025-04,2025-05,2025-01,2025-02,2025-03,2025-04,2025-05
offer_type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1 month,292.645873,293.525381,293.839006,294.419424,295.427896,1587,1182,1087,937,846
12 month,1119.466667,1161.571429,1168.652174,1207.44186,1214.0,45,56,46,43,45
3 month,410.513158,405.186869,401.353556,414.268456,408.668508,456,792,956,298,543
other_1_month,624.0,799.0,657.823529,674.166667,674.375,16,8,17,12,8


In [208]:
query = '''SELECT * FROM
(SELECT 
paid_date,
t1.user_id AS user_id,
payment,
CASE WHEN offer_duration='1 month' THEN '1 month'
     WHEN offer_duration='3 month' THEN '3 month'
     WHEN offer_duration='12 month' THEN '12 month'
     ELSE 'other_1_month'
     END offer_type,
free_days AS free_days_old,
offer_duration,
payer,
ends_at,
min(paid_date) OVER (PARTITION BY user_id) AS min_paid_date,
(EXTRACT(YEAR FROM paid_date) - EXTRACT(YEAR FROM min_paid_date)) * 12 +  
(EXTRACT(MONTH FROM paid_date) - EXTRACT(MONTH FROM min_paid_date)) AS delta_month,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY paid_date) AS num_of_payment
FROM datamarts.finance AS t1
LEFT JOIN (SELECT user_id,offer_duration,free_days,payer,ends_at FROM datamarts.marketing_dash) AS t2 on t1.user_id=t2.user_id
)
WHERE min_paid_date::date BETWEEN '2024-01-01' AND '2024-12-31' AND  paid_date::date BETWEEN '2025-01-01' AND '2025-05-31' AND payer=1 AND ends_at>='2025-06-01'
'''

df_fact = execute(query,user='kmekhtiev')
df_fact['paid_date'] = df_fact['paid_date'].astype('datetime64[ns]')
df_fact['paid_date_month'] = df_fact['paid_date'].dt.to_period('M')

Время выполнения функции: 1.4334 секунд



In [212]:
df_fact.groupby('offer_type')['user_id'].nunique()

offer_type
1 month           869
12 month          227
3 month          1788
other_1_month      56
Name: user_id, dtype: int64

# Код для выгрузки пользователей для грейс

In [240]:
query = f'''SELECT DISTINCT user_id FROM datamarts.marketing_dash
            WHERE   1=1
                    AND payer=1 
                    AND ends_at BETWEEN '2025-07-28' AND '2025-08-17' 
                    AND platform='cloudpayments' 
                    AND recurrent=True
'''

users = execute(query,user='kmekhtiev')

Время выполнения функции: 1.0599 секунд



In [242]:

users_shuffled = users.sample(frac=1, random_state=42).reset_index(drop=True)


mid_point = len(users_shuffled) // 2
test_df = users_shuffled.iloc[:mid_point]
control_df = users_shuffled.iloc[mid_point:]


test_df.to_csv('test.csv', index=False)
control_df.to_csv('control.csv', index=False)
users_shuffled.to_csv('all_users.csv',index=False)