In [213]:
import pandas as pd
import psycopg2
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import seaborn as sns 
import statsmodels.stats.proportion as proportion
from scipy.stats import ttest_ind,mannwhitneyu,shapiro,norm
from statsmodels.stats.weightstats import ztest
from tqdm import tqdm
import timeit
from scipy import stats
import math
from datetime import date, datetime, timedelta
import time
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
import warnings
warnings.filterwarnings("ignore")
import clickhouse_connect  



from credential import postgres_secret,clickhouse_dwh_secret

def get_engine(user):
    if user == postgres_secret['user']:
        db_name = postgres_secret['db_name']
        password = postgres_secret['password']
        host = postgres_secret['host']
        engine = create_engine(f'postgresql://{user}:{password}@{host}:6432/{db_name}')
    elif user == clickhouse_dwh_secret['user']:
            db_name = clickhouse_dwh_secret['db_name'] 
            password = clickhouse_dwh_secret['password']
            host = clickhouse_dwh_secret['host']
            engine = create_engine(f'clickhouse://{user}:{password}@{host}:8123/{db_name}')
    return engine
    
connection_clickhouse = clickhouse_connect.get_client(
    host = clickhouse_dwh_secret['host'],
    port= '8123',
    username = clickhouse_dwh_secret['user'],
    password = clickhouse_dwh_secret['password'],
    database='datamarts'
    )

    
def execute(SQL, user):
    start_time = time.time()  # запоминаем время начала выполнения функции
    engine = get_engine(user)
    Session = sessionmaker(bind=engine)  # sessions factory ()
    with Session() as session: # open session
        result = session.execute(text(SQL))
        df = pd.DataFrame(result.fetchall(), columns=result.keys())
        
    end_time = time.time()  # запоминаем время окончания выполнения функции
    execution_time = round(end_time - start_time,4) # вычисляем время выполнения   
    
    print(f"Время выполнения функции: {execution_time} секунд")
    print()
    return df

In [215]:
# Новые Посетители
query = '''SELECT 
toStartOfMonth(min_date_visitor) AS date_month,
uniq(visitor_id) AS cnt_us
FROM
(SELECT min(date) AS min_date_visitor,visitor_id
FROM datamarts.clean_event
GROUP BY 2)
WHERE date_month BETWEEN '2025-01-01' AND '2025-05-01'
GROUP BY 1
'''

pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='date_month',aggfunc='sum')

Время выполнения функции: 6.2942 секунд



date_month,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01
cnt_us,499901,357258,331243,315802,473019


In [219]:
# Новые реги
query = '''SELECT 
toStartOfMonth(reg_date) AS reg_month,
uniq(profile_id) AS cnt_user
FROM datamarts.marketing_dash
WHERE reg_month BETWEEN '2025-01-01' AND '2025-05-01'
--WHERE reg_month BETWEEN now() - interval 7 month AND now() - interval 1 month
GROUP BY 1
'''

pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='reg_month',aggfunc='sum')

Время выполнения функции: 0.7009 секунд



reg_month,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01
cnt_user,35618,32653,42872,40723,34532


In [221]:
# Новые Триалы ВСЕ

query = '''SELECT 
toStartOfMonth(created_at) AS trial_month,
uniq(profile_id) AS cnt_user
FROM datamarts.marketing_dash x
WHERE trial_month BETWEEN '2025-01-01' AND '2025-05-01'
--WHERE trial_month BETWEEN now() - interval 7 month AND now() - interval 1 month
GROUP BY 1
'''


pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='trial_month',aggfunc='sum')


Время выполнения функции: 0.7329 секунд



trial_month,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01
cnt_user,10422,8384,10169,9966,8317


In [261]:
# Новые Триалы РАЗБИВКА ПО ПЕРИОДАМ


query = '''SELECT 
toStartOfMonth(created_at) AS trial_month,
CASE WHEN free_days=3 AND offer_duration='12 month' THEN '(1) 3_month12'
     WHEN free_days=3 AND offer_duration='3 month' THEN '(2) 3_month3'
     WHEN free_days=14 AND offer_duration='1 month' THEN '(3) 14_month1'
     WHEN free_days=30 AND offer_duration ='1 month' THEN '(4) 30_month1'
     WHEN free_days=30 AND offer_duration ='3 month' THEN '(5) 30_month3'
     WHEN free_days=35 AND offer_duration ='3 month' THEN '(6) 35_month3'
     WHEN free_days=45 AND offer_duration ='3 month' THEN '(7) 45_month3'
     ELSE '(8) other'     --toString(free_days)
END AS free_days,
uniq(profile_id) AS cnt_user
FROM datamarts.marketing_dash x
WHERE trial_month::date BETWEEN '2024-10-01' AND '2024-12-01'
GROUP BY 1,2
ORDER BY 1,2
'''

pivot_table = pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='trial_month',index='free_days',aggfunc='sum')
pivot_table = pivot_table.fillna(0)
pivot_table = pivot_table.astype(int)  

#pivot_table.iloc[:-1]
pivot_table

Время выполнения функции: 0.7374 секунд



Unnamed: 0_level_0,cnt_user,cnt_user,cnt_user
trial_month,2024-10-01,2024-11-01,2024-12-01
free_days,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
(1) 3_month12,198,84,101
(2) 3_month3,121,145,107
(3) 14_month1,1220,1367,1453
(4) 30_month1,154,992,1211
(5) 30_month3,1040,854,388
(6) 35_month3,3595,4798,6030
(7) 45_month3,437,218,341
(8) other,666,1013,825


In [263]:
# Новые подписчики ВСЕ
query = '''SELECT 
toStartOfMonth(first_prolong_date) AS subs_month,
uniq(profile_id) AS cnt_user
FROM datamarts.marketing_dash AS x
--WHERE subs_month BETWEEN now() - interval 7 month AND now() - interval 1 month
WHERE reg_date!='1970-01-01' AND subs_month::date BETWEEN '2024-10-01' AND '2024-12-01'
GROUP BY 1
'''

pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='subs_month',aggfunc='sum')

Время выполнения функции: 0.7702 секунд



subs_month,2024-10-01,2024-11-01,2024-12-01
cnt_user,1483,2018,2640


In [265]:
# Новые подписчики РАЗБИВКА ПО ПЕРИОДАМ
query = '''SELECT 
toStartOfMonth(first_prolong_date) AS subs_month,
CASE WHEN free_days=3 AND offer_duration='12 month' THEN '(1) 3_month12'
     WHEN free_days=3 AND offer_duration='3 month' THEN '(2) 3_month3'
     WHEN free_days=14 AND offer_duration='1 month' THEN '(3) 14_month1'
     WHEN free_days=30 AND offer_duration ='1 month' THEN '(4) 30_month1'
     WHEN free_days=30 AND offer_duration ='3 month' THEN '(5) 30_month3'
     WHEN free_days=35 AND offer_duration ='3 month' THEN '(6) 35_month3'
     WHEN free_days=45 AND offer_duration ='3 month' THEN '(7) 45_month3'
     ELSE '(8) other'    --toString(free_days)
END AS free_days,
uniq(profile_id) AS cnt_user
FROM datamarts.marketing_dash AS x
WHERE subs_month::date BETWEEN '2024-10-01' AND '2024-12-01'
--subs_month BETWEEN now() - interval 7 month AND now() - interval 1 month
AND reg_date!='1970-01-01'
GROUP BY 1,2
'''

pivot_table = pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='subs_month',index='free_days',aggfunc='sum')
pivot_table = pivot_table.fillna(0)
pivot_table = pivot_table.astype(int)  

# pivot_table.iloc[:-1]
pivot_table

Время выполнения функции: 0.9118 секунд



Unnamed: 0_level_0,cnt_user,cnt_user,cnt_user
subs_month,2024-10-01,2024-11-01,2024-12-01
free_days,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
(1) 3_month12,94,43,77
(2) 3_month3,49,58,68
(3) 14_month1,302,417,397
(4) 30_month1,68,100,175
(5) 30_month3,166,271,251
(6) 35_month3,594,992,1375
(7) 45_month3,50,61,26
(8) other,160,76,271


In [229]:
# ARPU ВСЕ

query = '''
SELECT 
toStartOfMonth(paid_date) AS paid_month,
sum(payment)/uniq(user_id) AS ARPPU
FROM datamarts.finance 
WHERE paid_month::date BETWEEN '2024-10-01' AND '2024-12-01'
--WHERE paid_month BETWEEN now() - interval 7 month AND now() - interval 1 month
GROUP BY 1
'''

pivot_df = pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='paid_month',aggfunc='sum')
pivot_df = pivot_df.astype(int) 

pivot_df

Время выполнения функции: 0.6204 секунд



paid_month,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01
ARPPU,358,344,348,378,359


In [267]:
# ARPU ПО СЕГМЕНТАМ

query = '''
SELECT 
toStartOfMonth(paid_date) AS paid_month,
CASE WHEN t2.free_days=3 AND offer_duration='12 month' THEN '(1) 3_month12'
     WHEN t2.free_days=3 AND offer_duration='3 month' THEN '(2) 3_month3'
     WHEN t2.free_days=14 AND offer_duration='1 month' THEN '(3) 14_month1'
     WHEN t2.free_days=30 AND offer_duration ='1 month' THEN '(4) 30_month1'
     WHEN t2.free_days=30 AND offer_duration ='3 month' THEN '(5) 30_month3'
     WHEN t2.free_days=35 AND offer_duration ='3 month' THEN '(6) 35_month3'
     WHEN t2.free_days=45 AND offer_duration ='3 month' THEN '(7) 45_month3'
     ELSE '(8) other'  --toString(free_days)
END AS free_days,
sum(payment)/uniq(t1. user_id) AS ARPPU
FROM datamarts.finance AS t1
LEFT JOIN (SELECT user_id,offer_duration,free_days FROM datamarts.marketing_dash) AS t2 on t1.user_id=t2.user_id
WHERE paid_month::date BETWEEN '2024-10-01' AND '2024-12-01'
--WHERE paid_month BETWEEN now() - interval 7 month AND now() - interval 1 month
--AND free_days IN ('3_month3','3_month12','14','30','35','45')
GROUP BY 1,2
'''



pivot_table = pd.pivot_table(data = execute(query,user='kmekhtiev'),columns='paid_month',index='free_days',aggfunc='sum')
pivot_table = pivot_table.fillna(0)
pivot_table = pivot_table.astype(int) 

# pivot_table.iloc[:-1]
pivot_table

Время выполнения функции: 0.9866 секунд



Unnamed: 0_level_0,ARPPU,ARPPU,ARPPU
paid_month,2024-10-01,2024-11-01,2024-12-01
free_days,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
(1) 3_month12,1157,1127,1125
(2) 3_month3,477,477,472
(3) 14_month1,300,298,298
(4) 30_month1,299,217,299
(5) 30_month3,396,397,397
(6) 35_month3,394,394,395
(7) 45_month3,392,396,388
(8) other,283,293,278


In [233]:
# Число платящих и ретеншн в повторную подписку ВСЕ

#pd.options.display.float_format = '{:.6f}'.format 

query = '''   
SELECT * FROM 
(SELECT 
paid_date,
user_id,
payment,
min(paid_date) OVER (PARTITION BY user_id) AS min_paid_date,
(EXTRACT(YEAR FROM paid_date) - EXTRACT(YEAR FROM min_paid_date)) * 12 +  
(EXTRACT(MONTH FROM paid_date) - EXTRACT(MONTH FROM min_paid_date)) AS delta_month,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY paid_date) AS num_of_payment
FROM datamarts.finance
)
WHERE min_paid_date::date BETWEEN '2025-01-01' AND '2025-05-31'
--WHERE min_paid_date BETWEEN now() - interval 7 month AND now() - interval 1 MONTH
'''

df_payment = execute(query,user='kmekhtiev')

pivot_table = pd.pivot_table(data=df_payment[df_payment['num_of_payment']<7],columns='num_of_payment',values=['user_id'],aggfunc={'user_id': "nunique"})


user_id_share = {}
for i in pivot_table.columns:
    user_id_share[i] = round(pivot_table[i]['user_id']/pivot_table[1]['user_id'],4)
    
df_user_id_share = pd.DataFrame(data=user_id_share, index=['retention to resubs'])
pivot_table.astype('int')
df_concat = pd.concat([pivot_table,df_user_id_share])

df_concat

Время выполнения функции: 1.4143 секунд



Unnamed: 0,1,2,3,4,5,6
user_id,14314.0,4233.0,1081.0,546.0,292.0,123.0
retention to resubs,1.0,0.2957,0.0755,0.0381,0.0204,0.0086


In [269]:
# Число платящих и ретеншн в повторную подписку РАЗБИВКА ПО ПЕРИОДАМ

#pd.options.display.float_format = '{:.6f}'.format 

query = '''   
SELECT * FROM
(SELECT 
paid_date,
t1.user_id AS user_id,
payment,
CASE WHEN t2.free_days=3 AND offer_duration='12 month' THEN '(1) 3_month12'
     WHEN t2.free_days=3 AND offer_duration='3 month' THEN '(2) 3_month3'
     WHEN t2.free_days=14 AND offer_duration='1 month' THEN '(3) 14_month1'
     WHEN t2.free_days=30 AND offer_duration ='1 month' THEN '(4) 30_month1'
     WHEN t2.free_days=30 AND offer_duration ='3 month' THEN '(5) 30_month3'
     WHEN t2.free_days=35 AND offer_duration ='3 month' THEN '(6) 35_month3'
     WHEN t2.free_days=45 AND offer_duration ='3 month' THEN '(7) 45_month3'
     ELSE '(8) other'  --toString(free_days)
END AS free_days,
min(paid_date) OVER (PARTITION BY user_id) AS min_paid_date,
(EXTRACT(YEAR FROM paid_date) - EXTRACT(YEAR FROM min_paid_date)) * 12 +  
(EXTRACT(MONTH FROM paid_date) - EXTRACT(MONTH FROM min_paid_date)) AS delta_month,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY paid_date) AS num_of_payment
FROM datamarts.finance AS t1
LEFT JOIN (SELECT user_id,offer_duration,free_days FROM datamarts.marketing_dash) AS t2 on t1.user_id=t2.user_id
)
WHERE min_paid_date::date BETWEEN '2024-10-01' AND '2024-12-01'
--WHERE min_paid_date BETWEEN now() - interval 7 month AND now() - interval 1 MONTH
'''

df_payment = execute(query,user='kmekhtiev')

pivot_table = pd.pivot_table(data=df_payment[df_payment['num_of_payment']<7],columns='num_of_payment',index='free_days',values=['user_id'],aggfunc={'user_id': "nunique"})

pivot_table = pivot_table.fillna(0)
r = pivot_table.div(pivot_table[('user_id', 1)], axis=0)

# pd.concat([pivot_table,r]).sort_values(by='free_days').iloc[:-2]
pd.concat([pivot_table,r]).sort_values(by='free_days')

Время выполнения функции: 2.5539 секунд



Unnamed: 0_level_0,user_id,user_id,user_id,user_id,user_id,user_id
num_of_payment,1,2,3,4,5,6
free_days,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
(1) 3_month12,130.0,3.0,0.0,0.0,0.0,0.0
(1) 3_month12,1.0,0.023077,0.0,0.0,0.0,0.0
(2) 3_month3,112.0,72.0,50.0,2.0,0.0,0.0
(2) 3_month3,1.0,0.642857,0.446429,0.017857,0.0,0.0
(3) 14_month1,726.0,423.0,304.0,232.0,198.0,164.0
(3) 14_month1,1.0,0.582645,0.418733,0.319559,0.272727,0.225895
(4) 30_month1,162.0,48.0,36.0,26.0,18.0,14.0
(4) 30_month1,1.0,0.296296,0.222222,0.160494,0.111111,0.08642
(5) 30_month3,426.0,188.0,118.0,0.0,0.0,0.0
(5) 30_month3,1.0,0.441315,0.276995,0.0,0.0,0.0


In [None]:
# Watchtime ВСЕ
query = '''SELECT 
toStartOfMonth(date) AS date_month,
sum(watchtime)/60 AS watchtime,
uniq(profile_id) AS cnt_user,
round(watchtime/cnt_user,0)::int AS watchtime_per_user
FROM datamarts.watchtime_by_day AS x
WHERE date_month BETWEEN now() - interval 7 month AND now() - interval 1 MONTH
GROUP BY 1
'''

pd.pivot_table(execute(query,user='kmekhtiev'),columns='date_month')

In [None]:
# Watchtime РАЗБИВКА ПО СЕГМЕНТАМ
query = '''SELECT
date_month,
free_days,
watchtime_per_user
FROM
(SELECT 
toStartOfMonth(date) AS date_month,
CASE WHEN free_days='3' AND price_cents/100 IN (399,499) THEN '3_month3'
     WHEN free_days='3' AND price_cents/100=1190 THEN '3_month12'
     ELSE toString(free_days)
END AS free_days,
sum(watchtime)/60 AS watchtime,
uniq(profile_id) AS cnt_user,
round(watchtime/cnt_user,0)::int AS watchtime_per_user
FROM datamarts.watchtime_by_day AS x
LEFT JOIN datamarts.marketing_dash AS t on x.profile_id=t.profile_id
WHERE date_month BETWEEN now() - interval 7 month AND now() - interval 1 MONTH
AND free_days IN ('3_month3','3_month12','14','30','35','45')
GROUP BY 1,2
)
'''

pd.pivot_table(execute(query,user='kmekhtiev'),columns='date_month',index='free_days')

# Фактическая выручка от новых пользователей

In [301]:
query = '''SELECT * FROM
(SELECT 
paid_date,
t1.user_id AS user_id,
payment,
CASE WHEN t2.free_days=3 AND offer_duration='12 month' THEN '(1) 3_month12'
     WHEN t2.free_days=3 AND offer_duration='3 month' THEN '(2) 3_month3'
     WHEN t2.free_days=14 AND offer_duration='1 month' THEN '(3) 14_month1'
     WHEN t2.free_days=30 AND offer_duration ='1 month' THEN '(4) 30_month1'
     WHEN t2.free_days=30 AND offer_duration ='3 month' THEN '(5) 30_month3'
     WHEN t2.free_days=35 AND offer_duration ='3 month' THEN '(6) 35_month3'
     WHEN t2.free_days=45 AND offer_duration ='3 month' THEN '(7) 45_month3'
     ELSE '(8) other'  --toString(free_days)
END AS free_days_new,
free_days AS free_days_old,
offer_duration,
min(paid_date) OVER (PARTITION BY user_id) AS min_paid_date,
(EXTRACT(YEAR FROM paid_date) - EXTRACT(YEAR FROM min_paid_date)) * 12 +  
(EXTRACT(MONTH FROM paid_date) - EXTRACT(MONTH FROM min_paid_date)) AS delta_month,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY paid_date) AS num_of_payment
FROM datamarts.finance AS t1
LEFT JOIN (SELECT user_id,offer_duration,free_days FROM datamarts.marketing_dash) AS t2 on t1.user_id=t2.user_id
)
WHERE min_paid_date::date BETWEEN '2025-01-01' AND '2025-05-31' AND  paid_date::date BETWEEN '2025-01-01' AND '2025-05-31'
'''

df_fact = execute(query,user='kmekhtiev')
df_fact['paid_date'] = df_fact['paid_date'].astype('datetime64[ns]')
df_fact['paid_date_month'] = df_fact['paid_date'].dt.to_period('M')

Время выполнения функции: 1.8993 секунд



In [303]:
df_fact[(df_fact['free_days_new']=='(2) 3_month12') & (df_fact['num_of_payment']>2)]

Unnamed: 0,paid_date,user_id,payment,free_days_new,free_days_old,offer_duration,min_paid_date,delta_month,num_of_payment,paid_date_month


In [305]:
df_fact[df_fact['free_days_new']=='(3) 14_month1'].groupby('paid_date_month').agg({'payment':'sum','user_id':'nunique'}).reset_index()

Unnamed: 0,paid_date_month,payment,user_id
0,2025-01,137518,424
1,2025-02,169381,568
2,2025-03,236205,783
3,2025-04,333835,1073
4,2025-05,405706,1184


In [307]:
pivot_df = df_fact.pivot_table(
                    columns='num_of_payment', 
                    index=['free_days_new','paid_date_month'],
                    values=['user_id','payment'],
                    aggfunc={'user_id': 'count', 'payment': 'sum'}
)

pivot_df.fillna(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,payment,payment,payment,payment,payment,payment,user_id,user_id,user_id,user_id,user_id,user_id
Unnamed: 0_level_1,num_of_payment,1,2,3,4,5,6,1,2,3,4,5,6
free_days_new,paid_date_month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
(1) 3_month12,2025-01,73830.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0,0.0,0.0,0.0
(1) 3_month12,2025-02,35504.0,299.0,0.0,0.0,0.0,0.0,31.0,1.0,0.0,0.0,0.0,0.0
(1) 3_month12,2025-03,69615.0,0.0,299.0,0.0,0.0,0.0,59.0,0.0,1.0,0.0,0.0,0.0
(1) 3_month12,2025-04,271139.0,2740.0,0.0,299.0,0.0,0.0,205.0,2.0,0.0,1.0,0.0,0.0
(1) 3_month12,2025-05,151154.0,3100.0,0.0,0.0,1550.0,0.0,99.0,2.0,0.0,0.0,1.0,0.0
(2) 3_month3,2025-01,32382.0,399.0,0.0,0.0,0.0,0.0,68.0,1.0,0.0,0.0,0.0,0.0
(2) 3_month3,2025-02,24150.0,0.0,0.0,0.0,0.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0
(2) 3_month3,2025-03,27792.0,499.0,0.0,0.0,0.0,0.0,58.0,1.0,0.0,0.0,0.0,0.0
(2) 3_month3,2025-04,52980.0,15119.0,399.0,399.0,0.0,0.0,100.0,31.0,1.0,1.0,0.0,0.0
(2) 3_month3,2025-05,42084.0,16217.0,0.0,0.0,0.0,0.0,66.0,33.0,0.0,0.0,0.0,0.0


In [309]:
print(df_fact[df_fact['free_days_new']!='(8) other']['user_id'].nunique())
print(df_fact[df_fact['free_days_new']!='(8) other']['payment'].sum())

12658
6611421


In [331]:
r = df_fact[df_fact['free_days_new']=='(8) other'].groupby(['paid_date_month','offer_duration','free_days_old'])['user_id'].nunique().reset_index()

t = r.pivot_table(index=['offer_duration','free_days_old'],
               columns='paid_date_month',
               values='user_id'
              ).fillna(0)


pd.set_option('display.max_rows', None)
# Также можно установить display.max_columns, если нужно показать все колонки
pd.set_option('display.max_columns', None)

t

Unnamed: 0_level_0,paid_date_month,2025-01,2025-02,2025-03,2025-04,2025-05
offer_duration,free_days_old,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
,14,1.0,1.0,2.0,1.0,3.0
,3,0.0,0.0,0.0,1.0,1.0
0,3,0.0,0.0,1.0,0.0,0.0
1 month,0,2.0,4.0,3.0,3.0,3.0
1 month,100,0.0,0.0,0.0,0.0,2.0
1 month,104,1.0,1.0,1.0,0.0,0.0
1 month,106,0.0,0.0,0.0,0.0,1.0
1 month,110,0.0,0.0,0.0,1.0,0.0
1 month,118,0.0,0.0,0.0,1.0,0.0
1 month,120,0.0,0.0,0.0,16.0,20.0


# Кейс с добилливанием

In [None]:
query = '''WITH all_info AS (SELECT  
        s.user_id AS user_id,
        s.created_at as subscription_created_at,
        s.id AS id,
        leadInFrame(id) OVER (PARTITION BY user_id ORDER BY invoice_created_at  ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS next_id,
        i.state as invoice_state,
        leadInFrame(invoice_state) OVER (PARTITION BY user_id ORDER BY invoice_created_at  ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS next_invoice_state,
        i.id as invoice_id,
        i.created_at as invoice_created_at,
        leadInFrame(invoice_created_at) OVER (PARTITION BY user_id ORDER BY invoice_created_at  ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS next_invoice_created_at,  
        next_invoice_created_at::date - invoice_created_at::date delta_date,
        i.price_cents AS price_cents,
        i.price_currency AS price_currency,
        i.refund_amount_cents AS refund_amount_cents,
        ROW_NUMBER() OVER (PARTITION BY user_id,id,id,user_type,invoice_state ORDER BY invoice_created_at) AS rn_num,
        CASE WHEN price_cents<=100 AND invoice_state='success' THEN 'trial'
             WHEN price_cents<=100 AND invoice_state IN ('failure','initial') THEN 'not_success_trial'
             ELSE 'subs'
             END user_type,
        CASE WHEN free_days='3' AND price_cents/100 IN (399,499) THEN '3_month3'
             WHEN free_days='3' AND price_cents/100=1190 THEN '3_month12'
             ELSE toString(free_days)
        END AS free_days,
        leadInFrame(user_type) OVER (PARTITION BY user_id ORDER BY invoice_created_at  ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS next_user_type, 
        t2.renewal_off_date AS renewal_off_date,
        CASE WHEN renewal_off_date!='1970-01-01' THEN 1 ELSE 0 END unsubscribe_type,
        t3.offer_duration AS offer_duration,
        t3.promo_type,
        i.paid_at AS paid_at,
        --i.period_end,
        s.platform AS platform,
       -- i.payment_target,
        i.deleted_at,
        i.refunded_at
    FROM raw.viju__product_x__public__invoices i
    LEFT JOIN raw.viju__product_x__public__subscriptions s 
        ON i.subscription_id = s.id  
    LEFT JOIN (SELECT
                subscription_id,
                max(created_at) as renewal_off_date
                FROM raw.viju__product_x__public__subscription_cancel_reasons
                WHERE created_at >= '2022-03-01'
                GROUP by 1
                ) AS t2 ON i.subscription_id=t2.subscription_id
    INNER JOIN datamarts.marketing_dash AS t3 ON s.user_id=t3.user_id
    WHERE s.platform != 'api'
    ORDER BY s.user_id, invoice_created_at 
    )
--    
SELECT * FROM all_info
WHERE invoice_created_at::date BETWEEN now() - interval 7 month AND now() - interval 1 MONTH
--AND user_type!='not_success_trial'
AND platform!='payture'
'''

df_failure = execute(query,user='kmekhtiev')

## Триальщики,у которых не удается списать деньги 

In [None]:
# ВСЕ
df_failure['invoice_created_month'] = df_failure['invoice_created_at'].dt.to_period('M')
df_failure_trial = df_failure[(df_failure['user_type']=='trial') & \
                        (df_failure['next_invoice_state']=='failure') & \
                        (df_failure['invoice_created_month']<'2025-02-01') & \
                        (df_failure['invoice_created_month']>'2024-08-01')]
                                                                                                                        

df_failure_trial_agg = df_failure_trial.groupby(['invoice_created_month'])['user_id'].count().reset_index()
pd.pivot_table(df_failure_trial_agg,columns='invoice_created_month')

In [None]:
# РАЗБИВКА ПО СЕГМЕНТАМ
df_failure['invoice_created_month'] = df_failure['invoice_created_at'].dt.to_period('M')
df_failure_trial = df_failure[(df_failure['user_type']=='trial') & \
                        (df_failure['next_invoice_state']=='failure') & \
                        (df_failure['invoice_created_month']<'2025-02-01') & \
                        (df_failure['invoice_created_month']>'2024-08-01') & \
                        (df_failure['free_days'].isin(['3_month3','3_month12','14','30','35','45']))]
                                                                                                                        

df_failure_trial_agg = df_failure_trial.groupby(['invoice_created_month','free_days'])['user_id'].count().reset_index()
pd.pivot_table(df_failure_trial_agg,columns='invoice_created_month',index='free_days')

## Подписчики, у которых нет денег после первого месяца

In [None]:
# ВСЕ
df_failure['invoice_created_month'] = df_failure['invoice_created_at'].dt.to_period('M')
df_failure_subs = df_failure[(df_failure['user_type']=='subs') & \
                        (df_failure['invoice_state']=='success') & \
                        (df_failure['next_invoice_state']=='failure') & \
                        (df_failure['invoice_created_month']<'2025-02-01') & \
                        (df_failure['invoice_created_month']>'2024-08-01')]
                                                                                                                        

df_failure_subs_agg = df_failure_subs.groupby(['invoice_created_month'])['user_id'].count().reset_index()
pd.pivot_table(df_failure_subs_agg,columns='invoice_created_month')

In [None]:
# РАЗБИВКА ПО СЕГМЕНТАМ
df_failure['invoice_created_month'] = df_failure['invoice_created_at'].dt.to_period('M')
df_failure_subs = df_failure[(df_failure['user_type']=='subs') & \
                        (df_failure['invoice_state']=='success') & \
                        (df_failure['next_invoice_state']=='failure') & \
                        (df_failure['invoice_created_month']<'2025-02-01') & \
                        (df_failure['invoice_created_month']>'2024-08-01') & \
                        (df_failure['free_days'].isin(['3_month3','3_month12','14','30','35','45']))]
                                                                                                                        

df_failure_subs_agg = df_failure_subs.groupby(['invoice_created_month','free_days'])['user_id'].count().reset_index()
pd.pivot_table(df_failure_subs_agg,columns='invoice_created_month',index='free_days')