In [1]:
import pandas as pd
import psycopg2
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import seaborn as sns 
import statsmodels.stats.proportion as proportion
from statsmodels.stats.proportion import proportions_ztest
from scipy.stats import ttest_ind,mannwhitneyu,shapiro,norm
from statsmodels.stats.weightstats import ztest
from tqdm import tqdm
import timeit
from scipy import stats
import math
from datetime import date, datetime, timedelta
import time
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
import warnings
warnings.filterwarnings("ignore")
import clickhouse_connect  


# `sys` is needed for the `sys.path` tweak below; import it here so the cell
# runs on a fresh kernel instead of failing with NameError.
import sys

# Extend the module search path so the local `credential` module can be imported.
# NOTE(review): this is an absolute, machine-specific path — consider moving it
# to an environment variable or a path relative to the notebook.
sys.path.append('/Users/kemran/Desktop/work_files/python_files')
from credential import postgres_secret,clickhouse_dwh_secret

def get_engine(user):
    """Create a SQLAlchemy engine for the database account named `user`.

    The account is looked up by comparing `user` with the ``'user'`` entry of
    `postgres_secret` first and `clickhouse_dwh_secret` second; the matching
    secret supplies the password, host and database name.

    Parameters
    ----------
    user : str
        Login name; must equal the ``'user'`` value of one of the two secrets.

    Returns
    -------
    sqlalchemy.engine.Engine
        Engine built from a ``postgresql://`` URL on port 6432 or a
        ``clickhouse://`` URL on port 8123, depending on which secret matched.

    Raises
    ------
    ValueError
        If `user` matches neither secret. Without this check the function
        would fall through to the return statement with no engine bound and
        fail with an uninformative UnboundLocalError.
    """
    if user == postgres_secret['user']:
        secret, dialect, port = postgres_secret, 'postgresql', 6432
    elif user == clickhouse_dwh_secret['user']:
        secret, dialect, port = clickhouse_dwh_secret, 'clickhouse', 8123
    else:
        raise ValueError(
            f"Unknown database user {user!r}: it matches neither "
            "postgres_secret['user'] nor clickhouse_dwh_secret['user']"
        )

    db_name = secret['db_name']
    password = secret['password']
    host = secret['host']
    # NOTE(review): the password is interpolated into the URL verbatim; if it can
    # contain characters such as '@' or '/', it needs URL-escaping — confirm.
    return create_engine(f'{dialect}://{user}:{password}@{host}:{port}/{db_name}')
    
# Module-level ClickHouse client created via clickhouse_connect, using the host
# and credentials from `clickhouse_dwh_secret` and the `datamarts` database.
connection_clickhouse = clickhouse_connect.get_client(
    host=clickhouse_dwh_secret['host'],
    port=8123,  # `port` is documented as an integer, so pass an int rather than the string '8123'
    username=clickhouse_dwh_secret['user'],
    password=clickhouse_dwh_secret['password'],
    database='datamarts',
)

    
def execute(SQL, user):
    """Run a SQL statement as `user` and return the rows as a DataFrame.

    An engine is obtained from `get_engine(user)`, the statement is executed
    inside a short-lived ORM session, and all rows are fetched into a
    DataFrame whose columns are the result keys. The elapsed time (rounded to
    4 decimals) is printed, followed by an empty line.

    Parameters
    ----------
    SQL : str
        Statement text; it is wrapped in `sqlalchemy.text` before execution.
    user : str
        Account name forwarded to `get_engine`.

    Returns
    -------
    pandas.DataFrame
        One row per fetched record.
    """
    # perf_counter is a monotonic clock, so the measured duration cannot be
    # skewed by wall-clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    engine = get_engine(user)
    try:
        Session = sessionmaker(bind=engine)  # session factory bound to this engine
        with Session() as session:  # the session is closed when the block exits
            result = session.execute(text(SQL))
            df = pd.DataFrame(result.fetchall(), columns=result.keys())
        execution_time = round(time.perf_counter() - start_time, 4)
    finally:
        # A new engine (and connection pool) is created on every call; release
        # its pooled connections so repeated calls do not accumulate them.
        engine.dispose()

    print(f"Время выполнения функции: {execution_time} секунд")
    print()
    return df

In [5]:
# SQL text for the load below. It has no placeholders, so a plain
# triple-quoted string is used.
query = '''
    SELECT 
    ends_at,
    user_id,
    recurrent,
    renewal_off_date,
    reg_source,
    device,
    free_days,
    created_at::date AS trial_date,
    CASE WHEN first_prolong_date!='1970-01-01' THEN 'subs' ELSE 'trial' END AS trial_subs_type
    FROM datamarts.marketing_dash
    WHERE created_at!='1970-01-01' AND ends_at BETWEEN now() AND now() + INTERVAL '14' day
    AND state!='canceled' AND promo_type!='cards' AND recurrent=TRUE AND renewal_off_date='1970-01-01'
'''
df = execute(query, user='kmekhtiev')

# Convert both date columns to pandas datetimes.
for date_col in ('ends_at', 'trial_date'):
    df[date_col] = pd.to_datetime(df[date_col])

Время выполнения функции: 1.1852 секунд



In [7]:
# Show the DataFrame produced by the query cell as this cell's output.
df

Unnamed: 0,ends_at,user_id,recurrent,renewal_off_date,reg_source,device,free_days,trial_date,trial_subs_type
0,2025-04-13,2ca46052-8c6e-4acc-abb2-590275516f2f,true,1970-01-01,none,web,14,2024-08-30,subs
1,2025-04-13,ebb6d6cb-1e3a-4883-b219-f85d6f797e73,true,1970-01-01,none,android,14,2024-08-30,subs
2,2025-04-24,044f3a60-7f79-4654-ab77-cad634ebbb3b,true,1970-01-01,none,android,3,2025-04-10,trial
3,2025-04-21,cf9c7614-68a6-4d36-8ea8-cbb703894c4c,true,1970-01-01,none,web,30,2024-08-31,subs
4,2025-04-14,3bf70866-314b-4fbe-b9d5-036e1cff691f,true,1970-01-01,none,web,14,2024-08-31,subs
...,...,...,...,...,...,...,...,...,...
5226,2025-04-19,bbf56ff0-5865-4e60-8ad7-84730c6160e4,true,1970-01-01,none,web,14,2025-01-19,subs
5227,2025-04-23,00601bc7-017c-4e61-9e65-369c75440e09,true,1970-01-01,none,web,243,2024-08-23,subs
5228,2025-04-19,273cae5f-cb73-42f3-a57e-e43bece241dd,true,1970-01-01,google-play,android,35,2025-03-15,trial
5229,2025-04-20,6fa716c9-4b2f-4018-8bb8-c946039041c5,true,1970-01-01,none,web,3,2024-10-17,subs
