In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.stats import norm, ttest_ind
from datetime import datetime
import os

import ipywidgets as widgets

In [2]:
def read_database(file_name):
    return pd.read_csv(file_name)

In [3]:
df_sales = read_database('2022-04-01T12_df_sales.csv')
df_sales['date'] = pd.to_datetime(df_sales['date'])

In [4]:
print(df_sales.shape)
df_sales.head(2)

(203847, 6)


Unnamed: 0,sale_id,date,count_pizza,count_drink,price,user_id
0,1000001,2022-02-04 10:00:24,1,0,720,1c1543
1,1000002,2022-02-04 10:02:28,1,1,930,a9a6e8


In [5]:
mask1 = df_sales['date']>=datetime(2022, 2, 21)
mask2 = df_sales['date']<datetime(2022, 2, 28)

In [6]:
df_sales = df_sales.loc[mask1&mask2]
df_sales.shape

(25347, 6)

In [7]:
df_sales2 = df_sales.groupby('user_id', as_index=False).agg({'price': 'sum'})
df_sales2.head(3)

Unnamed: 0,user_id,price
0,00045f,720
1,0006bb,1260
2,000b52,3480


In [9]:
def get_minimal_determinable_effect(std, sample_size, alpha=0.05, beta=0.2):
    t_alpha = norm.ppf(1 - alpha / 2, loc=0, scale=1)
    t_beta = norm.ppf(1 - beta, loc=0, scale=1)
    disp_sum_sqrt = (2 * (std ** 2)) ** 0.5
    mde = (t_alpha + t_beta) * disp_sum_sqrt / np.sqrt(sample_size)
    return mde

In [11]:
def get_sample_size_arb(mu, std, eff=1.01, alpha=0.05, beta=0.2):
    epsilon = (eff - 1) * mu

    return get_sample_size_abs(epsilon, std=std, alpha=alpha, beta=beta)

def check_ttest(a, b, alpha=0.05):
    """Тест Стьюдента. Возвращает 1, если отличия значимы."""
    _, pvalue = ttest_ind(a, b)
    return int(pvalue < alpha)

In [31]:
mu = 0
sample_size = round(df_sales['user_id'].count()/2)
alpha = 0.05
beta = 0.1
std = df_sales2['price'].std()

mde = get_minimal_determinable_effect(
    std, sample_size, alpha=alpha, beta=beta
)
print(f'mde = {round(mde)}\n')

print('Ошибки I и II рода:')
for _ in range(5):
    result_aa = []
    result_ab = []
    for _ in range(5000):
        a_one = np.random.normal(mu, std, sample_size)
        a_two = np.random.normal(mu, std, sample_size)
        b = np.random.normal(mu + mde, std, sample_size)
        result_aa.append(check_ttest(a_one, a_two, alpha=alpha))
        result_ab.append(check_ttest(a_one, b, alpha=alpha))

    error_first = np.mean(result_aa)
    error_second = 1 - np.mean(result_ab)
    print(f'  {error_first:0.3f} \t{error_second:0.3f}')

mde = 33

Ошибки I и II рода:
  0.047 	0.095
  0.051 	0.098
  0.052 	0.104
  0.052 	0.105
  0.046 	0.099
