# R. Доверительные интервалы

На тех же данных постройте доверительные интервалы для:

Δ gmv - разницы средних gmv

Δ gmv, % - процентного изменения средних gmv

Δ aov - разницы средних чеков

Δ aov, % - процентного изменения средних чеков

Используйте данные из файла [synthetic_gmv_data_1.2.csv](https://github.com/dakhakimova/YSDA_ABweek/blob/476cbc4a49e1f4dfcdb376d69239b6103fbad932/synthetic_gmv_data_1.2.csv)

## Формат вывода


В ответе выведите 4 замкнутых интервала через пробел.
Целую и дробную часть чисел разделяйте точкой.



Пример ответа:

[0.239, 0.179] [1.332, 2.007] [2.019, 2.025] [0.808, 2.004]


## Примечание

Округлите до 3-го знака после точки.

Везде используйте распределение Стьюдента (вместо нормального), для степеней свободы используйте упрощенную формулу:
n+m−2 (количество уников теста + количество уников контроля - 2).

# Решение

In [96]:
import pandas as pd
import scipy.stats as stats
import numpy as np

In [97]:
def safe_divide(x, y):
    """Return ``x / y``, or NaN when the division raises ``ZeroDivisionError``.

    Only ``ZeroDivisionError`` is handled; operand types whose division does
    not raise it get whatever ``x / y`` evaluates to.
    """
    try:
        quotient = x / y
    except ZeroDivisionError:
        quotient = np.nan
    return quotient

In [98]:
# Raw-content URL of the dataset, pinned to a fixed commit so the numbers are
# reproducible. The github.com ".../blob/..." address from the task statement
# serves an HTML page rather than the CSV itself, so it is not used here.
url = (
    "https://raw.githubusercontent.com/dakhakimova/YSDA_ABweek/"
    "476cbc4a49e1f4dfcdb376d69239b6103fbad932/synthetic_gmv_data_1.2.csv"
)
df = pd.read_csv(url)

In [99]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,user_id,gmv,group_name
0,myo4ixol31,1428,test
1,myo4ixol31,1428,test
2,myo4ixol31,1071,test
3,myo4ixol31,1071,test
4,pkzf2889ww,351,test


In [100]:
# Split the rows into the two experiment arms using the `group_name` column.
test = df[df['group_name'] == 'test']
control = df[df['group_name'] == 'control']

In [101]:
# Total GMV per user in each arm: rows are summed within each `user_id`,
# so every user contributes exactly one observation.
X_test = test.groupby('user_id')['gmv'].sum()
X_control = control.groupby('user_id')['gmv'].sum()

In [102]:
def delta_gmv_std(X: pd.Series, Y: pd.Series):
    """Return the standard error of ``mean(X) - mean(Y)``.

    Each sample contributes its sample variance (pandas default ``ddof=1``)
    divided by its size; the two contributions are added with no covariance
    term and the square root of the sum is returned.
    """
    squared_errors = [sample.var() / sample.size for sample in (X, Y)]
    return np.sqrt(sum(squared_errors))

def delta_gmv(X: pd.Series, Y: pd.Series, alpha=0.05):
    """Return the difference of means of X and Y with a two-sided interval.

    The interval half-width is a Student t quantile with
    ``X.size + Y.size - 2`` degrees of freedom multiplied by the standard
    error from ``delta_gmv_std``.

    Args:
        X: first sample (the minuend of the difference).
        Y: second sample (the subtrahend of the difference).
        alpha: significance level; the quantile is taken at ``1 - alpha / 2``.

    Returns:
        Tuple ``(delta, left_bound, right_bound)``.
    """
    dof = X.size + Y.size - 2
    t_crit = stats.t.ppf(1 - alpha / 2, df=dof)
    mean_diff = X.mean() - Y.mean()
    margin = t_crit * delta_gmv_std(X, Y)
    return mean_diff, mean_diff - margin, mean_diff + margin

In [103]:
# Interval for the difference of mean per-user GMV (test minus control),
# computed with the default alpha=0.05.
delta, left_bound, right_bound = delta_gmv(X_test, X_control)
delta, left_bound, right_bound

(23.433367596802327, 3.975394614703312, 42.89134057890134)

In [104]:
# The required output format writes each interval as "[left, right]",
# i.e. with a comma between the bounds.
ans = f"[{left_bound:.3f}, {right_bound:.3f}]"
ans

'[3.975 42.891]'

In [105]:
def delta_gmv_percent_var(X: pd.Series, Y: pd.Series):
    """Return the approximate variance of ``mean(X) / mean(Y)``.

    First-order (delta-method) approximation with no covariance term between
    the two samples:
    ``Var(mean X) / mu_Y**2 + Var(mean Y) * mu_X**2 / mu_Y**4``,
    where ``Var(mean S)`` is the sample variance of S divided by its size.
    The result is on the ratio scale, not in percent.
    """
    mean_x, mean_y = X.mean(), Y.mean()
    se2_x = X.var() / X.size
    se2_y = Y.var() / Y.size
    numerator_part = se2_x / mean_y**2
    denominator_part = se2_y * mean_x**2 / mean_y**4
    return numerator_part + denominator_part

def delta_gmv_percent_std(X: pd.Series, Y: pd.Series):
    """Return the square root of ``delta_gmv_percent_var(X, Y)``."""
    return np.sqrt(delta_gmv_percent_var(X, Y))

def delta_gmv_percent(X: pd.Series, Y: pd.Series, alpha=0.05):
    """Return the percentage change of ``mean(X)`` relative to ``mean(Y)`` with an interval.

    The point estimate is ``100 * (mean(X) - mean(Y)) / mean(Y)``. The
    half-width is a Student t quantile with ``X.size + Y.size - 2`` degrees of
    freedom times the standard error from ``delta_gmv_percent_std``, scaled
    by 100 so that it is also expressed in percent.

    Args:
        X: sample whose mean is compared against the baseline.
        Y: baseline sample (its mean is the denominator).
        alpha: significance level; the quantile is taken at ``1 - alpha / 2``.

    Returns:
        Tuple ``(delta_percent, left_bound, right_bound)``, all in percent.
    """
    dof = X.size + Y.size - 2
    t_crit = stats.t.ppf(1 - alpha / 2, df=dof)
    baseline = Y.mean()
    change_pct = 100 * (X.mean() - baseline) / baseline
    margin = 100 * t_crit * delta_gmv_percent_std(X, Y)
    return change_pct, change_pct - margin, change_pct + margin

In [106]:
# Interval for the percentage change of mean per-user GMV, test relative to
# control, computed with the default alpha=0.05.
delta, left_bound, right_bound = delta_gmv_percent(X_test, X_control)
delta, left_bound, right_bound

(0.823027103484432, 0.13823808422869943, 1.5078161227401645)

In [107]:
# The required output format separates intervals with a space and writes each
# one as "[left, right]" with a comma between the bounds.
ans += f" [{left_bound:.3f}, {right_bound:.3f}]"
ans

'[3.975 42.891][0.138 1.508]'

In [108]:
def ratio_aov_var(X: pd.Series, Y: pd.Series):
    """Return the approximate variance of the ratio ``mean(X) / mean(Y)`` for paired samples.

    First-order (delta-method) linearisation with ``R = mean(X) / mean(Y)``:
    ``(Var(mean X) - 2 * R * Cov(mean X, mean Y) + R**2 * Var(mean Y)) / mu_Y**2``.
    Variances and the covariance of the means are the sample statistics
    divided by ``X.size``, so X and Y are expected to describe the same
    observations (``Series.cov`` pairs values by index label).
    """
    users = X.size
    mean_num, mean_den = X.mean(), Y.mean()
    ratio = mean_num / mean_den
    se2_num = X.var() / users
    se2_den = Y.var() / users
    cov_scaled = X.cov(Y) / users
    linearized = se2_num - 2 * ratio * cov_scaled + ratio ** 2 * se2_den
    return linearized / mean_den ** 2

def ratio_aov_std(Xt: pd.Series, Yt: pd.Series, Xc: pd.Series, Yc: pd.Series):
    """Return the standard error of the difference of two ratio metrics.

    The ratio variances of the control pair ``(Xc, Yc)`` and the test pair
    ``(Xt, Yt)`` come from ``ratio_aov_var``; they are added with no
    covariance term between the groups and the square root is returned.
    """
    control_part = ratio_aov_var(Xc, Yc)
    test_part = ratio_aov_var(Xt, Yt)
    return np.sqrt(control_part + test_part)

def ratio_aov(X_t: pd.Series, Y_t: pd.Series, X_c: pd.Series, Y_c: pd.Series, alpha=0.05):
    """Return the difference of two ratio metrics (test minus control) with an interval.

    Each ratio is ``mean(X) / mean(Y)`` of its group. The half-width is a
    Student t quantile with ``X_t.size + X_c.size - 2`` degrees of freedom
    times the standard error from ``ratio_aov_std``.

    Args:
        X_t, Y_t: numerator and denominator series of the test group.
        X_c, Y_c: numerator and denominator series of the control group.
        alpha: significance level; the quantile is taken at ``1 - alpha / 2``.

    Returns:
        Tuple ``(delta, left_bound, right_bound)``.
    """
    dof = X_t.size + X_c.size - 2
    t_crit = stats.t.ppf(1 - alpha / 2, df=dof)
    margin = t_crit * ratio_aov_std(X_t, Y_t, X_c, Y_c)
    ratio_test = X_t.mean() / Y_t.mean()
    ratio_control = X_c.mean() / Y_c.mean()
    diff = ratio_test - ratio_control
    return diff, diff - margin, diff + margin

In [109]:
# Number of rows per user in each arm; this is the denominator of the ratio
# metric (presumably one row per order — confirm against the data description).
Y_test = test['user_id'].value_counts()
Y_control = control['user_id'].value_counts()

In [110]:
# Interval for the difference of the ratio metric (mean per-user GMV divided
# by mean per-user row count), test minus control, with the default alpha=0.05.
delta, left_bound, right_bound = ratio_aov(X_test, Y_test, X_control, Y_control)
delta, left_bound, right_bound

(3.9824757316729347, 0.6522778400921125, 7.312673623253756)

In [111]:
# The required output format separates intervals with a space and writes each
# one as "[left, right]" with a comma between the bounds.
ans += f" [{left_bound:.3f}, {right_bound:.3f}]"
ans

'[3.975 42.891][0.138 1.508][0.652 7.313]'

In [112]:
def ratio_aov_percent_var(Xt: pd.Series, Yt: pd.Series, Xc: pd.Series, Yc: pd.Series):
    """Return the approximate variance of ``Rt / Rc`` for two ratio metrics.

    ``Rt`` and ``Rc`` are ``mean(X) / mean(Y)`` of the test and control
    groups. With the per-group ratio variances from ``ratio_aov_var`` the
    result is ``(Var(Rt) + Var(Rc) * (Rt / Rc)**2) / Rc**2``; there is no
    covariance term between the groups. The value is on the ratio scale,
    not in percent.
    """
    ratio_control = Xc.mean() / Yc.mean()
    ratio_test = Xt.mean() / Yt.mean()
    test_part = ratio_aov_var(Xt, Yt)
    control_part = ratio_aov_var(Xc, Yc)
    relative = ratio_test / ratio_control
    return (test_part + control_part * relative**2) / ratio_control**2

def ratio_aov_percent_std(Xt: pd.Series, Yt: pd.Series, Xc: pd.Series, Yc: pd.Series):
    """Return the square root of ``ratio_aov_percent_var`` for the same arguments."""
    return np.sqrt(ratio_aov_percent_var(Xt, Yt, Xc, Yc))

def ratio_aov_percent(Xt: pd.Series, Yt: pd.Series, Xc: pd.Series, Yc: pd.Series, alpha=0.05):
    """Return the percentage change of a ratio metric (test vs. control) with an interval.

    With ``Rt = mean(Xt) / mean(Yt)`` and ``Rc = mean(Xc) / mean(Yc)`` the
    point estimate is ``100 * (Rt - Rc) / Rc``. The half-width is a Student t
    quantile with ``Xt.size + Xc.size - 2`` degrees of freedom times the
    standard error from ``ratio_aov_percent_std``, scaled by 100 so that it
    is also expressed in percent.

    Args:
        Xt, Yt: numerator and denominator series of the test group.
        Xc, Yc: numerator and denominator series of the control group.
        alpha: significance level; the quantile is taken at ``1 - alpha / 2``.

    Returns:
        Tuple ``(delta_percent, left_bound, right_bound)``, all in percent.
    """
    dof = Xt.size + Xc.size - 2
    t_crit = stats.t.ppf(1 - alpha / 2, df=dof)
    ratio_control = Xc.mean() / Yc.mean()
    ratio_test = Xt.mean() / Yt.mean()
    change_pct = 100 * (ratio_test - ratio_control) / ratio_control
    margin = 100 * t_crit * ratio_aov_percent_std(Xt, Yt, Xc, Yc)
    return change_pct, change_pct - margin, change_pct + margin

In [113]:
# Interval for the percentage change of the ratio metric, test relative to
# control, computed with the default alpha=0.05.
delta, left_bound, right_bound = ratio_aov_percent(X_test, Y_test, X_control, Y_control)
delta, left_bound, right_bound

(0.5687435358170613, 0.09248219188158485, 1.0450048797525378)

In [114]:
# The required output format separates intervals with a space and writes each
# one as "[left, right]" with a comma between the bounds.
ans += f" [{left_bound:.3f}, {right_bound:.3f}]"
ans

'[3.975 42.891][0.138 1.508][0.652 7.313][0.092 1.045]'

Ответ: `[3.975, 42.891] [0.138, 1.508] [0.652, 7.313] [0.092, 1.045]`