In [None]:
# Function to test whether two averages differ significantly
def test_two_average(data_group_y, data_group_x, SIGNIFICANCE_LEVEL = 0.05, lower_ratio = 0.5, upper_ratio = 2):
  """
  Test whether the per-user metric averages of two groups differ significantly.

  A pooled-variance t-test is used unless the ratio of the sample variances
  (variant / control) falls below lower_ratio or above upper_ratio, in which
  case the Welch (unequal-variance) t-test is used instead.

  :params:
    :data_group_y: array - metric vector of the variant group
    :data_group_x: array - metric vector of the control group
    :SIGNIFICANCE_LEVEL: scalar - two-sided significance level; the default 0.05 gives a 95% confidence interval
    :lower_ratio: scalar - lower threshold on the variance ratio below which the two variances are treated as unequal
    :upper_ratio: scalar - upper threshold on the variance ratio above which the two variances are treated as unequal
  :return:
    :one-row dataframe with columns method, t_stats, p_value, mean_variant, mean_control,
     uplift_percent, lower_bound_uplift_percent, upper_bound_uplift_percent.
     The uplift columns are fractions of the control mean (they are not multiplied by 100).
     When the control mean is 0 the relative uplift is undefined, so every column
     except the two means is NaN.
  """
  columns = ['method', 't_stats', 'p_value', 'mean_variant', 'mean_control',
             'uplift_percent', 'lower_bound_uplift_percent', 'upper_bound_uplift_percent']

  mean_x = np.mean(data_group_x)
  mean_y = np.mean(data_group_y)
  if mean_x == 0:
    # Uplift is expressed relative to the control mean, so it cannot be computed here.
    # The row keeps the same columns as the regular result so outputs can be stacked.
    return pd.DataFrame([[np.nan, np.nan, np.nan, mean_y, mean_x, np.nan, np.nan, np.nan]], columns = columns)

  # Sample variances (ddof = 1) decide between the pooled and the Welch test.
  var_y = np.var(data_group_y, ddof = 1)
  var_x = np.var(data_group_x, ddof = 1)
  variance_ratio = var_y / var_x

  use_var = 'pooled'
  method = 'Pooled T-test'
  if variance_ratio < lower_ratio or variance_ratio > upper_ratio:
    use_var = 'unequal'
    method = 'Welch T-test'

  t_stats, p_value, dof = ttest_ind(data_group_y, data_group_x, usevar = use_var, alternative = 'two-sided', value = 0)

  n_x = len(data_group_x)
  n_y = len(data_group_y)

  # Standard error of (mean_y - mean_x), matching the variance assumption of the chosen test.
  if use_var == 'pooled':
    pooled_var = ((n_x - 1)*var_x + (n_y - 1)*var_y)/(n_x + n_y - 2)
    std_delta = np.sqrt(pooled_var * (1/n_x + 1/n_y))
  else:
    std_delta = np.sqrt(var_x/n_x + var_y/n_y)

  # The degrees of freedom are floored before the quantile lookup (Welch yields a
  # fractional value); rounding down can only widen the interval.
  critical_value = stats.t.ppf(q = 1 - SIGNIFICANCE_LEVEL/2, df = math.floor(dof))

  delta = mean_y - mean_x
  margin = critical_value*std_delta

  uplift_percent = delta/mean_x
  lower_bound_uplift_percent = (delta - margin)/mean_x
  upper_bound_uplift_percent = (delta + margin)/mean_x
  if mean_x < 0:
    # Dividing by a negative control mean reverses the order of the two bounds.
    lower_bound_uplift_percent, upper_bound_uplift_percent = upper_bound_uplift_percent, lower_bound_uplift_percent

  return pd.DataFrame([[method, t_stats, p_value, mean_y, mean_x, uplift_percent, lower_bound_uplift_percent, upper_bound_uplift_percent]], columns = columns)


# The name starts with "test_", so pytest would try to collect this helper as a
# test case whenever it is imported into a test module; opt out explicitly.
test_two_average.__test__ = False


# Function to test whether two proportions differ significantly
def test_two_proportion(arr_count, arr_nobs, SIGNIFICANCE_LEVEL = 0.05):
  """
  Test whether the success proportions of two groups differ significantly (two-sided z-test).

  :params:
    :arr_count: array - (k_y, k_x) where k_y is the number of successes in the variant group and k_x the number of successes in the control group
    :arr_nobs: array - (n_y, n_x) where n_y is the number of observations in the variant group and n_x the number of observations in the control group
    :SIGNIFICANCE_LEVEL: scalar - two-sided significance level; the default 0.05 gives a 95% confidence interval
  :return:
    :one-row dataframe with columns method, t_stats, p_value, mean_variant, mean_control,
     uplift_percent, lower_bound_uplift_percent, upper_bound_uplift_percent.
     t_stats holds the z statistic, and mean_variant / mean_control hold the two proportions.
     The uplift columns are fractions of the control proportion (they are not multiplied by 100).
     When the control proportion is 0 the relative uplift is undefined, so every column
     except the two proportions is NaN.
  """
  columns = ['method', 't_stats', 'p_value', 'mean_variant', 'mean_control',
             'uplift_percent', 'lower_bound_uplift_percent', 'upper_bound_uplift_percent']

  p_hat_y = arr_count[0]/arr_nobs[0]
  p_hat_x = arr_count[1]/arr_nobs[1]

  if p_hat_x == 0:
    # Uplift is expressed relative to the control proportion, so it cannot be computed here.
    # The row keeps the same columns as the regular result so outputs can be stacked.
    return pd.DataFrame([[np.nan, np.nan, np.nan, p_hat_y, p_hat_x, np.nan, np.nan, np.nan]], columns = columns)

  z_stat, p_value = proportions_ztest(arr_count, arr_nobs, alternative = 'two-sided', prop_var = False)

  # Standard error of (p_hat_y - p_hat_x) built from the pooled proportion of both groups.
  # NOTE(review): the interval below reuses this pooled standard error; an unpooled
  # standard error is the more common choice for a difference-of-proportions
  # confidence interval - confirm this is intended.
  p_hat = (arr_count[0] + arr_count[1])/(arr_nobs[0] + arr_nobs[1])
  std_delta = np.sqrt(p_hat*(1 - p_hat)*(1/arr_nobs[0] + 1/arr_nobs[1]))

  critical_value = stats.norm.ppf(q = 1 - SIGNIFICANCE_LEVEL/2)

  delta = p_hat_y - p_hat_x
  margin = critical_value*std_delta

  uplift_percent = delta/p_hat_x
  lower_bound_uplift_percent = (delta - margin)/p_hat_x
  upper_bound_uplift_percent = (delta + margin)/p_hat_x

  return pd.DataFrame([['z-test', z_stat, p_value, p_hat_y, p_hat_x, uplift_percent, lower_bound_uplift_percent, upper_bound_uplift_percent]], columns = columns)


# The name starts with "test_", so pytest would try to collect this helper as a
# test case whenever it is imported into a test module; opt out explicitly.
test_two_proportion.__test__ = False