# 0.0 Imports

In [1]:
import pandas as pd
import numpy as np
import math

from statsmodels.stats import api as sm
from scipy.stats import chi2_contingency

## 0.1 Loading data

In [2]:
# Load the raw A/B-test dataset from the project's data directory.
# NOTE(review): `.crdownload` is the extension Chrome gives to unfinished downloads, so this
# file may be truncated. The single row with missing values reported in section 1.3 would be
# consistent with a cut-off last line. Confirm the download is complete and rename it to `.csv`.
df_raw = pd.read_csv('../data/ab_testing.crdownload')

In [3]:
# Preview the first rows to sanity-check the columns and their values.
df_raw.head()

Unnamed: 0,uid,country,gender,spent,purchases,date,group,device
0,11115722,MEX,F,1595.0,5.0,2016-03-08,GRP B,I
1,11122053,USA,M,498.0,2.0,2017-07-14,GRP B,I
2,11128688,USA,F,2394.0,6.0,2017-09-17,GRP A,I
3,11130578,USA,F,1197.0,3.0,2017-11-30,GRP A,I
4,11130759,ESP,M,1297.0,3.0,2018-01-10,GRP B,A


- Objetivo: Definir se o preenchimento automático do cartão de crédito é melhor que o preenchimento manual
- Métrica: GMV (Gross Merchandise Value) médio

# 1.0 Data Description

In [4]:
# Work on a copy so that df_raw stays untouched by the cleaning steps that follow.
df1 = df_raw.copy()

## 1.1 Data Dimensions

In [5]:
# Report the dataset dimensions: shape is (rows, columns), so unpack it once
n_rows, n_cols = df1.shape
print("Número de linhas: {}".format(n_rows))
print("Número de colunas: {}".format(n_cols))

Número de linhas: 25230
Número de colunas: 8


## 1.2 Data Types

In [6]:
# Inspect the dtype pandas inferred for each column when reading the CSV.
df1.dtypes

uid            int64
country       object
gender        object
spent        float64
purchases    float64
date          object
group         object
device        object
dtype: object

## 1.3 Check NA

In [7]:
# Count the missing values in each column.
df1.isna().sum()

uid          0
country      1
gender       1
spent        1
purchases    1
date         1
group        1
device       1
dtype: int64

## 1.4 Drop NA

In [8]:
# Discard every row that holds at least one missing value (only a handful exist)
df1 = df1.dropna(axis=0, how='any')

## 1.5 Change Types

In [9]:
# Cast with an explicit 'int64' rather than the bare `int`: `int` follows the platform's
# default integer width (32-bit on some platform / NumPy combinations), which would
# contradict the intent below and make the ['int64', 'float64'] filter used in the
# descriptive-statistics section silently skip these columns.

# Changing purchases to int64
df1['purchases'] = df1['purchases'].astype('int64')

# Changing date to datetime
df1['date'] = pd.to_datetime(df1['date'])

# Changing spent to int64
# NOTE(review): the cast truncates any fractional part of `spent` — confirm amounts are whole numbers
df1['spent'] = df1['spent'].astype('int64')

## 1.6 Checking

In [10]:
# Re-count the missing values per column to verify the cleaning steps left none behind.
df1.isna().sum()

uid          0
country      0
gender       0
spent        0
purchases    0
date         0
group        0
device       0
dtype: int64

## 1.7 Descriptive Statistics

In [13]:
# Confirm the column dtypes after the type conversions.
# NOTE(review): the saved output lists a `total_expense` column that no visible cell creates,
# and the execution counts jump from In [10] to In [13] — presumably a deleted cell added it.
# Its min/max in the summary table (99 and 365100) equal 99*1 and 12170*30, so it looks like
# spent * purchases; confirm and restore that cell so the notebook reproduces on a fresh kernel.
df1.dtypes

uid                       int64
country                  object
gender                   object
spent                     int64
purchases                 int64
date             datetime64[ns]
group                    object
device                   object
total_expense             int64
dtype: object

In [14]:
# Split the columns by kind: categorical = everything that is neither numeric nor datetime,
# numerical = the 64-bit integer and float columns
cat_attributes = df1.select_dtypes(exclude=['int64', 'float64', 'datetime64[ns]'])
num_attributes = df1.select_dtypes(include=['int64', 'float64'])

### 1.7.1 Numerical Attributes

In [15]:
# Summary table of the numeric attributes. Each intermediate is a one-row frame whose
# columns are the attribute names; the statistics come from pandas' vectorised reductions
# instead of Python builtins / NumPy callables routed through DataFrame.apply.

# Central tendency - mean, median
ct1 = num_attributes.mean().to_frame().T
ct2 = num_attributes.median().to_frame().T

# Dispersion - std, min, max, range, skew, kurtosis
# ddof=0 keeps the population standard deviation that np.std produced before
d1 = num_attributes.std(ddof=0).to_frame().T
d2 = num_attributes.min().to_frame().T
d3 = num_attributes.max().to_frame().T
d4 = (num_attributes.max() - num_attributes.min()).to_frame().T
d5 = num_attributes.skew().to_frame().T
d6 = num_attributes.kurtosis().to_frame().T

# Concatenate: after the transpose there is one row per attribute and one column per statistic
m = pd.concat([d2, d3, d4, ct1, ct2, d1, d5, d6]).T.reset_index()
m.columns = ['attributes', 'min', 'max', 'range', 'mean', 'median', 'std', 'skew', 'kurtosis']

m

Unnamed: 0,attributes,min,max,range,mean,median,std,skew,kurtosis
0,uid,11115722.0,59710537.0,48594815.0,35515020.0,35532928.0,14035030.0,-0.009332,-1.204568
1,spent,99.0,12170.0,12071.0,1885.71,1596.0,1360.678,1.350407,2.591977
2,purchases,1.0,30.0,29.0,4.54362,4.0,3.097113,1.359553,2.679187
3,total_expense,99.0,365100.0,365001.0,12539.72,5984.0,18623.18,3.967317,27.006059


# 2.0 Design do experimento

## 2.1 Hipótese do experimento

In [38]:
# Statistical power
power = 0.80

# Significance threshold
significance = 0.05

# Confidence level
confidence = 0.95

# Conversion rates of the new page and the current page
# NOTE(review): the notebook's stated metric is the mean GMV (a continuous value), while
# these inputs describe a difference between two proportions — confirm the intended design.
p1, p2 = 0.15, 0.13

# Effect size
effect_size = sm.proportion_effectsize(prop1=p1, prop2=p2)

## 2.2 Sample size

In [40]:
# Sample size: solve the power equation for the number of observations (the one argument
# left unspecified) and round up to a whole number
sample_n = math.ceil(
    sm.NormalIndPower().solve_power(effect_size=effect_size, alpha=significance, power=power)
)
sample_n

4720

# 3.0 Hypothesis Test

# 4.0 Conclusion