# Example of Use

## Installation of Package


In [1]:
#!pip install ab-testing-module

## Data

In [2]:
import numpy as np
import pandas as pd

# Set seed for reproducibility
np.random.seed(42)

# Generate sample data for two groups
data = {
    'group': np.random.choice(['control', 'treatment'], size=100),  # 100 samples, two groups
    'outcome': np.concatenate([
        np.random.normal(loc=5, scale=1, size=50),  # Control group: mean=5, std=1
        np.random.normal(loc=5.5, scale=1, size=50)  # Treatment group: mean=5.5, std=1
    ])
}

# Create DataFrame
df = pd.DataFrame(data)

# Calculate the median of the 'outcome' values
median_outcome = df['outcome'].median()

# Classify 'outcome' values as 0 if below median, 1 if equal to or above median
df['outcome'] = (df['outcome'] >= median_outcome).astype(int)

df.head()



Unnamed: 0,group,outcome
0,control,1
1,treatment,0
2,control,0
3,control,0
4,control,0


## Data Visualization / Data Exploration

In [3]:
## Import library
from ab_testing_module import data_viz

In [4]:
visualizations = data_viz(data=df, 
                        group_column='group', 
                        value_column='outcome',
                        viz_types=['boxplot', 'violinplot', 'histogram', 'countplot', 'heatmap'])

## Test Results

In [5]:
## Import library
from ab_testing_module import ab_test

In [6]:
df_results = ab_test(data=df, 
                    group_column='group', 
                    value_column='outcome', 
                    control_group='control', 
                    alpha=0.05, 
                    handle_outliers=None, 
                    mc_correction=None)

In [7]:
df_results

Unnamed: 0,Test,P Value,Effect Size,Power,Interpretation
0,Chi-square Test,0.5456,Phi Coefficient: 7.4839,0.092782,"For the Chi-square Test, a test statistic of 0.3653 and a p-value of 0.5456 provides insufficient evidence to reject the null hypothesis at the this level. The effect size is 7.4839. The power of the test is 0.0928, indicating a potential risk of Type II error (failing to detect a true effect)."


## Advanced Modeling

In [8]:
## Import library
from ab_testing_module import modeling

In [9]:
# Perform advanced modeling
model_summaries, interpretations, model_summaries_df, interpretations_df = modeling(data=df, 
                                                                                    group_column='group', 
                                                                                    value_column='outcome', 
                                                                                    control_group='control')

Optimization terminated successfully.
         Current function value: 0.689897
         Iterations 4


In [10]:
model_summaries_df

Unnamed: 0,Term,Coefficient,Std Err,z or t,P>|z|,Conf. Interval Lower,Conf. Interval Upper
0,Logistic Regression Model,,,,,,
1,Intercept,0.182322,0.302765,0.602188,0.547049,-0.411087,0.77573
2,C(group)[T.treatment],-0.325422,0.404304,-0.804896,0.42088,-1.117843,0.466998


In [11]:
interpretations_df

Unnamed: 0,Model Type,Interpretation
0,Logistic Regression Model Interpretation,Interpretation of the Logistic Model: No predictors were found to be statistically significant at the 5% significance level.
