# Statistical Analysis

## Set up Single Metric

In [4]:
import pandas as pd
import os
import ab_testing_utils as ab  # Pastikan file ab_testing_utils.py ada satu folder dengan notebook ini

# ==========================================
# 1. SET UP THE FILE LIST
# ==========================================
# Datasets are looked up in ./raw_dataset relative to the current working directory.
base_dir = os.path.join(os.getcwd(), 'raw_dataset')

file_names = [
    'test1_menu.csv',
    'test2_novelty_slider.csv',
    'test3_product_sliders.csv',
    'test4_reviews.csv',
    'test5_search_engine.csv'
]

# ==========================================
# 2. RUN THE ANALYSIS FOR EVERY DATASET
# ==========================================
print(f" Analyze ({len(file_names)} DATASET)...\n")

for position, filename in enumerate(file_names, start=1):
    file_path = os.path.join(base_dir, filename)

    # Only process datasets that actually exist on disk; warn about the rest.
    if os.path.exists(file_path):
        print(f" PROCESSING: {filename}")

        try:
            # Load the data
            df = pd.read_csv(file_path)

            # Print the summary report without naming a control variant.
            # Author's note: with control_variant=None the helper picks the
            # alphabetically-first variant as the control on its own.
            ab.generate_text_summary(
                df,
                control_variant=None,
                alpha=0.05
            )

        except Exception as e:
            # Best-effort loop: one failing dataset must not stop the others.
            # The exception class is included because str(e) alone can be
            # uninformative (e.g. a KeyError prints only the quoted key).
            print(f"❌ ERROR pada file ini: {type(e).__name__}: {e}")

    else:
        print(f"⚠️  WARNING: File {filename} tidak ditemukan.")

    # Visual separator BETWEEN reports; skipped after the last dataset so the
    # "NEXT DATASET" arrow never points at nothing.
    if position < len(file_names):
        print("\n" + "   " * 15 + "⬇️ NEXT DATASET ⬇️" + "   " * 15 + "\n")

 Analyze (5 DATASET)...

 PROCESSING: test1_menu.csv

📊 EXPERIMENT SUMMARY REPORT
   • Variants: 2 (A_horizontal_menu, B_dropdown_menu)
   • Control : A_horizontal_menu
   • Alpha   : 0.05
| METRIC NAME               | TYPE         | TEST USED                 | LIFT (%)   | P-VALUE    | SIG?  |
|---------------------------|--------------|---------------------------|------------|------------|-------|
| pages_viewed              | Continuous   | Mann-Whitney U            | -2.01%     | 0.06748    | NO    |
| added_to_cart             | Binary       | Z-Test (Prop)             | -10.34%    | 0.00000    | YES   |
| bounced                   | Binary       | Z-Test (Prop)             | +2.63%     | 0.33544    | NO    |
| revenue                   | Continuous   | Mann-Whitney U            | -10.51%    | 0.00000    | YES   |
|-------------------------------------------------------------------------------------------------------|

                                             ⬇️ N

## Set Up Multiple-Metric Correction (Holm-Bonferroni)

In [4]:
import pandas as pd
import os
import importlib
import ab_testing_utils as ab

# 1. Reload the helper module (marked as mandatory by the author).
#    importlib.reload re-executes ab_testing_utils so the current on-disk
#    version is used without restarting the kernel.
importlib.reload(ab)

# 2. Set up the input files
# Datasets are looked up in ./raw_dataset relative to the current working directory.
base_dir = os.path.join(os.getcwd(), 'raw_dataset')

file_names = [
    'test1_menu.csv',
    'test2_novelty_slider.csv',
    'test3_product_sliders.csv',
    'test4_reviews.csv',
    'test5_search_engine.csv'
]

# Single significance level shared by the raw analysis and the Holm step, so
# the two stages cannot drift apart.
ALPHA = 0.05

print(f"🚀 MEMULAI ANALISIS UNTUK {len(file_names)} DATASET (AUTO-CONTROL)...\n")

# 3. Run both stages for every dataset
for position, filename in enumerate(file_names, start=1):
    file_path = os.path.join(base_dir, filename)

    if os.path.exists(file_path):
        print(f"\n{'#'*80}")
        print(f"📂 FILE: {filename}")
        print(f"{'#'*80}")

        try:
            # A. Load the data
            df = pd.read_csv(file_path)

            # B. Stage 1: raw (uncorrected) analysis with automatic control.
            # Author's note: control_variant=None makes the helper take the
            # alphabetically-first variant as the control.
            hasil_analisis = ab.generate_text_summary(
                df,
                control_variant=None,
                alpha=ALPHA
            )

            # C. Stage 2: Holm-Bonferroni validation, fed directly with the
            # result captured above. A falsy result (empty or None) is
            # reported instead of being passed on.
            if hasil_analisis:
                ab.apply_holm_correction(hasil_analisis, alpha=ALPHA)
            else:
                print("⚠️  Gagal mendapatkan hasil analisis (dict kosong).")

        except Exception as e:
            # Best-effort loop: one failing dataset must not stop the others.
            # The exception class is included because str(e) alone can be
            # uninformative (e.g. a KeyError prints only the quoted key).
            print(f"❌ ERROR: {type(e).__name__}: {e}")

    else:
        print(f"⚠️  File {filename} tidak ditemukan.")

    # Spacing BETWEEN datasets; skipped after the last one so the "NEXT"
    # arrow never points at nothing.
    if position < len(file_names):
        print("\n" + "   " * 15 + "⬇️ NEXT ⬇️" + "   " * 15 + "\n")

🚀 MEMULAI ANALISIS UNTUK 5 DATASET (AUTO-CONTROL)...


################################################################################
📂 FILE: test1_menu.csv
################################################################################

📊 EXPERIMENT SUMMARY REPORT
   • Variants: 2 (A_horizontal_menu, B_dropdown_menu)
   • Control : A_horizontal_menu
   • Alpha   : 0.05
| METRIC NAME               | TYPE         | TEST USED                 | LIFT (%)   | P-VALUE    | SIG?  |
|---------------------------|--------------|---------------------------|------------|------------|-------|
| pages_viewed              | Continuous   | Mann-Whitney U            | -2.01%     | 0.06748    | NO    |
| added_to_cart             | Binary       | Z-Test (Prop)             | -10.34%    | 0.00000    | YES   |
| bounced                   | Binary       | Z-Test (Prop)             | +2.63%     | 0.33544    | NO    |
| revenue                   | Continuous   | Mann-Whitney U            | -