# 02_focused_itt_analysis.ipynb: Intent-to-Treat Effects via 97-3 Natural Experiment

## Estimands
1. **User-level ITT on purchase probability** in window [S, E)
2. **User-level ITT on revenue per user** in the same window

## Method
- Uses only `AUCTIONS_USERS` and `PURCHASES` tables
- Reconstructs control group size using 97-3 split on active users
- Implements exact mathematical estimator with proper diagnostics

## Key Innovation
3% of users are randomly blocked from ads, providing a clean control group. We observe their purchases but not their browsing activity, requiring reconstruction of the denominator.

In [1]:
# Setup
import os
import sys
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from dotenv import load_dotenv
import snowflake.connector
from scipy import stats
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

load_dotenv()

print("="*80)
print("INTENT-TO-TREAT ANALYSIS USING MINIMAL DATA")
print("="*80)

INTENT-TO-TREAT ANALYSIS USING MINIMAL DATA


In [2]:
# Configuration
ANALYSIS_START = '2025-08-01'  # S
ANALYSIS_END = '2025-09-01'    # E
LOOKBACK_DAYS = 30              # L

# Known split from platform design
TREATMENT_PROP = 0.97
CONTROL_PROP = 0.03

print(f"Analysis window: [{ANALYSIS_START}, {ANALYSIS_END})")
print(f"Lookback: {LOOKBACK_DAYS} days")
print(f"Population split: {TREATMENT_PROP:.0%} / {CONTROL_PROP:.0%}")

Analysis window: [2025-08-01, 2025-09-01)
Lookback: 30 days
Population split: 97% / 3%


In [3]:
# Snowflake connection
try:
    conn = snowflake.connector.connect(
        user=os.getenv('SNOWFLAKE_USER'),
        password=os.getenv('SNOWFLAKE_PASSWORD'),
        account=os.getenv('SNOWFLAKE_ACCOUNT'),
        warehouse=os.getenv('SNOWFLAKE_WAREHOUSE', 'COMPUTE_WH'),
        database='INCREMENTALITY',
        schema='INCREMENTALITY_RESEARCH'
    )
    print("✅ Connected to Snowflake")
except Exception as e:
    print(f"❌ Connection failed: {e}")
    sys.exit(1)

✅ Connected to Snowflake


## Section 1: Core ITT Estimator

### Mathematical Framework

**Observed quantities:**
- Observed_T = count(distinct users in AUCTIONS_USERS during [S-L, E))
- T_p = treatment users who purchased in [S, E)
- C_p = control users who purchased in [S, E)

**Reconstruction:**
- Observed_Ĉ = Observed_T × (0.03/0.97)
- C_np̂ = Observed_Ĉ - C_p

**ITT Effects:**
- Rate_T = T_p / Observed_T
- Rate_Ĉ = C_p / Observed_Ĉ
- ITT_lift = Rate_T - Rate_Ĉ

In [4]:
def calculate_itt(conn, start_date, end_date, lookback_days):
    """
    Calculate Intent-to-Treat effect on purchase probability.
    Returns all counts and rates in a single query.
    """
    
    lookback_start = pd.to_datetime(start_date) - timedelta(days=lookback_days)
    lookback_start_str = lookback_start.strftime('%Y-%m-%d')
    
    query = f"""
    WITH au AS (
        -- Treatment group: users exposed to ads
        SELECT DISTINCT OPAQUE_USER_ID AS user_id
        FROM AUCTIONS_USERS
        WHERE CREATED_AT >= '{lookback_start_str}'
          AND CREATED_AT < '{end_date}'
    ),
    pu AS (
        -- All purchasers in analysis window
        SELECT DISTINCT USER_ID AS user_id
        FROM PURCHASES
        WHERE PURCHASED_AT >= '{start_date}'
          AND PURCHASED_AT < '{end_date}'
    ),
    -- Calculate counts
    t_obs AS (SELECT COUNT(*) AS observed_t FROM au),
    t_p AS (
        SELECT COUNT(*) AS t_p
        FROM pu WHERE user_id IN (SELECT user_id FROM au)
    ),
    c_p AS (
        SELECT COUNT(*) AS c_p
        FROM pu WHERE user_id NOT IN (SELECT user_id FROM au)
    )
    
    SELECT
        -- Observed counts
        t_obs.observed_t AS observed_t,
        t_p.t_p AS t_p,
        (t_obs.observed_t - t_p.t_p) AS t_np,
        c_p.c_p AS c_p,
        
        -- Reconstructed control
        ROUND(t_obs.observed_t * {CONTROL_PROP}/{TREATMENT_PROP}) AS observed_c_hat,
        ROUND(t_obs.observed_t * {CONTROL_PROP}/{TREATMENT_PROP} - c_p.c_p) AS c_np_hat,
        
        -- Rates
        (t_p.t_p / NULLIF(t_obs.observed_t, 0)) AS rate_t,
        (c_p.c_p / NULLIF(t_obs.observed_t * {CONTROL_PROP}/{TREATMENT_PROP}, 0)) AS rate_c_hat,
        
        -- ITT lift
        ((t_p.t_p / NULLIF(t_obs.observed_t, 0)) - 
         (c_p.c_p / NULLIF(t_obs.observed_t * {CONTROL_PROP}/{TREATMENT_PROP}, 0))) AS itt_lift,
        
        -- Relative lift
        CASE WHEN c_p.c_p > 0 THEN
            (((t_p.t_p / NULLIF(t_obs.observed_t, 0)) - 
              (c_p.c_p / NULLIF(t_obs.observed_t * {CONTROL_PROP}/{TREATMENT_PROP}, 0))) /
             (c_p.c_p / NULLIF(t_obs.observed_t * {CONTROL_PROP}/{TREATMENT_PROP}, 0))) * 100
        ELSE NULL END AS relative_lift_pct
        
    FROM t_obs 
    CROSS JOIN t_p 
    CROSS JOIN c_p
    """
    
    df = pd.read_sql(query, conn)
    return df.iloc[0].to_dict()

# Run main analysis
print("\nRunning main ITT analysis...")
results = calculate_itt(conn, ANALYSIS_START, ANALYSIS_END, LOOKBACK_DAYS)


Running main ITT analysis...


In [5]:
# Display results and run diagnostics
print("\n" + "="*80)
print("DIAGNOSTIC CHECKS")
print("="*80)

# Critical feasibility check
feasible = results['C_P'] <= results['OBSERVED_C_HAT']
print(f"\n1. Feasibility Check: C_p ({results['C_P']:,.0f}) ≤ Observed_Ĉ ({results['OBSERVED_C_HAT']:,.0f})")
print(f"   Status: {'✅ PASS' if feasible else '❌ FAIL - Increase lookback or check data'}")

if not feasible:
    print("\n⚠️ WARNING: Control purchasers exceed estimated control population!")
    print("   This violates the 97-3 split assumption. Possible causes:")
    print("   - Lookback window too short (recently treated users misclassified)")
    print("   - User ID mismatch between tables")
    print("   - Platform split not exactly 97-3 this period")
    # Continue anyway for diagnostic purposes

# Rate bounds check
print(f"\n2. Rate Bounds:")
print(f"   Treatment rate: {results['RATE_T']:.4f} (must be in [0,1])")
print(f"   Control rate:   {results['RATE_C_HAT']:.4f} (must be in [0,1])")
rate_valid = 0 <= results['RATE_T'] <= 1 and 0 <= results['RATE_C_HAT'] <= 1
print(f"   Status: {'✅ PASS' if rate_valid else '❌ FAIL'}")


DIAGNOSTIC CHECKS

1. Feasibility Check: C_p (0) ≤ Observed_Ĉ (311,238)
   Status: ✅ PASS

2. Rate Bounds:
   Treatment rate: 0.1392 (must be in [0,1])
   Control rate:   0.0000 (must be in [0,1])
   Status: ✅ PASS


In [6]:
# Display main results
print("\n" + "="*80)
print("PRIMARY RESULTS")
print("="*80)

print("\n1. CONTINGENCY TABLE:")
print("-"*40)
print(f"                   Purchased    Did Not Purchase    Total")
print(f"Treatment Group    {results['T_P']:>9,.0f}    {results['T_NP']:>16,.0f}    {results['OBSERVED_T']:>9,.0f}")
print(f"Control Group      {results['C_P']:>9,.0f}    {results['C_NP_HAT']:>16,.0f}    {results['OBSERVED_C_HAT']:>9,.0f}")
print(f"                   (observed)         (estimated)       (estimated)")

print("\n2. PURCHASE RATES:")
print("-"*40)
print(f"Treatment:   {results['RATE_T']:.4f} ({results['RATE_T']*100:.2f}%)")
print(f"Control:     {results['RATE_C_HAT']:.4f} ({results['RATE_C_HAT']*100:.2f}%)")

print("\n3. INTENT-TO-TREAT EFFECT:")
print("-"*40)
print(f"ITT Lift (pp):     {results['ITT_LIFT']*100:.2f}")
print(f"Relative Lift:     {results['RELATIVE_LIFT_PCT']:.1f}%" if results['RELATIVE_LIFT_PCT'] else "N/A")

print(f"\nInterpretation: Ads increase purchase probability by {results['ITT_LIFT']*100:.2f} percentage points")


PRIMARY RESULTS

1. CONTINGENCY TABLE:
----------------------------------------
                   Purchased    Did Not Purchase    Total
Treatment Group    1,400,423           8,662,941    10,063,364
Control Group              0             311,238      311,238
                   (observed)         (estimated)       (estimated)

2. PURCHASE RATES:
----------------------------------------
Treatment:   0.1392 (13.92%)
Control:     0.0000 (0.00%)

3. INTENT-TO-TREAT EFFECT:
----------------------------------------
ITT Lift (pp):     13.92
N/A

Interpretation: Ads increase purchase probability by 13.92 percentage points


## Section 2: Lookback Window Analysis

Plot ATE vs L ∈ {0, 7, 14, 30, 45, 60} to find where it stabilizes.

In [7]:
# Test different lookback windows
lookback_days = [0, 7, 14, 30, 45, 60]
lookback_results = []

print("\n" + "="*80)
print("LOOKBACK WINDOW ANALYSIS")
print("="*80)

for L in tqdm(lookback_days, desc="Testing lookback windows"):
    res = calculate_itt(conn, ANALYSIS_START, ANALYSIS_END, L)
    lookback_results.append({
        'L': L,
        'ITT': res['ITT_LIFT'],
        'Feasible': res['C_P'] <= res['OBSERVED_C_HAT'],
        'C_p': res['C_P'],
        'Observed_C': res['OBSERVED_C_HAT']
    })

# Display results
print("\nLookback | ITT Lift (pp) | Feasible | C_p    | Obs_Ĉ")
print("-"*60)
for r in lookback_results:
    print(f"{r['L']:>8} | {r['ITT']*100:>13.3f} | {('✓' if r['Feasible'] else '✗'):^8} | "
          f"{r['C_p']:>6,.0f} | {r['Observed_C']:>8,.0f}")

# Find stabilization point
stable_idx = None
for i in range(1, len(lookback_results)):
    if lookback_results[i]['Feasible']:
        if stable_idx is None:
            stable_idx = i
        # Check if change is < 10% relative
        if i > 0 and abs(lookback_results[i]['ITT'] - lookback_results[i-1]['ITT']) / abs(lookback_results[i-1]['ITT']) < 0.1:
            break

if stable_idx:
    optimal_L = lookback_results[stable_idx]['L']
    print(f"\n✓ Recommended lookback: {optimal_L} days (first feasible with stable ITT)")
else:
    print("\n⚠️ No feasible lookback found. Check data quality.")


LOOKBACK WINDOW ANALYSIS


Testing lookback windows: 100%|██████████| 6/6 [03:04<00:00, 30.71s/it]


Lookback | ITT Lift (pp) | Feasible | C_p    | Obs_Ĉ
------------------------------------------------------------
       0 |        19.395 |    ✓     |      0 |  222,653
       7 |        17.452 |    ✓     |      0 |  247,764
      14 |        16.032 |    ✓     |      0 |  269,885
      30 |        13.916 |    ✓     |      0 |  311,238
      45 |        12.612 |    ✓     |      0 |  343,670
      60 |        11.633 |    ✓     |      0 |  372,862

✓ Recommended lookback: 7 days (first feasible with stable ITT)





## Section 3: Revenue Analysis (ARPU)

Calculate Average Revenue Per User correctly:
- ARPU_T = (Total Treatment Revenue) / Observed_T
- ARPU_Ĉ = (Total Control Revenue) / Observed_Ĉ

In [12]:
def calculate_revenue_itt(conn, start_date, end_date, lookback_days, winsorize_pct=99):
    """
    Calculate Intent-to-Treat effect on revenue per user.
    Fixed for Snowflake compatibility.
    """

    lookback_start = pd.to_datetime(start_date) - timedelta(days=lookback_days)
    lookback_start_str = lookback_start.strftime('%Y-%m-%d')

    # First get treatment user count
    query = f"""
    WITH au AS (
        SELECT DISTINCT OPAQUE_USER_ID AS user_id
        FROM AUCTIONS_USERS
        WHERE CREATED_AT >= '{lookback_start_str}'
          AND CREATED_AT < '{end_date}'
    ),
    au_count AS (
        SELECT COUNT(*) as observed_t FROM au
    ),
    revenue_data AS (
        SELECT
            p.USER_ID as user_id,
            SUM(p.QUANTITY * p.UNIT_PRICE) as total_revenue
        FROM PURCHASES p
        WHERE p.PURCHASED_AT >= '{start_date}'
          AND p.PURCHASED_AT < '{end_date}'
        GROUP BY p.USER_ID
    ),
    revenue_split AS (
        SELECT
            r.user_id,
            r.total_revenue,
            CASE WHEN EXISTS (SELECT 1 FROM au WHERE au.user_id = r.user_id)
                 THEN 'treatment'
                 ELSE 'control'
            END as group_type
        FROM revenue_data r
    )
    SELECT
        (SELECT observed_t FROM au_count) as observed_t,
        COALESCE(SUM(CASE WHEN group_type = 'treatment' THEN total_revenue END), 0) as revenue_t,
        COALESCE(SUM(CASE WHEN group_type = 'control' THEN total_revenue END), 0) as revenue_c,
        PERCENTILE_CONT({winsorize_pct/100}) WITHIN GROUP (ORDER BY total_revenue) as p{winsorize_pct}_revenue
    FROM revenue_split
    """

    df = pd.read_sql(query, conn)
    row = df.iloc[0]

    # Calculate ARPU
    observed_t = row['OBSERVED_T']
    observed_c_hat = int(observed_t * (CONTROL_PROP / TREATMENT_PROP))

    arpu_t = row['REVENUE_T'] / observed_t if observed_t > 0 else 0
    arpu_c = row['REVENUE_C'] / observed_c_hat if observed_c_hat > 0 else 0

    return {
        'observed_t': observed_t,
        'observed_c_hat': observed_c_hat,
        'revenue_t': row['REVENUE_T'],
        'revenue_c': row['REVENUE_C'],
        'arpu_t': arpu_t,
        'arpu_c': arpu_c,
        'itt_revenue_lift': arpu_t - arpu_c,
        'relative_revenue_lift': ((arpu_t - arpu_c) / arpu_c * 100) if arpu_c > 0 else None,
        'p99_cap': row[f'P{winsorize_pct}_REVENUE']
    }

# Calculate revenue ITT
print("\nCalculating revenue ITT...")
revenue_results = calculate_revenue_itt(conn, ANALYSIS_START, ANALYSIS_END, LOOKBACK_DAYS)


Calculating revenue ITT...


In [13]:
# Display revenue results
print("\n" + "="*80)
print("REVENUE ANALYSIS (ARPU)")
print("="*80)

print("\n1. TOTAL REVENUE:")
print("-"*40)
print(f"Treatment total:   ${revenue_results['revenue_t']:>12,.2f}")
print(f"Control total:     ${revenue_results['revenue_c']:>12,.2f}")

print("\n2. DENOMINATORS (INCLUDING ZEROS):")
print("-"*40)
print(f"Treatment users:   {revenue_results['observed_t']:>12,}")
print(f"Control users:     {revenue_results['observed_c_hat']:>12,} (estimated)")

print("\n3. AVERAGE REVENUE PER USER (ARPU):")
print("-"*40)
print(f"ARPU_T:            ${revenue_results['arpu_t']:>12.2f}")
print(f"ARPU_Ĉ:           ${revenue_results['arpu_c']:>12.2f}")

print("\n4. ITT REVENUE EFFECT:")
print("-"*40)
print(f"Revenue lift/user: ${revenue_results['itt_revenue_lift']:>12.2f}")
if revenue_results['relative_revenue_lift']:
    print(f"Relative lift:     {revenue_results['relative_revenue_lift']:>12.1f}%")

print(f"\nInterpretation: Ads increase revenue by ${revenue_results['itt_revenue_lift']:.2f} per user")


REVENUE ANALYSIS (ARPU)

1. TOTAL REVENUE:
----------------------------------------
Treatment total:   $16,266,808,803.00
Control total:     $1,101,545,750.00

2. DENOMINATORS (INCLUDING ZEROS):
----------------------------------------
Treatment users:   10,063,364.0
Control users:          311,238 (estimated)

3. AVERAGE REVENUE PER USER (ARPU):
----------------------------------------
ARPU_T:            $     1616.44
ARPU_Ĉ:           $     3539.24

4. ITT REVENUE EFFECT:
----------------------------------------
Revenue lift/user: $    -1922.80
Relative lift:            -54.3%

Interpretation: Ads increase revenue by $-1922.80 per user


## Section 4: Sensitivity to Activity Ratio

Test how results change if ads affect user activity (r = activity_T / activity_C).

In [14]:
def sensitivity_analysis(base_results, r_values):
    """
    Adjust for different activity ratios.
    r > 1 means ads increase platform activity.
    """
    sensitivity = []
    
    for r in r_values:
        # Adjust control size
        observed_t = base_results['OBSERVED_T']
        observed_c_adj = (observed_t / r) * (CONTROL_PROP / TREATMENT_PROP)
        
        # Recalculate rates
        rate_t = base_results['RATE_T']
        rate_c_adj = base_results['C_P'] / observed_c_adj if observed_c_adj > 0 else 0
        
        # New ITT
        itt_adj = rate_t - rate_c_adj
        rel_lift_adj = (itt_adj / rate_c_adj * 100) if rate_c_adj > 0 else None
        
        sensitivity.append({
            'r': r,
            'observed_c_adj': observed_c_adj,
            'rate_c_adj': rate_c_adj,
            'itt_adj': itt_adj,
            'rel_lift_adj': rel_lift_adj
        })
    
    return sensitivity

# Run sensitivity analysis
r_values = [0.9, 0.95, 1.0, 1.05, 1.1, 1.15, 1.2]
sensitivity = sensitivity_analysis(results, r_values)

print("\n" + "="*80)
print("SENSITIVITY TO ACTIVITY RATIO")
print("="*80)
print("\nr = (Treatment Activity) / (Control Activity)")
print("r > 1 means ads increase browsing activity\n")

print("r     | Obs_Ĉ(r) | Rate_Ĉ(r) | ITT(r) pp | Rel Lift(r) %")
print("-"*60)

for s in sensitivity:
    print(f"{s['r']:>5.2f} | {s['observed_c_adj']:>8.0f} | {s['rate_c_adj']:>9.4f} | "
          f"{s['itt_adj']*100:>9.2f} | {s['rel_lift_adj']:>13.1f}" if s['rel_lift_adj'] else "")

# Find bounds
itt_min = min(s['itt_adj'] for s in sensitivity)
itt_max = max(s['itt_adj'] for s in sensitivity)

print(f"\nITT bounds across r ∈ [0.9, 1.2]:")
print(f"  Minimum: {itt_min*100:.2f} pp (r=0.9, ads decrease activity)")
print(f"  Maximum: {itt_max*100:.2f} pp (r=1.2, ads increase activity)")
print(f"  Range:   {(itt_max-itt_min)*100:.2f} pp")


SENSITIVITY TO ACTIVITY RATIO

r = (Treatment Activity) / (Control Activity)
r > 1 means ads increase browsing activity

r     | Obs_Ĉ(r) | Rate_Ĉ(r) | ITT(r) pp | Rel Lift(r) %
------------------------------------------------------------








ITT bounds across r ∈ [0.9, 1.2]:
  Minimum: 13.92 pp (r=0.9, ads decrease activity)
  Maximum: 13.92 pp (r=1.2, ads increase activity)
  Range:   0.00 pp


## Section 5: Statistical Inference

In [15]:
# Statistical tests
from statsmodels.stats.proportion import proportions_ztest, proportion_confint

print("\n" + "="*80)
print("STATISTICAL INFERENCE")
print("="*80)

# Two-proportion z-test
counts = [int(results['T_P']), int(results['C_P'])]
nobs = [int(results['OBSERVED_T']), int(results['OBSERVED_C_HAT'])]

if all(n > 0 for n in nobs):
    z_stat, p_value = proportions_ztest(counts, nobs)
    
    print("\n1. TWO-PROPORTION Z-TEST:")
    print("-"*40)
    print(f"Z-statistic:  {z_stat:>8.3f}")
    print(f"P-value:      {p_value:>8.6f}")
    print(f"Significant:  {'Yes (p < 0.01)' if p_value < 0.01 else 'Yes (p < 0.05)' if p_value < 0.05 else 'No'}")
    
    # Confidence intervals for rates
    print("\n2. 95% CONFIDENCE INTERVALS:")
    print("-"*40)
    
    # Treatment rate CI
    ci_t_low, ci_t_high = proportion_confint(
        count=int(results['T_P']), 
        nobs=int(results['OBSERVED_T']),
        alpha=0.05,
        method='wilson'
    )
    print(f"Rate_T:       {results['RATE_T']:.4f} [{ci_t_low:.4f}, {ci_t_high:.4f}]")
    
    # Control rate CI
    ci_c_low, ci_c_high = proportion_confint(
        count=int(results['C_P']),
        nobs=int(results['OBSERVED_C_HAT']),
        alpha=0.05,
        method='wilson'
    )
    print(f"Rate_Ĉ:      {results['RATE_C_HAT']:.4f} [{ci_c_low:.4f}, {ci_c_high:.4f}]")
    
    # ITT CI (delta method approximation)
    se_diff = np.sqrt(
        (results['RATE_T'] * (1 - results['RATE_T'])) / results['OBSERVED_T'] +
        (results['RATE_C_HAT'] * (1 - results['RATE_C_HAT'])) / results['OBSERVED_C_HAT']
    )
    itt_ci_low = results['ITT_LIFT'] - 1.96 * se_diff
    itt_ci_high = results['ITT_LIFT'] + 1.96 * se_diff
    
    print(f"\nITT Lift:     {results['ITT_LIFT']*100:.2f} pp [{itt_ci_low*100:.2f}, {itt_ci_high*100:.2f}]")
    print(f"Standard Error: {se_diff*100:.3f} pp")
else:
    print("\n⚠️ Cannot perform statistical tests: zero observations in one group")


STATISTICAL INFERENCE

1. TWO-PROPORTION Z-TEST:
----------------------------------------
Z-statistic:   223.765
P-value:      0.000000
Significant:  Yes (p < 0.01)

2. 95% CONFIDENCE INTERVALS:
----------------------------------------
Rate_T:       0.1392 [0.1389, 0.1394]
Rate_Ĉ:      0.0000 [0.0000, 0.0000]

ITT Lift:     13.92 pp [13.89, 13.94]
Standard Error: 0.011 pp


## Section 6: Summary and Caveats

In [None]:
# Final summary
print("\n" + "="*80)
print("SUMMARY REPORT")
print("="*80)

# Handle None values for display
rel_lift_str = f"{results['RELATIVE_LIFT_PCT']:.1f}%" if results['RELATIVE_LIFT_PCT'] else "N/A"
rev_rel_lift_str = f"{revenue_results['relative_revenue_lift']:.1f}%" if revenue_results.get('relative_revenue_lift') else "N/A"

print(f"""
CONFIGURATION:
--------------
Window:       [{ANALYSIS_START}, {ANALYSIS_END})
Lookback:     {LOOKBACK_DAYS} days
Split:        {TREATMENT_PROP:.0%} / {CONTROL_PROP:.0%}

PRIMARY RESULTS:
----------------
Purchase ITT: {results['ITT_LIFT']*100:.2f} pp ({rel_lift_str} relative lift)
Revenue ITT:  ${revenue_results['itt_revenue_lift']:.2f}/user ({rev_rel_lift_str} lift)
Statistical significance: p < {p_value:.6f}

SAMPLE SIZES:
-------------
Treatment:    {results['OBSERVED_T']:,} users observed
Control:      {results['C_P']:,} purchasers observed
              {results['OBSERVED_C_HAT']:,.0f} users estimated

ROBUSTNESS:
-----------
Feasibility:  {'✓ PASS' if feasible else '✗ FAIL'}
ITT range:    [{itt_min*100:.2f}, {itt_max*100:.2f}] pp for r ∈ [0.9, 1.2]

CAVEATS:
--------
1. Assumes activity parity between groups (tested via sensitivity)
2. Requires consistent user IDs across tables
3. ITT on active users only (cannot see inactive controls)
4. No product-level or click-level analysis possible
5. Platform split assumed exactly {TREATMENT_PROP:.0%}/{CONTROL_PROP:.0%}

INTERPRETATION:
---------------
Being eligible to see ads increases purchase probability by {results['ITT_LIFT']*100:.1f} pp
and revenue by ${revenue_results['itt_revenue_lift']:.2f} per active user.
""")

In [None]:
# Save results to file
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
output_file = f"itt_results_{timestamp}.txt"

with open(output_file, 'w') as f:
    f.write("INTENT-TO-TREAT ANALYSIS RESULTS\n")
    f.write("="*80 + "\n\n")
    f.write(f"Generated: {datetime.now()}\n")
    f.write(f"Window: [{ANALYSIS_START}, {ANALYSIS_END})\n")
    f.write(f"Lookback: {LOOKBACK_DAYS} days\n\n")
    
    # Write key metrics
    f.write("KEY METRICS:\n")
    f.write("-"*40 + "\n")
    f.write(f"Observed_T: {results['OBSERVED_T']:,}\n")
    f.write(f"T_p: {results['T_P']:,}\n")
    f.write(f"T_np: {results['T_NP']:,}\n")
    f.write(f"C_p: {results['C_P']:,}\n")
    f.write(f"C_np_hat: {results['C_NP_HAT']:,.0f}\n")
    f.write(f"Observed_C_hat: {results['OBSERVED_C_HAT']:,.0f}\n\n")
    
    f.write(f"Rate_T: {results['RATE_T']:.4f}\n")
    f.write(f"Rate_C_hat: {results['RATE_C_HAT']:.4f}\n")
    f.write(f"ITT_lift: {results['ITT_LIFT']:.4f} ({results['ITT_LIFT']*100:.2f} pp)\n")
    
    # Handle None values
    if results['RELATIVE_LIFT_PCT']:
        f.write(f"Relative_lift: {results['RELATIVE_LIFT_PCT']:.1f}%\n\n")
    else:
        f.write("Relative_lift: N/A (control rate is zero)\n\n")
    
    f.write(f"ARPU_T: ${revenue_results['arpu_t']:.2f}\n")
    f.write(f"ARPU_C: ${revenue_results['arpu_c']:.2f}\n")
    f.write(f"Revenue_lift: ${revenue_results['itt_revenue_lift']:.2f}\n\n")
    
    f.write(f"P-value: {p_value:.6f}\n")
    f.write(f"95% CI: [{itt_ci_low*100:.2f}, {itt_ci_high*100:.2f}] pp\n")

print(f"\n✅ Results saved to {output_file}")

In [18]:
# Close connection
if conn:
    conn.close()
    print("\n✅ Snowflake connection closed")

print("\n" + "="*80)
print("ANALYSIS COMPLETE")
print("="*80)


✅ Snowflake connection closed

ANALYSIS COMPLETE
