## Installation

If you haven't installed ExactCIs yet, uncomment and run one of the following commands:

In [None]:
# For basic installation
# !pip install exactcis

# For installation with NumPy acceleration (recommended)
# !pip install "exactcis[numpy]"

## Basic Usage

Let's start with a simple example: a 2×2 contingency table from a case-control study, with exposure status in the rows and case/control status in the columns:

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from exactcis import compute_all_cis

# The 2×2 table used throughout this tutorial
#              Cases   Controls
# Exposed      a=12    b=5
# Unexposed    c=8     d=10

a, b, c, d = 12, 5, 8, 10

# Sample odds ratio: product of the main diagonal over the off-diagonal
odds_ratio = a * d / (b * c)
print(f"Odds Ratio: {odds_ratio:.3f}")

# A single call yields a mapping of method name -> (lower, upper) at 95% confidence
results = compute_all_cis(a, b, c, d, alpha=0.05)

# One row per method; each interval is unpacked into its two bounds
df_results = pd.DataFrame(
    [
        {
            "Method": name,
            "Lower CI": lo,
            "Upper CI": hi,
            "CI Width": hi - lo,
        }
        for name, (lo, hi) in results.items()
    ]
)

# Last expression of the cell, so the notebook renders it: narrowest interval first
df_results.sort_values("CI Width")

## Visualizing the Confidence Intervals

We can visualize the confidence intervals for easier comparison:

In [None]:
# Marker position: the geometric midpoint of each interval, i.e. the point that
# sits visually halfway between the bounds once the axis is log-scaled
ci_lower = df_results["Lower CI"]
ci_upper = df_results["Upper CI"]
ci_mid = np.sqrt(ci_lower * ci_upper)

plt.figure(figsize=(10, 6))
plt.errorbar(
    x=df_results["Method"],
    y=ci_mid,
    # Asymmetric bars: distance down to the lower bound, then up to the upper bound
    yerr=[ci_mid - ci_lower, ci_upper - ci_mid],
    fmt="o",
    capsize=5,
)

# Reference line at the observed odds ratio
plt.axhline(y=odds_ratio, color="r", linestyle="--", label="Observed Odds Ratio")

# Axis cosmetics
plt.yscale("log")
plt.ylabel("Odds Ratio (log scale)")
plt.xticks(rotation=45)
plt.title("95% Confidence Intervals for Odds Ratio by Method")
plt.grid(True, alpha=0.3)
plt.legend()

plt.tight_layout()
plt.show()

## Working with Individual Methods

Sometimes you might want to use just one specific method. Let's see how to do that:

In [None]:
from exactcis.methods import (
    exact_ci_conditional,
    exact_ci_midp,
    exact_ci_blaker,
    exact_ci_unconditional,
    exact_ci_wald_haldane
)

# Pack the base table once so both calls below receive the same four counts
table = (a, b, c, d)

# alpha=0.01 requests a 99% interval from the conditional method
ci_99 = exact_ci_conditional(*table, alpha=0.01)
print(f"99% Conditional CI: ({ci_99[0]:.3f}, {ci_99[1]:.3f})")

# Unconditional method at 95%, with grid_size=100 (intended as a larger grid
# for better precision)
ci_unconditional = exact_ci_unconditional(*table, alpha=0.05, grid_size=100)
print(f"Unconditional CI (grid_size=100): ({ci_unconditional[0]:.3f}, {ci_unconditional[1]:.3f})")

## Handling Edge Cases: Tables with Zero Cells

Let's see how different methods handle tables with zeros:

In [None]:
# A table whose third cell (c) is zero
a_zero, b_zero, c_zero, d_zero = (10, 20, 0, 15)

# Intervals from every method, at the default alpha
results_zero = compute_all_cis(a_zero, b_zero, c_zero, d_zero)

# Report one line per method, handling the special cases first:
#   - None          -> the method produced no interval
#   - infinite upper -> printed as the word "Infinity" rather than "inf"
for method, ci in results_zero.items():
    if ci is None:
        print(f"{method:12s} CI: Method failed for this data")
        continue

    lower, upper = ci
    if not np.isinf(upper):
        print(f"{method:12s} CI: ({lower:.3f}, {upper:.3f})")
        continue

    print(f"{method:12s} CI: ({lower:.3f}, Infinity)")

## Using Timeout for Computationally Intensive Cases

For large or imbalanced tables, the unconditional method can be slow. We can use the timeout parameter to prevent excessively long computations:

In [None]:
# Larger, imbalanced table
a_large, b_large, c_large, d_large = 50, 150, 5, 100

# Use unconditional method with a 5-second timeout
import time

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# system clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()
try:
    # Only the call that can actually fail is guarded.
    ci_with_timeout = exact_ci_unconditional(a_large, b_large, c_large, d_large, timeout=5)
except Exception as e:
    # Tutorial boundary: report the problem and carry on to the faster methods.
    print(f"Error: {e}")
else:
    print(f"Time taken: {time.perf_counter() - start_time:.2f} seconds")

    # A None result is how this cell recognises that the timeout was hit.
    if ci_with_timeout is None:
        print("Calculation timed out. Consider using a different method.")
    else:
        print(f"Unconditional CI: ({ci_with_timeout[0]:.3f}, {ci_with_timeout[1]:.3f})")

# Compare with faster methods
print("\nUsing faster methods:")
print(f"Conditional: {exact_ci_conditional(a_large, b_large, c_large, d_large)}")
print(f"Wald-Haldane: {exact_ci_wald_haldane(a_large, b_large, c_large, d_large)}")

## Comparing Methods for Different Sample Sizes

Let's compare how the different methods perform with tables of varying sample sizes while maintaining the same odds ratio:

In [None]:
# Starting with our base table: a=12, b=5, c=8, d=10
# The odds ratio is (12*10)/(5*8) = 3.0

# Create tables with the same odds ratio but different sample sizes
tables = [
    (3, 1, 2, 2),      # Tiny: n=8
    (6, 2, 4, 4),      # Small: n=16
    (12, 5, 8, 10),    # Original: n=35
    (24, 10, 16, 20),  # 2x Original: n=70
    (60, 25, 40, 50),  # 5x Original: n=175
]

# Calculate CIs for each table and each method.
# The loop variables are deliberately NOT named a, b, c, d: at notebook scope
# that would overwrite the base table defined in the Basic Usage cell, and any
# earlier cell re-run afterwards would silently use the last (5x) table.
ci_by_size = {}
for cell_a, cell_b, cell_c, cell_d in tables:
    n_total = cell_a + cell_b + cell_c + cell_d
    or_actual = (cell_a * cell_d) / (cell_b * cell_c)
    sample_name = f"n={n_total} (a={cell_a},b={cell_b},c={cell_c},d={cell_d})"
    ci_by_size[sample_name] = {
        "sample_size": n_total,
        "odds_ratio": or_actual,
        "results": compute_all_cis(cell_a, cell_b, cell_c, cell_d),
    }

# Extract CI widths for plotting.
# Method names are taken from the first table's results; the loop below looks
# up these same names in every other table's results.
methods = list(next(iter(ci_by_size.values()))["results"].keys())
sample_names = list(ci_by_size)

# Create DataFrame with CI widths (long format: one row per sample/method pair)
ci_widths = []
for sample_name in sample_names:
    entry = ci_by_size[sample_name]
    for method in methods:
        ci = entry["results"][method]
        if ci is not None:
            lower, upper = ci
            width = upper - lower
        else:
            # No interval from this method: record NaN instead of a width
            width = np.nan

        ci_widths.append({
            "Sample": sample_name,
            "Sample Size": entry["sample_size"],
            "Method": method,
            "CI Width": width,
            "Odds Ratio": entry["odds_ratio"],
        })

df_widths = pd.DataFrame(ci_widths)

# Plot CI widths by sample size, one line per method
plt.figure(figsize=(12, 8))

for method in methods:
    data = df_widths[df_widths["Method"] == method]
    plt.plot(data["Sample Size"], data["CI Width"], "o-", label=method)

plt.xlabel("Sample Size")
plt.ylabel("CI Width")
plt.title("Confidence Interval Width by Sample Size and Method")
plt.grid(True, alpha=0.3)
plt.legend()
plt.show()

## Real-World Example: COVID-19 Vaccine Efficacy

Let's analyze data from a hypothetical COVID-19 vaccine trial:

In [None]:
# Trial data
#                  Infected   Not Infected
# Vaccinated          8          9992
# Not Vaccinated     86          9914

vax_infected = 8
vax_not_infected = 9992
unvax_infected = 86
unvax_not_infected = 9914

# Calculate vaccine efficacy
# VE = 1 - Odds Ratio
# (Infection is rare in both arms here, so the odds ratio is close to the
# risk ratio: roughly 0.092 versus 0.093 for these counts.)
odds_ratio = (vax_infected * unvax_not_infected) / (vax_not_infected * unvax_infected)
ve = 1 - odds_ratio
print(f"Vaccine Efficacy: {ve:.1%}")

# Calculate CIs for the odds ratio
results = compute_all_cis(vax_infected, vax_not_infected, unvax_infected, unvax_not_infected)

# Convert to vaccine efficacy CIs.
# A method may report None instead of an interval (the other cells in this
# notebook check for that), so None is carried through rather than unpacked,
# which would raise a TypeError.
ve_results = {}
for method, ci in results.items():
    if ci is None:
        ve_results[method] = None
        continue
    lower, upper = ci
    # For VE, we flip the CI for the odds ratio: 1 - x is decreasing in x,
    # so the OR upper bound becomes the VE lower bound and vice versa.
    ve_results[method] = (1 - upper, 1 - lower)

# Display results
for method, ve_ci in ve_results.items():
    if ve_ci is None:
        print(f"{method:12s} VE: Method failed for this data")
    else:
        lower, upper = ve_ci
        print(f"{method:12s} VE: {lower:.1%} to {upper:.1%}")

## Method Selection Guide

Here's a quick guide to choosing the appropriate method for your data:

In [None]:
# Create a function to recommend methods based on data characteristics
def recommend_methods(a, b, c, d):
    """Print a short method recommendation for the 2×2 table [a, b; c, d].

    The table is summarised (cells, total, smallest cell) and then exactly one
    block of advice is printed, chosen by the first rule that matches:

    1. the smallest cell is 0,
    2. the smallest cell is between 1 and 4,
    3. the total sample size is below 50,
    4. otherwise (large sample, no small cells).

    Nothing is returned; all output goes to stdout.
    """
    cells = (a, b, c, d)
    total = sum(cells)
    smallest = min(cells)

    # Summary header, followed by a blank separator line
    for header in (
        f"Table: [{a}, {b}; {c}, {d}]",
        f"Total sample size: {total}",
        f"Minimum cell count: {smallest}",
        "",
    ):
        print(header)

    # Pick the advice block; the order of the checks defines the priority
    if smallest == 0:
        advice = (
            "⚠️ Table contains zeros.",
            "✅ Recommended methods: Conditional, Wald-Haldane",
            "⚠️ Use caution with: MidP, Blaker",
            "❌ Not recommended: Unconditional (may be unstable)",
        )
    elif 0 < smallest < 5:
        advice = (
            "⚠️ Table contains small cells (< 5).",
            "✅ Recommended methods: Conditional, MidP, Blaker",
            "⚠️ Use caution with: Unconditional (may be slow)",
            "❌ Not recommended: Wald-Haldane (unless you need an approximation)",
        )
    elif total < 50:
        advice = (
            "⚠️ Small sample size (< 50).",
            "✅ Recommended methods: MidP, Blaker, Unconditional",
            "⚠️ Use caution with: Conditional (may be overly conservative)",
            "❌ Not recommended: Wald-Haldane (unless you need a quick approximation)",
        )
    else:
        advice = (
            "✅ Large sample size with no small cells.",
            "✅ Recommended methods: Any method should work well",
            "ℹ️ For computational efficiency: Wald-Haldane",
            "ℹ️ For guaranteed coverage: Conditional",
            "ℹ️ For balanced power and coverage: MidP or Blaker",
        )

    for line in advice:
        print(line)

# Examples: one table per branch of the guide that we want to demonstrate
examples = [
    ("Example 1: Table with zeros", (0, 5, 10, 15)),
    ("Example 2: Small cells", (3, 4, 2, 8)),
    ("Example 3: Larger sample", (25, 30, 15, 40)),
]

for position, (title, cells) in enumerate(examples):
    # A dashed rule goes between consecutive examples, not before the first
    if position > 0:
        print("\n" + "-"*50 + "\n")
    print(title)
    recommend_methods(*cells)

## Conclusion

ExactCIs provides several methods for calculating confidence intervals for odds ratios, each with its own advantages. The choice of method depends on your data characteristics, computational resources, and statistical requirements.

Key takeaways:
- For small samples or tables with zeros, use conditional methods
- For balanced power and coverage, consider MidP or Blaker
- For large samples where computational efficiency is important, use Wald-Haldane
- When comparing methods, look at CI width and coverage properties