<a href="https://colab.research.google.com/github/taylor33189-beep/Taylor_Hoskins_Repository/blob/main/The_bootstrap.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np

# --- 1. Load and Prepare Data ---
# The file is read as two unlabeled columns (no header row): the body-fat
# measurement and a numeric sex code.
data_path = "/content/bodyfat (1).csv"
column_names = ['BodyFat', 'Sex']

# First attempt: split fields on any run of commas and/or whitespace. A regex
# separator requires the pure-Python parsing engine.
try:
    df = pd.read_csv(data_path, delimiter=r'[,\s]+', header=None, names=column_names, engine='python')
except (pd.errors.ParserError, ValueError):
    # Only parsing failures fall back to the default comma-separated reader.
    # Other errors (e.g. a missing file) propagate from the first attempt
    # instead of being swallowed and retried.
    df = pd.read_csv(data_path, header=None, names=column_names)

# Separate data by sex (code 1 -> male group, code 2 -> female group)
male_body_fat = df[df['Sex'] == 1]['BodyFat'].values
female_body_fat = df[df['Sex'] == 2]['BodyFat'].values

# Fail fast when a group is empty (e.g. the file has a header row or uses a
# different sex coding). Otherwise every later mean is NaN and the bootstrap
# silently produces NaN intervals.
if len(male_body_fat) == 0 or len(female_body_fat) == 0:
    raise ValueError(
        f"Expected rows with Sex == 1 and Sex == 2 in {data_path!r}, "
        f"found {len(male_body_fat)} and {len(female_body_fat)}; "
        "check the delimiter, header row and sex coding."
    )

# --- 2. Calculate Observed Difference ---
# Point estimate of the effect: average body fat in the female group minus
# average body fat in the male group, computed on the original sample.
mean_female = female_body_fat.mean()
mean_male = male_body_fat.mean()
observed_difference = mean_female - mean_male

# --- 3. Set up Bootstrapping ---
# Number of bootstrap replicates
num_bootstrap_replicates = 10000
# Fixed seed so that re-running the analysis reproduces the same replicates
# (and therefore the same standard error and confidence intervals).
random_seed = 42
rng = np.random.default_rng(random_seed)

# --- 4. Perform Bootstrapping ---
# Draw all replicates at once: each row is one resample, taken with
# replacement and with the same size as the original group. The two groups
# are resampled independently of each other.
male_resamples = rng.choice(
    male_body_fat,
    size=(num_bootstrap_replicates, len(male_body_fat)),
    replace=True,
)
female_resamples = rng.choice(
    female_body_fat,
    size=(num_bootstrap_replicates, len(female_body_fat)),
    replace=True,
)

# One (female mean - male mean) difference per replicate; 1-D array of
# length num_bootstrap_replicates.
bootstrap_differences = female_resamples.mean(axis=1) - male_resamples.mean(axis=1)

# --- 5. Calculate Confidence Intervals ---
# **Method 1: Empirical (Percentile) CI**
# The 2.5th and 97.5th percentiles of the bootstrap distribution bound the
# central 95% of the replicated differences.
empirical_ci = np.percentile(bootstrap_differences, [2.5, 97.5])

# **Method 2: Parametric (Normal Approximation) CI**
# Bootstrap standard error (SE): the *sample* standard deviation of the
# replicates, hence ddof=1 (NumPy's default ddof=0 is the population formula).
bootstrap_standard_error = np.std(bootstrap_differences, ddof=1)

# Critical z-value for 95% CI
z_critical = 1.96

# Interval centered on the observed difference (not on the bootstrap mean),
# extending z_critical standard errors to each side.
parametric_ci_lower = observed_difference - z_critical * bootstrap_standard_error
parametric_ci_upper = observed_difference + z_critical * bootstrap_standard_error
parametric_ci = np.array([parametric_ci_lower, parametric_ci_upper])

# --- 6. Print Results ---
# Assemble the report first, then emit it one line at a time: the point
# estimate, its bootstrap standard error, and both 95% intervals.
report_lines = (
    f"Observed Difference (Female - Male): {observed_difference:.3f}",
    f"Bootstrap Standard Error: {bootstrap_standard_error:.3f}",
    f"95% Empirical (Percentile) CI: {empirical_ci}",
    f"95% Parametric (Normal Approx) CI: {parametric_ci}",
)
for report_line in report_lines:
    print(report_line)

Observed Difference (Female - Male): 7.344
Bootstrap Standard Error: 2.420
95% Empirical (Percentile) CI: [ 2.60838462 12.05848077]
95% Parametric (Normal Approx) CI: [ 2.60011551 12.08757679]
