### Jackknife

In [54]:
import numpy as np
import pandas as pd

# Statistic under study: the sample mean
def statistic_fn(data):
    """Return the arithmetic mean of ``data``.

    Args:
        data: Sequence or array of numeric observations.

    Returns:
        The result of ``np.mean(data)``.
    """
    sample_mean = np.mean(data)
    return sample_mean

# Reference ("true") values of the sample mean for N(0, 1) data
def statistic_true_values(n):
    """Return the theoretical mean and standard error of the sample mean.

    The values are those for ``n`` i.i.d. draws from a population with
    mean 0 and unit standard deviation.

    Args:
        n: Sample size.

    Returns:
        Tuple ``(true_mean, true_stderr)`` where ``true_mean`` is ``0`` and
        ``true_stderr`` is ``1 / sqrt(n)``.
    """
    population_mean = 0
    stderr_of_mean = 1 / np.sqrt(n)
    return population_mean, stderr_of_mean

# Nonparametric bootstrap: resample with replacement and re-evaluate the statistic
def bootstrap(data, statistic_fn, num_resamples=10000):
    """Estimate a statistic and its standard error by bootstrap resampling.

    Args:
        data: One-dimensional sample to resample from.
        statistic_fn: Callable mapping a sample to a scalar statistic.
        num_resamples: Number of bootstrap replicates to draw.

    Returns:
        Tuple ``(bootstrap_mean, bootstrap_stderr)``: the mean of the
        replicate statistics and their sample standard deviation (``ddof=1``).
    """
    sample_size = len(data)
    # Each replicate draws `sample_size` observations with replacement from
    # the global NumPy RNG, so results depend on np.random.seed.
    replicates = [
        statistic_fn(np.random.choice(data, size=sample_size, replace=True))
        for _ in range(num_resamples)
    ]
    return np.mean(replicates), np.std(replicates, ddof=1)

# Define the Jackknife function
def jackknife(data, statistic_fn):
    """Compute the jackknife bias-corrected estimate and standard error.

    Args:
        data: One-dimensional sample; each observation is deleted in turn.
        statistic_fn: Callable mapping a sample to a scalar statistic.

    Returns:
        Tuple ``(jackknife_mean, jackknife_stderr)``. ``jackknife_mean`` is
        the mean of the pseudo-values (the bias-corrected estimate) and
        ``jackknife_stderr`` is the jackknife standard error of the statistic.
    """
    n = len(data)
    original_statistic = statistic_fn(data)
    leave_one_out_statistics = np.array(
        [statistic_fn(np.delete(data, i)) for i in range(n)]
    )
    pseudo_values = n * original_statistic - (n - 1) * leave_one_out_statistics
    jackknife_mean = np.mean(pseudo_values)
    # Centre on the mean of the leave-one-out estimates, not on the
    # full-sample statistic: for a biased statistic the two differ, and
    # centring on the latter inflates the standard error. With this centring
    # the result equals std(pseudo_values, ddof=1) / sqrt(n).
    leave_one_out_mean = np.mean(leave_one_out_statistics)
    squared_deviations = (leave_one_out_statistics - leave_one_out_mean) ** 2
    jackknife_stderr = np.sqrt((n - 1) / n * np.sum(squared_deviations))
    return jackknife_mean, jackknife_stderr

# Draw a reproducible sample of size N from the standard normal distribution
np.random.seed(42)
N = 20
data = np.random.normal(loc=0, scale=1, size=N)

# Theoretical reference values for the sample mean
true_mean, true_stderr = statistic_true_values(N)

# Resampling-based estimates of the same statistic
bootstrap_mean, bootstrap_stderr = bootstrap(data, statistic_fn)
jackknife_mean, jackknife_stderr = jackknife(data, statistic_fn)

# One row per method: point estimate and its standard error
results = {
    'Estimator': ['Jackknife', 'Bootstrap', 'True'],
    'Estimate': [jackknife_mean, bootstrap_mean, true_mean],
    'Standard Error': [jackknife_stderr, bootstrap_stderr, true_stderr],
}

# Tabulate and render the comparison
results_df = pd.DataFrame(results)
display(results_df)

Unnamed: 0,Estimator,Estimate,Standard Error
0,Jackknife,-0.171299,0.214669
1,Bootstrap,-0.171305,0.209835
2,True,0.0,0.223607


In [57]:
import numpy as np
import pandas as pd

# Statistic under study: the uncorrected (divide-by-n) sample variance
def statistic_fn(data):
    """Return the uncorrected sample variance of ``data``.

    Args:
        data: Array of numeric observations.

    Returns:
        The sum of squared deviations from the sample mean divided by
        ``len(data)`` (no ``n - 1`` correction).
    """
    deviations = data - np.mean(data)
    sum_of_squares = np.sum(deviations ** 2)
    return sum_of_squares / len(data)

# Function to return the reference values (expected uncorrected variance, stderr)
def statistic_true_values(n, sigma_squared):
    """Return reference values for the sample variance of normal data.

    Args:
        n: Sample size.
        sigma_squared: True population variance.

    Returns:
        Tuple ``(true_uncorrected_variance, true_stderr)``.
        ``true_uncorrected_variance`` is ``(n - 1) / n * sigma_squared``, the
        expected value of the uncorrected (divide-by-n) sample variance.
        ``true_stderr`` is ``sqrt(2 * sigma_squared**2 / (n - 1))``.
    """
    # NOTE: this is the standard error of the unbiased (n - 1) sample variance
    # under normality; the uncorrected variance's standard error is smaller by
    # the factor (n - 1) / n, which is negligible for large n.
    true_stderr = np.sqrt(2 * sigma_squared**2 / (n - 1))

    # Expected value of the biased estimator: it underestimates sigma_squared
    # by the factor (n - 1) / n.
    true_uncorrected_variance = (n - 1) / n * sigma_squared

    return true_uncorrected_variance, true_stderr

# Nonparametric bootstrap: resample with replacement and re-evaluate the statistic
def bootstrap(data, statistic_fn, num_resamples=1000):
    """Estimate a statistic and its standard error by bootstrap resampling.

    Args:
        data: One-dimensional sample to resample from.
        statistic_fn: Callable mapping a sample to a scalar statistic.
        num_resamples: Number of bootstrap replicates to draw.

    Returns:
        Tuple ``(bootstrap_mean, bootstrap_stderr)``: the mean of the
        replicate statistics and their sample standard deviation (``ddof=1``).
    """
    sample_size = len(data)
    # Each replicate draws `sample_size` observations with replacement from
    # the global NumPy RNG, so results depend on np.random.seed.
    replicates = [
        statistic_fn(np.random.choice(data, size=sample_size, replace=True))
        for _ in range(num_resamples)
    ]
    return np.mean(replicates), np.std(replicates, ddof=1)

# Define the Jackknife function
def jackknife(data, statistic_fn):
    """Compute the jackknife bias-corrected estimate and standard error.

    Args:
        data: One-dimensional sample; each observation is deleted in turn.
        statistic_fn: Callable mapping a sample to a scalar statistic.

    Returns:
        Tuple ``(jackknife_mean, jackknife_stderr)``. ``jackknife_mean`` is
        the mean of the pseudo-values (the bias-corrected estimate) and
        ``jackknife_stderr`` is the jackknife standard error of the statistic.
    """
    n = len(data)
    original_statistic = statistic_fn(data)
    leave_one_out_statistics = np.array(
        [statistic_fn(np.delete(data, i)) for i in range(n)]
    )
    pseudo_values = n * original_statistic - (n - 1) * leave_one_out_statistics
    jackknife_mean = np.mean(pseudo_values)
    # Centre on the mean of the leave-one-out estimates, not on the
    # full-sample statistic: for a biased statistic (such as the uncorrected
    # variance) the two differ, and centring on the latter inflates the
    # standard error. With this centring the result equals
    # std(pseudo_values, ddof=1) / sqrt(n).
    leave_one_out_mean = np.mean(leave_one_out_statistics)
    squared_deviations = (leave_one_out_statistics - leave_one_out_mean) ** 2
    jackknife_stderr = np.sqrt((n - 1) / n * np.sum(squared_deviations))
    return jackknife_mean, jackknife_stderr

# Draw a reproducible sample of size N from N(0, sigma_squared)
np.random.seed(42)
N = 1000
sigma_squared = 1  # True population variance
data = np.random.normal(loc=0, scale=np.sqrt(sigma_squared), size=N)

# Theoretical reference values for the sample variance
true_uncorrected_variance, true_stderr = statistic_true_values(N, sigma_squared)

# Resampling-based estimates of the same statistic
bootstrap_mean, bootstrap_stderr = bootstrap(data, statistic_fn)
jackknife_mean, jackknife_stderr = jackknife(data, statistic_fn)

# One row per method: point estimate and its standard error
results = {
    'Estimator': ['Jackknife', 'Bootstrap', 'True'],
    'Estimate': [jackknife_mean, bootstrap_mean, true_uncorrected_variance],
    'Standard Error': [jackknife_stderr, bootstrap_stderr, true_stderr],
}

# Tabulate the comparison; the bare expression renders it as the cell output
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,Estimator,Estimate,Standard Error
0,Jackknife,0.958864,0.043607
1,Bootstrap,0.956677,0.044689
2,True,0.999,0.044744
