In [None]:
import matplotlib.pyplot as plt
import numpy as np
from math import comb, factorial
from scipy.stats import beta

def calcPK(n, c, k):
    """Pass@k estimate 1 - C(n-c, k) / C(n, k) from c successes in n samples.

    Args:
        n: total number of samples drawn.
        c: number of those samples that succeeded.
        k: attempt budget.

    Returns:
        The estimate as a float. It is exactly 1.0 when fewer than k samples
        failed, because then every size-k subset contains a success.
    """
    num_failures = n - c
    if num_failures >= k:
        # prod_{j = n-c+1}^{n} (1 - k/j) equals C(n-c, k) / C(n, k); the
        # product form avoids building large binomial coefficients.
        tail = np.arange(num_failures + 1, n + 1)
        return 1.0 - np.prod(1.0 - k / tail)
    return 1.0

def calcGeo(n, c, k):
    """Plug-in pass@k estimate 1 - (1 - c/n)**k.

    Uses the observed success rate c / n as the per-attempt success
    probability and returns the chance that at least one of k attempts
    succeeds under that rate.
    """
    miss_rate = 1 - c / n
    return 1 - miss_rate ** k

def calcVar(n, k, p):
    """Second moment minus squared mean of C(n-c, k) / C(n, k) for success probability p.

    Accumulates, over every possible success count i in 0..n,
        C(n-i, k) / C(n, k) * C(n-k, i) * p**i * (1-p)**(n-i)
    and subtracts (1-p)**(2k).  The file uses the square root of this value
    as the "theoretical std" of the pass@k estimate.

    Args:
        n: number of samples per experiment.
        k: attempt budget.
        p: per-sample success probability.
    """
    second_moment = 0
    for successes in range(n + 1):
        # Split the summand into its two factors; the evaluation order is the
        # same as multiplying them inline.
        ratio_part = comb(n - successes, k) * p ** successes / comb(n, k)
        mass_part = comb(n - k, successes) * (1 - p) ** (n - successes)
        second_moment += ratio_part * mass_part
    squared_mean = (1 - p) ** (2 * k)
    return second_moment - squared_mean

def calcVar2(n, k, p):
    """Alternative form of the variance sum used for the "Theoretical std 2" curve.

    Accumulates, over every success count i in 0..n,
        C(n-k, i)**2 / C(n, i) * p**i * (1-p)**(n-i)
    and subtracts (1-p)**(2k).

    Args:
        n: number of samples per experiment.
        k: attempt budget.
        p: per-sample success probability.
    """
    second_moment = 0
    for successes in range(n + 1):
        ways = comb(n - k, successes)
        second_moment += (
            ways / comb(n, successes) * ways
            * p ** successes * (1 - p) ** (n - successes)
        )
    squared_mean = (1 - p) ** (2 * k)
    return second_moment - squared_mean

def calcEstVar(n, k, c):
    """Plug-in variance estimate for the pass@k estimate from one experiment.

    The unknown success probability is replaced by the observed rate c / n
    and the same sum as in calcVar is evaluated at that point:
        sum_i C(n-i, k) / C(n, k) * C(n-k, i) * p**i * (1-p)**(n-i)  -  (1-p)**(2k)

    Only the point estimate p = c / n is used; no interval bounds on p enter
    the result.

    Args:
        n: number of samples drawn in the experiment.
        k: attempt budget.
        c: number of successful samples observed.

    Returns:
        The variance estimate as a float.
    """
    p_hat = c / n
    second_moment = 0
    for i in range(n + 1):
        second_moment += (
            comb(n - i, k) * p_hat**i / comb(n, k)
            * (comb(n - k, i) * (1 - p_hat)**(n - i))
        )
    return second_moment - (1 - p_hat)**(2 * k)

import random
def trial(p: float) -> bool:
    """Run one Bernoulli trial; True when a uniform draw from [0, 1) falls below p."""
    draw = random.random()
    return draw < p

def atk(k: int, p: float) -> bool:
    """Return True if any of up to k Bernoulli(p) trials succeeds.

    Trials are drawn one at a time and drawing stops at the first success,
    so at most k random numbers are consumed.
    """
    # any() over a generator is lazy: it stops at the first True.
    return any(trial(p) for _ in range(k))

def run_exp(p: float, k_values, n: int, is_pk: bool) -> tuple[list[float], int]:
    """Simulate one experiment of n Bernoulli(p) samples and estimate pass@k.

    Args:
        p: per-sample success probability.
        k_values: iterable of attempt budgets to evaluate.
        n: number of samples to draw.
        is_pk: use calcPK when True, calcGeo otherwise.

    Returns:
        A pair (estimates, c): one estimate per entry of k_values, and the
        number of successes observed among the n samples.
    """
    c = sum(trial(p) for _ in range(n))
    estimator = calcPK if is_pk else calcGeo
    return [estimator(n, c, k) for k in k_values], c

# --- Simulation configuration ------------------------------------------------
# Alternative multi-problem setting: p_values = [0.002, 0.00001, 0.2, 0.3, 0.9]
p_values = [0.002]  # per-problem success probabilities
n = 200             # samples drawn per problem in each experiment
NUM_TRIALS = 50     # independent repetitions used for the empirical spread
SEED = 0            # fixed seed so a fresh run reproduces the same figures

# Log-spaced grid of k from 1 to n.  Rounding maps neighbouring small values
# onto the same integer, so duplicates are removed (np.unique also sorts).
k_values = np.unique(np.round(np.logspace(0, np.log10(n), 20)).astype(int))

random.seed(SEED)

# --- Run experiments ---------------------------------------------------------
# Each trial draws n samples per problem, then averages the pass@k estimates
# over problems.  Alongside, a per-trial plug-in std of that average is
# computed from the observed success counts.
trial_results = []
estimated_vars = []
for _ in range(NUM_TRIALS):
    p_results = []
    est_vars_per_p = []
    for p in p_values:
        result_list, c = run_exp(p, k_values, n, is_pk=True)
        p_results.append(result_list)
        est_vars_per_p.append([calcEstVar(n, k, c) for k in k_values])

    trial_results.append(np.mean(np.array(p_results), axis=0))
    # Variance of a mean of independent estimates: sum of variances / m**2.
    estimated_vars.append(np.sqrt(np.sum(est_vars_per_p, axis=0) / len(p_values) ** 2))

trial_results = np.stack(trial_results, axis=0)            # (NUM_TRIALS, len(k_values))
estimated_vars = np.mean(np.stack(estimated_vars, axis=0), axis=0)
results = np.mean(trial_results, axis=0)
std_devs = np.std(trial_results, axis=0)
# NOTE: despite the "_vars" suffix, these names all hold standard deviations.
empirical_vars = std_devs

theoretical_vars = np.array([
    np.sqrt(sum(calcVar(n, k, p) for p in p_values) / len(p_values) ** 2)
    for k in k_values
])
t2 = [
    np.sqrt(sum(calcVar2(n, k, p) for p in p_values) / len(p_values) ** 2)
    for k in k_values
]

# --- Figure 1: mean pass@k with a +/- one-std band ---------------------------
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(k_values, results, label=f'n={n}')
ax.fill_between(k_values, results - std_devs, results + std_devs, alpha=0.2)
ax.set_title("Pass@k vs k")
ax.set_xlabel('k')
ax.set_xscale('log')
ax.set_ylabel('Pass@k')
ax.legend()
ax.grid(True)
plt.show()

# --- Figure 2: empirical vs. theoretical vs. estimated std -------------------
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(k_values, empirical_vars, label='Empirical std')
ax.plot(k_values, theoretical_vars, label='Theoretical std', linestyle='--')
ax.plot(k_values, t2, label='Theoretical std 2', linestyle='--')
ax.plot(k_values, estimated_vars, label='Estimated std', linestyle='-.')

# Reference level 1: q * (1 - q) with q = (1 - p) ** n, i.e. the Bernoulli
# variance of the event "all n draws fail", combined across problems.
hline_values = [(1 - (1 - p) ** n) * (1 - p) ** n for p in p_values]
hline_avg = np.sqrt(np.sum(hline_values) / len(p_values) ** 2)
ax.axhline(y=hline_avg, color='r', linestyle='-', label='hline (avg across p)')

# Reference level 2: p * (1 - p) / n, the variance of the observed success
# rate c / n, combined across problems.
new_hline_values = [p * (1 - p) / n for p in p_values]
new_hline_avg = np.sqrt(np.sum(new_hline_values) / len(p_values) ** 2)
ax.axhline(y=new_hline_avg, color='b', linestyle='-', label='new hline (avg across p)')

# Every series on this figure is a standard deviation, so label it as such.
ax.set_title('Standard Deviation Comparison')
ax.set_xlabel('k')
ax.set_xscale('log')
ax.set_ylabel('Standard deviation')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
def _show_summand_identity(n, c, k, p):
    """Display one term of the second-moment sum written in two ways.

    First form:  C(n, c) * p**c * (1-p)**(n-c) * (C(n-c, k) / C(n, k))**2
    Second form: C(n-k, c)**2 / C(n, c) * p**c * (1-p)**(n-c)   (as in calcVar2)

    The two displayed numbers are expected to agree.  The values are taken as
    function arguments so the notebook-level n, p, c and k used by the
    simulation are not overwritten by this spot check.
    """
    display(comb(n, c) * p**c * (1-p)**(n-c) * (comb(n-c, k) / comb(n, k)) ** 2)
    display(comb(n-k, c)**2 / comb(n, c) * p**c * (1-p)**(n-c))


_show_summand_identity(n=15, c=3, k=1, p=0.3)


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Pass@k curve for a single solve probability, written to pass_at_k.pdf.
#
# The grid below uses figure-specific names (curve_max_k, curve_ks) instead of
# `n` and `k_values`, so the values the simulation cell stored under those
# names stay intact for the diagnostic cells further down.
p_values_set_1 = [0.04]
curve_max_k = 400
curve_ks = np.arange(1, curve_max_k + 1)

# Exact pass@k for each p: 1 - (1 - p)**k
pass_at_k_set_1 = [1 - (1 - p_solve) ** curve_ks for p_solve in p_values_set_1]
average_pass_at_k_set_1 = np.mean(pass_at_k_set_1, axis=0)

# Create figure and axis
fig, ax = plt.subplots(figsize=(13, 8))

# One line per solve probability, reusing the curves computed above
for p_solve, curve in zip(p_values_set_1, pass_at_k_set_1):
    ax.plot(curve_ks, curve, linewidth=3.5, label=f'p={p_solve}', linestyle='-', alpha=0.75)

# Set plot title and labels
ax.set_title('Pass@k for Solve Probability p=0.04', fontsize=26)
ax.set_xlabel('k', fontsize=20)
ax.set_ylabel('Pass@k', fontsize=20)
ax.set_xscale('log')

# Configure legend and grid
ax.legend(fontsize=18, frameon=True, fancybox=True, framealpha=0.8, loc='lower right')
ax.grid(True, linestyle='--', alpha=0.7)
ax.tick_params(axis='both', which='major', labelsize=16)

# Adjust layout and save figure
plt.tight_layout()
fig.savefig("pass_at_k.pdf", format="pdf", dpi=300)


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Pass@k curves for two sets of solve probabilities plus each set's average,
# written to pass_at_k_sets.pdf.
#
# The grid below uses figure-specific names (curve_max_k, curve_ks) instead of
# `n` and `k_values`, so the values the simulation cell stored under those
# names stay intact for the diagnostic cells further down.
p_values_set_1 = [0.001, 0.7, 0.9]
p_values_set_2 = [0.01, 0.1, 0.25]
curve_max_k = 400
curve_ks = np.arange(1, curve_max_k + 1)

# Exact pass@k for each p: 1 - (1 - p)**k, and the per-set mean curve
pass_at_k_set_1 = [1 - (1 - p_solve) ** curve_ks for p_solve in p_values_set_1]
pass_at_k_set_2 = [1 - (1 - p_solve) ** curve_ks for p_solve in p_values_set_2]
average_pass_at_k_set_1 = np.mean(pass_at_k_set_1, axis=0)
average_pass_at_k_set_2 = np.mean(pass_at_k_set_2, axis=0)

# Create figure and axis
fig, ax = plt.subplots(figsize=(13, 8))

# Set 1: thick average line followed by the individual curves (solid)
ax.plot(curve_ks, average_pass_at_k_set_1, color='black', linewidth=3.5, linestyle='-', label='Average (Set 1)')
for p_solve, curve in zip(p_values_set_1, pass_at_k_set_1):
    ax.plot(curve_ks, curve, linewidth=1.8, label=f'p={p_solve} (Set 1)', linestyle='-', alpha=0.75)

# Set 2: thick average line followed by the individual curves (dash-dot)
ax.plot(curve_ks, average_pass_at_k_set_2, color='red', linewidth=3.5, linestyle='-.', label='Average (Set 2)')
for p_solve, curve in zip(p_values_set_2, pass_at_k_set_2):
    ax.plot(curve_ks, curve, linewidth=1.8, linestyle='-.', label=f'p={p_solve} (Set 2)', alpha=0.75)

# Set plot title and labels
ax.set_title('Pass@k Across Different Solve Probability Sets', fontsize=26)
ax.set_xlabel('k', fontsize=20)
ax.set_ylabel('Pass@k', fontsize=20)
ax.set_xscale('log')

# Configure legend and grid
ax.legend(fontsize=18, frameon=True, fancybox=True, framealpha=0.8, loc='lower right')
ax.grid(True, linestyle='--', alpha=0.7)
ax.tick_params(axis='both', which='major', labelsize=16)

# Adjust layout and save figure
plt.tight_layout()
fig.savefig("pass_at_k_sets.pdf", format="pdf", dpi=300)


In [None]:
# Spread of the pass@k estimate across trials at the fifth-from-last k in the
# grid (rows of trial_results are trials, columns follow k_values).
fifth_from_last = trial_results[:, -5]
display(fifth_from_last)
plt.hist(fifth_from_last)

In [None]:
# Ratio of the theoretical std to the estimated std scaled by log10(k + 1),
# plotted against the position in the k grid.
# Relies on k_values, n, p_values, theoretical_vars and estimated_vars from the
# simulation cell; k_values must have the same length as the two std arrays.
std_ratio = theoretical_vars / (estimated_vars * np.log10(k_values + 1))
plt.plot(std_ratio)

# Reference level: q * (1 - q) with q = (1 - p) ** n, combined across p
hline_values = [(1 - all_fail) * all_fail for all_fail in ((1 - p) ** n for p in p_values)]
hline_avg = np.sqrt(np.sum(hline_values) / len(p_values) ** 2)
plt.axhline(y=hline_avg, color='r', linestyle='-', label='hline (avg across p)')

plt.xscale('log')

In [None]:
# Show the estimated values per k produced by the simulation cell.
# NOTE: despite the name these are standard deviations (a square root is
# applied before averaging over trials).
estimated_vars