In [1]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from ConfSelect import weighted_BH, weighted_CS, eval_sel
# Nominal level passed to the weighted BH procedure.
q = 0.5

# Quantiles of the training responses used as the selection threshold c.
quantiles = [0.6, 0.7, 0.8, 0.9]

# Simulation design.
n_train = 8000      # X1 draws used to fit the regression
n_calib = 8000      # X1 draws used as the calibration set
n_test = 1000       # X2 draws used as the test set
n_seeds = 100       # replications: seeds 1..n_seeds
clip_value = 100    # constant substituted for y in the clipped score when y > c


def cubic_features(x):
    """Return the (len(x), 3) design matrix with columns x, x**2 and x**3."""
    return np.vstack([x, x**2, x**3]).T


# One accumulator per quantile (same order as `quantiles`); every list is
# filled in seed order, so the summary statistics below see the replications
# in the same order as a quantile-outer / seed-inner nesting would.
metrics = [{'fdp': [], 'power': [], 'nsel': []} for _ in quantiles]

# The data and the fitted model depend only on the seed, so the seed loop is
# the outer one: each data set is generated and fitted once and then reused
# for every threshold quantile.
for seed in range(1, n_seeds + 1):
    # Step 1: generate the covariates (X1 -> train + calibration, X2 -> test).
    np.random.seed(seed)  # For reproducibility
    X1 = np.random.normal(0.5, 0.5, n_train + n_calib)
    X2 = np.random.normal(0, 0.3, n_test)

    # Step 2: responses follow Y = X - X^3 + noise on the stacked covariates,
    # so the first n_train + n_calib entries of Y belong to X1 and the rest to X2.
    X = np.concatenate((X1, X2))
    e = np.random.normal(0, 0.3, X.shape[0])
    Y = X - X**3 + e

    # Snapshot the generator right after the data draws. It is restored before
    # every weighted_BH call so that, should weighted_BH draw from NumPy's
    # global generator (not visible from this notebook -- TODO confirm), each
    # (seed, quantile) run sees exactly the random stream it would see when
    # run on its own.
    rng_state = np.random.get_state()

    # Cubic polynomial features for the two covariate samples.
    X1_poly = cubic_features(X1)
    X2_poly = cubic_features(X2)

    # Split: training / calibration come from X1, the test set is X2.
    ttrain = X1_poly[:n_train]
    dcalib = X1_poly[n_train:n_train + n_calib]
    dtest = X2_poly
    y_train = Y[:n_train]
    y_calib = Y[n_train:n_train + n_calib]
    y_test = Y[n_train + n_calib:]

    # Fit on the training split and predict the other two splits.
    model = LinearRegression().fit(ttrain, y_train)
    hat_mu_calib = np.asarray(model.predict(dcalib))
    hat_mu_test = np.asarray(model.predict(dtest))

    # Unit weights, sized from the data rather than from literals.
    w_calib = np.ones(len(y_calib))
    w_test = np.ones(len(y_test))

    for quantile, acc in zip(quantiles, metrics):
        np.random.set_state(rng_state)

        # Selection threshold: empirical quantile of the training responses.
        c = np.quantile(y_train, quantile)

        # Clipped calibration score: y is replaced by clip_value when y > c and
        # by c otherwise, then the prediction is subtracted.
        calib_scores_clip = clip_value * (y_calib > c) + c * (y_calib <= c) - hat_mu_calib
        test_scores = c - hat_mu_test

        # =========================
        # ## weighted BH procedure
        # =========================
        BH_clip = weighted_BH(calib_scores_clip, w_calib, test_scores, w_test, q)

        # Assuming BH_clip[0] contains the integer indices of the selected
        # test points.
        BH_clip_indices = BH_clip[0]
        BH_clip_fdp, BH_clip_power = eval_sel(BH_clip_indices, y_test, np.full(len(y_test), c))

        acc['fdp'].append(BH_clip_fdp)
        acc['power'].append(BH_clip_power)
        acc['nsel'].append(len(BH_clip_indices))

# One summary row per quantile: mean and (population, ddof=0) variance of each
# metric over the seeds.
res_BH_only = pd.DataFrame([
    {
        'quantile': quantile,
        'mean_fdp': np.mean(acc['fdp']),
        'var_fdp': np.var(acc['fdp']),
        'mean_power': np.mean(acc['power']),
        'var_power': np.var(acc['power']),
        'mean_nsel': np.mean(acc['nsel']),
        'var_nsel': np.var(acc['nsel']),
    }
    for quantile, acc in zip(quantiles, metrics)
])

# Display the final results
print(res_BH_only)


   quantile  mean_fdp   var_fdp  mean_power  var_power  mean_nsel     var_nsel
0       0.6  0.295099  0.115751    0.332233   0.219651     330.20  219748.5200
1       0.7  0.112167  0.083888    0.001418   0.000010       0.48       0.8696
2       0.8  0.117500  0.101819    0.000505   0.000004       0.21       0.3059
3       0.9  0.095000  0.083475    0.000950   0.000017       0.15       0.1475


In [2]:
import os

# Directory that collects the simulation outputs. exist_ok=True makes the
# call a no-op when the directory is already there, so no separate existence
# check (and no window between check and creation) is needed.
save_path = "./simulation"
os.makedirs(save_path, exist_ok=True)

# Build the file name from save_path so the directory that is created and the
# directory that is written to can never drift apart.
res_BH_only.to_csv(os.path.join(save_path, "UPUSm3.csv"))