In [None]:
import numpy as np
import matplotlib.pyplot as plt
from KRR import KRR_covariate_shift
from sklearn.metrics import mean_squared_error
import pandas as pd

# Parameters (fixed for this simulation)
sigma = 1  # Standard deviation of noise
beta = 2   # Ratio parameter in the grid of lambdas
B = 5      # Parameter of distribution shift
N_test = 10000  # Number of test samples
num_repeats = 500  # Number of simulations per condition (reduced for demonstration)

# Factors for the simulation study
# Each entry of fcn_levels is passed as the `fcn` argument of KRR_covariate_shift.
fcn_levels = ['C', 'S', 'V', 'W', 'x']
# The stop value 1200 is exclusive, so this yields seven sample sizes ending at 1100.
# NOTE(review): the saved cell output only lists n = 500, 700, 900, 1100, which
# suggests it was produced with a step of 200 -- confirm which grid is intended.
n_values = range(500, 1200, 100)  # Values for n: 500, 600, 700, 800, 900, 1000, 1100

# Store results in a dictionary where keys are (fcn, n) tuples
all_results = {}

# Perform the simulation study.
# Each reported column label is paired with the attribute of the fitted object
# that holds the corresponding error after evaluate_final() has run.
error_attrs = {
    "Pseudo Mean": "err_pseudo",
    "Naive Mean": "err_naive",
    "Real Mean": "err_real",
    "TabPFN Mean": "err_tabpfn",
    "KRR Mean": "err_krr",
    "KRR IW Mean": "err_krr_iw",
}

for fcn in fcn_levels:
    for n in n_values:
        n_0 = n  # Number of unlabeled target samples, tied to n for simplicity
        print(f"Running simulation for fcn='{fcn}', n={n}")

        # One list of per-repeat errors for every reported method.
        errors = {label: [] for label in error_attrs}

        for i in range(num_repeats):
            # The same seed drives both the constructor and the final evaluation.
            seed = 100 + i
            test = KRR_covariate_shift(n, n_0, B, sigma, fcn, seed)
            test.fit(beta=beta)
            test.evaluate_final(N_test=N_test, seed=seed)

            for label, attr in error_attrs.items():
                errors[label].append(getattr(test, attr))

        # Average over the repeats and store under the (fcn, n) condition.
        all_results[(fcn, n)] = {
            label: np.mean(values) for label, values in errors.items()
        }

# Print the results: one header line per (fcn, n) condition followed by an
# indented line for each method's mean error, rounded to four decimals.
print("\nSimulation Study Results (Mean Errors):")
for (fcn, n), means in all_results.items():
    report = [f"fcn='{fcn}', n={n}:"]
    report.extend(
        f"  {method}: {mean_error:.4f}" for method, mean_error in means.items()
    )
    print("\n".join(report))

# Convert the results to a Pandas DataFrame for easier analysis. The (fcn, n)
# tuple keys of all_results become a two-level row index named 'fcn' and 'n'.
results_df = pd.DataFrame.from_dict(all_results, orient='index')
results_df.index = pd.MultiIndex.from_tuples(results_df.index, names=['fcn', 'n'])

# Write the table to disk with errors rounded to four decimals.
# The index has to be written: it is the only place the 'fcn' and 'n' labels
# live, so index=False would produce rows that cannot be matched to a condition.
csv_filename = "covariate_shift_simulation_results.csv"
results_df.to_csv(csv_filename, index=True, float_format='%.4f')

Running simulation for fcn='C', n=500
Running simulation for fcn='C', n=700
Running simulation for fcn='C', n=900
Running simulation for fcn='C', n=1100
Running simulation for fcn='S', n=500
Running simulation for fcn='S', n=700
Running simulation for fcn='S', n=900
Running simulation for fcn='S', n=1100
Running simulation for fcn='V', n=500
Running simulation for fcn='V', n=700
Running simulation for fcn='V', n=900
Running simulation for fcn='V', n=1100
Running simulation for fcn='W', n=500
Running simulation for fcn='W', n=700
Running simulation for fcn='W', n=900
Running simulation for fcn='W', n=1100
Running simulation for fcn='x', n=500
Running simulation for fcn='x', n=700
Running simulation for fcn='x', n=900
Running simulation for fcn='x', n=1100

Simulation Study Results (Mean Errors):
fcn='C', n=500:
  Pseudo Mean: 0.0577
  Naive Mean: 0.0563
  Real Mean: 0.0446
  TabPFN Mean: 0.0289
  KRR Mean: 0.0609
  KRR IW Mean: 0.1527
fcn='C', n=700:
  Pseudo Mean: 0.0458
  Naive Mean: 

In [2]:
# Show the summary table as plain text beneath a heading (blank line first).
print("\nResults DataFrame:", results_df, sep="\n")


Results DataFrame:
          Pseudo Mean  Naive Mean  Real Mean  TabPFN Mean  KRR Mean  \
fcn n                                                                 
C   500      0.057724    0.056253   0.044639     0.028877  0.060888   
    700      0.045826    0.046187   0.036818     0.022781  0.049753   
    900      0.036908    0.038537   0.030815     0.019055  0.044232   
    1100     0.031149    0.031199   0.025805     0.016440  0.039050   
S   500      0.067379    0.066048   0.052288     0.039588  0.070349   
    700      0.051675    0.052180   0.041000     0.028441  0.054033   
    900      0.042009    0.043786   0.035447     0.022664  0.047549   
    1100     0.036378    0.037276   0.029942     0.018924  0.043058   
V   500      0.026191    0.021545   0.013468     0.018753  0.022074   
    700      0.023034    0.018136   0.012541     0.017190  0.019083   
    900      0.018864    0.016635   0.011323     0.015315  0.017321   
    1100     0.018253    0.014124   0.009790     0.012185