## Simulation: compare prediction performances

Notes:
- Cross-validation is performed only on the first fold. 
- Test performance is measured using $n$ test data samples.

In [1]:
# Set up the path: move the working directory one level up (presumably the
# project root, since later cells read "./results/..." and import the
# `reproducibility` package from there).
# The guard makes the cell safe to re-run within one kernel session: without
# it, every extra execution would climb one more directory level.
import os, sys
if "PROJECT_ROOT" not in globals():
    os.chdir('..')
    PROJECT_ROOT = os.getcwd()
# sys.path.insert(0, os.path.abspath('..'))

import numpy as np
import pandas as pd

import pickle

# Single seed handed as `seed=` to every repeat_* simulation call in this notebook.
MASTER_SEED = 20241225

In [2]:
# Functions for simulation experiments
from reproducibility.simulations import Y1, Y2, Y3, Y4, logistic_normal, repeat_prediction

## CKDR-$m^\star$

In [None]:
# One repeat_prediction result per (sample size, setting) pair, with m=None.
# NOTE(review): `load=True` presumably reuses cached results - confirm in
# reproducibility.simulations.repeat_prediction.
results = [
    repeat_prediction(
        n, p=100, Y_func=Y_func, X_func=logistic_normal, m=None,
        njobs=100, reps=100, load=True, seed=MASTER_SEED,
    )
    for n in [200, 500, 1000]
    for Y_func in [Y1, Y2, Y3, Y4]
]

In [3]:
# Summarise the stored CKDR-m* scores (m=None) for each setting and sample size.
for Y_func in [Y1, Y2, Y3, Y4]:
    print(f"Results for {Y_func.__name__}:")
    # Y3/Y4 are reported as accuracy, Y1/Y2 as MSE.
    metric = "Accuracy" if Y_func in [Y3, Y4] else "MSE"
    for n in [200, 500, 1000]:
        m = None
        path = f"./results/simulation/predictions/{n}_{Y_func.__name__}_{m}.pickle"
        # NOTE: pickle.load can execute arbitrary code; only load files written by this project.
        with open(path, "rb") as f:
            results = pickle.load(f)
        # Standard error: use the number of stored scores instead of a
        # hard-coded 100, so the value stays right if `reps` ever changes
        # (matches the export cell, which also divides by len(results)).
        se = results.std(0) / np.sqrt(len(results))
        print(f"    n={n}:")
        print(f"\tMean {metric}: {results.mean(0):.4f} +/- SE: {se:.4f}")

Results for Y1:
    n=200:
	Mean MSE: 0.0180 +/- SE: 0.0004
    n=500:
	Mean MSE: 0.0132 +/- SE: 0.0001
    n=1000:
	Mean MSE: 0.0121 +/- SE: 0.0001
Results for Y2:
    n=200:
	Mean MSE: 0.0697 +/- SE: 0.0033
    n=500:
	Mean MSE: 0.0386 +/- SE: 0.0031
    n=1000:
	Mean MSE: 0.0238 +/- SE: 0.0011
Results for Y3:
    n=200:
	Mean Accuracy: 0.8467 +/- SE: 0.0030
    n=500:
	Mean Accuracy: 0.9133 +/- SE: 0.0014
    n=1000:
	Mean Accuracy: 0.9320 +/- SE: 0.0008
Results for Y4:
    n=200:
	Mean Accuracy: 0.8196 +/- SE: 0.0028
    n=500:
	Mean Accuracy: 0.8846 +/- SE: 0.0016
    n=1000:
	Mean Accuracy: 0.8937 +/- SE: 0.0011


## CKDR$^*$

In [None]:
# One repeat_prediction result per (sample size, setting) pair, with m="cv".
# NOTE(review): `load=True` presumably reuses cached results - confirm in
# reproducibility.simulations.repeat_prediction.
results = [
    repeat_prediction(
        n, p=100, Y_func=Y_func, X_func=logistic_normal, m="cv",
        load=True, seed=MASTER_SEED,
    )
    for n in [200, 500, 1000]
    for Y_func in [Y1, Y2, Y3, Y4]
]

In [4]:
# Summarise the stored CKDR* scores (m="cv") for each setting and sample size.
for Y_func in [Y1, Y2, Y3, Y4]:
    print(f"Results for {Y_func.__name__}:")
    # Y3/Y4 are reported as accuracy, Y1/Y2 as MSE.
    metric = "Accuracy" if Y_func in [Y3, Y4] else "MSE"
    for n in [200, 500, 1000]:
        m = "cv"
        path = f"./results/simulation/predictions/{n}_{Y_func.__name__}_{m}.pickle"
        # NOTE: pickle.load can execute arbitrary code; only load files written by this project.
        with open(path, "rb") as f:
            results = pickle.load(f)
        # Standard error: use the number of stored scores instead of a
        # hard-coded 100, so the value stays right if `reps` ever changes
        # (matches the export cell, which also divides by len(results)).
        se = results.std(0) / np.sqrt(len(results))
        print(f"    n={n}:")
        print(f"\tMean {metric}: {results.mean(0):.4f} +/- SE: {se:.4f}")

Results for Y1:
    n=200:
	Mean MSE: 0.0174 +/- SE: 0.0002
    n=500:
	Mean MSE: 0.0133 +/- SE: 0.0001
    n=1000:
	Mean MSE: 0.0121 +/- SE: 0.0001
Results for Y2:
    n=200:
	Mean MSE: 0.0821 +/- SE: 0.0081
    n=500:
	Mean MSE: 0.0365 +/- SE: 0.0016
    n=1000:
	Mean MSE: 0.0250 +/- SE: 0.0014
Results for Y3:
    n=200:
	Mean Accuracy: 0.8439 +/- SE: 0.0029
    n=500:
	Mean Accuracy: 0.8988 +/- SE: 0.0015
    n=1000:
	Mean Accuracy: 0.9321 +/- SE: 0.0010
Results for Y4:
    n=200:
	Mean Accuracy: 0.8316 +/- SE: 0.0028
    n=500:
	Mean Accuracy: 0.8779 +/- SE: 0.0019
    n=1000:
	Mean Accuracy: 0.8980 +/- SE: 0.0010


## LC-Lasso

In [3]:
from reproducibility.simulations import repeat_lc_lasso_clf, repeat_lc_lasso_reg

In [None]:
# LC-Lasso, regression settings Y1/Y2 (author's timing: 45 sec with 100 cores)
results = []
for n in (200, 500, 1000):
    for Y_func in (Y1, Y2):
        print(f"Running simulation: n={n}, Y_func={Y_func.__name__}")
        mse_scores = repeat_lc_lasso_reg(
            n, p=100, Y_func=Y_func, X_func=logistic_normal,
            njobs=100, reps=100, verbose=True, seed=MASTER_SEED,
        )
        results.append(mse_scores)
        print(f"Completed: n={n}, Y_func={Y_func.__name__}, Mean mse: {mse_scores.mean():.4f}")

In [None]:
# LC-Lasso, classification settings Y3/Y4 (author's timing: 4 mins with 100 cores)
results = []
for n in (200, 500, 1000):
    for Y_func in (Y3, Y4):
        print(f"Running simulation: n={n}, Y_func={Y_func.__name__}")
        acc_scores = repeat_lc_lasso_clf(
            n, p=100, Y_func=Y_func, X_func=logistic_normal,
            njobs=100, reps=100, seed=MASTER_SEED,
        )
        results.append(acc_scores)
        print(f"Completed: n={n}, Y_func={Y_func.__name__}, Mean accuracy: {acc_scores.mean():.4f}")

In [5]:
# Summarise the stored LC-Lasso scores for each setting and sample size.
for Y_func in [Y1, Y2, Y3, Y4]:
    print(f"Results for {Y_func.__name__}:")
    # Y3/Y4 are reported as accuracy, Y1/Y2 as MSE.
    metric = "Accuracy" if Y_func in [Y3, Y4] else "MSE"
    for n in [200, 500, 1000]:
        path = f"./results/simulation/lc_lasso_results/{n}_{Y_func.__name__}.pickle"
        # NOTE: pickle.load can execute arbitrary code; only load files written by this project.
        with open(path, "rb") as f:
            results = pickle.load(f)
        # Standard error: use the number of stored scores instead of a
        # hard-coded 100, so the value stays right if `reps` ever changes
        # (matches the export cell, which also divides by len(results)).
        se = results.std(0) / np.sqrt(len(results))
        print(f"    n={n}:")
        print(f"\tMean {metric}: {results.mean(0):.4f} +/- SE: {se:.4f}")

Results for Y1:
    n=200:
	Mean MSE: 0.0321 +/- SE: 0.0005
    n=500:
	Mean MSE: 0.0190 +/- SE: 0.0001
    n=1000:
	Mean MSE: 0.0169 +/- SE: 0.0001
Results for Y2:
    n=200:
	Mean MSE: 0.1645 +/- SE: 0.0053
    n=500:
	Mean MSE: 0.1070 +/- SE: 0.0021
    n=1000:
	Mean MSE: 0.0989 +/- SE: 0.0024
Results for Y3:
    n=200:
	Mean Accuracy: 0.7707 +/- SE: 0.0037
    n=500:
	Mean Accuracy: 0.8091 +/- SE: 0.0021
    n=1000:
	Mean Accuracy: 0.8463 +/- SE: 0.0011
Results for Y4:
    n=200:
	Mean Accuracy: 0.7140 +/- SE: 0.0035
    n=500:
	Mean Accuracy: 0.7879 +/- SE: 0.0019
    n=1000:
	Mean Accuracy: 0.8169 +/- SE: 0.0012


## clr-Kernel

In [7]:
from reproducibility.simulations import repeat_clr_kernel_krr, repeat_clr_kernel_svm

In [None]:
# clr-Kernel, regression settings Y1/Y2 (author's timing: 1min)
results = []
for n in (200, 500, 1000):
    for Y_func in (Y1, Y2):
        print(f"Running simulation: n={n}, Y_func={Y_func.__name__}")
        mse_scores = repeat_clr_kernel_krr(
            n, p=100, Y_func=Y_func, X_func=logistic_normal,
            reps=100, seed=MASTER_SEED,
        )
        results.append(mse_scores)
        print(f"Completed: n={n}, Y_func={Y_func.__name__}, Mean mse: {mse_scores.mean():.4f}")

In [None]:
# clr-Kernel, classification settings Y3/Y4 (author's timing: 1 min)
results = []
for n in (200, 500, 1000):
    for Y_func in (Y3, Y4):
        print(f"Running simulation: n={n}, Y_func={Y_func.__name__}")
        acc_scores = repeat_clr_kernel_svm(
            n, p=100, Y_func=Y_func, X_func=logistic_normal,
            reps=100, seed=MASTER_SEED,
        )
        results.append(acc_scores)
        print(f"Completed: n={n}, Y_func={Y_func.__name__}, Mean acc: {acc_scores.mean():.4f}")

In [10]:
# Summarise the stored clr-Kernel scores for each setting and sample size.
for Y_func in [Y1, Y2, Y3, Y4]:
    print(f"Results for {Y_func.__name__}:")
    # Y3/Y4 are reported as accuracy, Y1/Y2 as MSE.
    metric = "Accuracy" if Y_func in [Y3, Y4] else "MSE"
    for n in [200, 500, 1000]:
        path = f"./results/simulation/clr_kernel_results/{n}_{Y_func.__name__}.pickle"
        # NOTE: pickle.load can execute arbitrary code; only load files written by this project.
        with open(path, "rb") as f:
            results = pickle.load(f)
        # Standard error: use the number of stored scores instead of a
        # hard-coded 100, so the value stays right if `reps` ever changes
        # (matches the export cell, which also divides by len(results)).
        se = results.std(0) / np.sqrt(len(results))
        print(f"    n={n}:")
        print(f"\tMean {metric}: {results.mean(0):.4f} +/- SE: {se:.4f}")

Results for Y1:
    n=200:
	Mean MSE: 0.1253 +/- SE: 0.0019
    n=500:
	Mean MSE: 0.0901 +/- SE: 0.0008
    n=1000:
	Mean MSE: 0.0786 +/- SE: 0.0005
Results for Y2:
    n=200:
	Mean MSE: 0.1851 +/- SE: 0.0053
    n=500:
	Mean MSE: 0.1454 +/- SE: 0.0024
    n=1000:
	Mean MSE: 0.1362 +/- SE: 0.0025
Results for Y3:
    n=200:
	Mean Accuracy: 0.7756 +/- SE: 0.0035
    n=500:
	Mean Accuracy: 0.8203 +/- SE: 0.0021
    n=1000:
	Mean Accuracy: 0.8452 +/- SE: 0.0012
Results for Y4:
    n=200:
	Mean Accuracy: 0.7443 +/- SE: 0.0039
    n=500:
	Mean Accuracy: 0.7989 +/- SE: 0.0019
    n=1000:
	Mean Accuracy: 0.8216 +/- SE: 0.0011


## clr-RF

In [3]:
from reproducibility.simulations import repeat_clr_rf_reg, repeat_clr_rf_clf

In [None]:
# clr-RF, regression settings Y1/Y2 (author's timing: 1min with 100 cores)
results = []
for n in (200, 500, 1000):
    for Y_func in (Y1, Y2):
        print(f"Running simulation: n={n}, Y_func={Y_func.__name__}")
        mse_scores = repeat_clr_rf_reg(
            n, p=100, Y_func=Y_func, X_func=logistic_normal,
            reps=100, njobs=100, seed=MASTER_SEED,
        )
        results.append(mse_scores)
        print(f"Completed: n={n}, Y_func={Y_func.__name__}, Mean mse: {mse_scores.mean():.4f}")

In [None]:
# clr-RF, classification settings Y3/Y4 (author's timing: 1 min)
results = []
for n in (200, 500, 1000):
    for Y_func in (Y3, Y4):
        print(f"Running simulation: n={n}, Y_func={Y_func.__name__}")
        acc_scores = repeat_clr_rf_clf(
            n, p=100, Y_func=Y_func, X_func=logistic_normal,
            reps=100, njobs=100, seed=MASTER_SEED,
        )
        results.append(acc_scores)
        print(f"Completed: n={n}, Y_func={Y_func.__name__}, Mean acc: {acc_scores.mean():.4f}")

In [6]:
# Summarise the stored clr-RF scores for each setting and sample size.
for Y_func in [Y1, Y2, Y3, Y4]:
    print(f"Results for {Y_func.__name__}:")
    # Y3/Y4 are reported as accuracy, Y1/Y2 as MSE.
    metric = "Accuracy" if Y_func in [Y3, Y4] else "MSE"
    for n in [200, 500, 1000]:
        path = f"./results/simulation/clr_rf_results/{n}_{Y_func.__name__}.pickle"
        # NOTE: pickle.load can execute arbitrary code; only load files written by this project.
        with open(path, "rb") as f:
            results = pickle.load(f)
        # Standard error: use the number of stored scores instead of a
        # hard-coded 100, so the value stays right if `reps` ever changes
        # (matches the export cell, which also divides by len(results)).
        se = results.std(0) / np.sqrt(len(results))
        print(f"    n={n}:")
        print(f"\tMean {metric}: {results.mean(0):.4f} +/- SE: {se:.4f}")

Results for Y1:
    n=200:
	Mean MSE: 0.3162 +/- SE: 0.0041
    n=500:
	Mean MSE: 0.2810 +/- SE: 0.0020
    n=1000:
	Mean MSE: 0.2623 +/- SE: 0.0014
Results for Y2:
    n=200:
	Mean MSE: 0.3446 +/- SE: 0.0076
    n=500:
	Mean MSE: 0.3151 +/- SE: 0.0040
    n=1000:
	Mean MSE: 0.3063 +/- SE: 0.0041
Results for Y3:
    n=200:
	Mean Accuracy: 0.6621 +/- SE: 0.0035
    n=500:
	Mean Accuracy: 0.7102 +/- SE: 0.0022
    n=1000:
	Mean Accuracy: 0.7419 +/- SE: 0.0016
Results for Y4:
    n=200:
	Mean Accuracy: 0.6390 +/- SE: 0.0036
    n=500:
	Mean Accuracy: 0.6851 +/- SE: 0.0025
    n=1000:
	Mean Accuracy: 0.7163 +/- SE: 0.0015


## RS-ES
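RS-ES results are not computed in this notebook: they are read from MATLAB `.mat` files and cover the regression settings `Y1` and `Y2` only.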

In [3]:
from scipy.io import loadmat

# Load RS-ES results from MATLAB files
rs_es_results = {}

# Sample sizes and Y functions to load
sample_sizes = [200, 500, 1000]
Y_func_names = ['Y1', 'Y2']

print("Loading RS-ES results...")
for n in sample_sizes:
    for Y_func_name in Y_func_names:
        filename = f'mse_rs_n{n}_{Y_func_name}.mat'
        filepath = f'./results/simulation/rs_es_results/{filename}'

        if os.path.exists(filepath):
            mat_data = loadmat(filepath)
            rs_es_results[(n, Y_func_name)] = {
                'mse_all': mat_data['mse_all']
            }
        else:
            # Missing files are reported and skipped rather than raising.
            print(f"  Missing: {filename}")

for (n, Y_func_name), data in rs_es_results.items():
    print(f"Results for n={n}, Y_func={Y_func_name}:")
    mse_all = data['mse_all']
    # Standard error over however many MSE values the file holds; `.size`
    # counts every element regardless of the array's shape, replacing the
    # previously hard-coded 100.
    se = np.std(mse_all) / np.sqrt(mse_all.size)
    print(f"\tMean MSE: {np.mean(mse_all):.4f}, SE: {se:.4f}")

Loading RS-ES results...
Results for n=200, Y_func=Y1:
	Mean MSE: 0.0204, SE: 0.0003
Results for n=200, Y_func=Y2:
	Mean MSE: 0.1295, SE: 0.0042
Results for n=500, Y_func=Y1:
	Mean MSE: 0.0124, SE: 0.0001
Results for n=500, Y_func=Y2:
	Mean MSE: 0.0964, SE: 0.0020
Results for n=1000, Y_func=Y1:
	Mean MSE: 0.0112, SE: 0.0000
Results for n=1000, Y_func=Y2:
	Mean MSE: 0.0910, SE: 0.0023


## Export table

In [9]:
import pandas as pd
import numpy as np
import pickle
import os
from scipy.io import loadmat

# Function to dynamically load all simulation results
def _summarize_scores(scores, as_error_rate=False):
    """Collapse an array of scores into ``{'mean': ..., 'se': ...}``.

    ``se`` is ``scores.std(0)`` (NumPy's default population std, ``ddof=0``)
    divided by ``sqrt(len(scores))``. When ``as_error_rate`` is true the mean
    is reported as ``1 - mean`` (accuracy -> misclassification rate); the
    standard error is left as is.
    """
    mean_val = scores.mean(0)
    se_val = scores.std(0) / np.sqrt(len(scores))
    if as_error_rate:
        mean_val = 1 - mean_val
    return {'mean': mean_val, 'se': se_val}


def _read_pickle_scores(filepath):
    """Return the object stored in a pickle file."""
    # NOTE: pickle.load can execute arbitrary code; only use on files written by this project.
    with open(filepath, "rb") as f:
        return pickle.load(f)


def _read_rs_es_scores(filepath):
    """Return the ``mse_all`` variable of a MATLAB file as a flat array."""
    return loadmat(filepath)['mse_all'].flatten()


def _load_method_results(results_data, method, y_funcs, sample_sizes,
                         path_template, reader, error_rate_y_funcs=()):
    """Fill ``results_data[method]`` in place for every (y_func, n) pair.

    ``path_template`` is formatted with ``n`` and ``y_func``. Files that do not
    exist are reported with a "Missing" line; any error while reading or
    summarising is reported with an "Error loading" line. In both cases the
    existing (NaN) entry is left untouched, so one bad file never aborts the
    whole load.
    """
    print(f"Loading {method} results...")
    for y_func in y_funcs:
        for n in sample_sizes:
            filepath = path_template.format(n=n, y_func=y_func)
            try:
                if not os.path.exists(filepath):
                    print(f"  Missing: {filepath}")
                    continue
                results_data[method][y_func][n] = _summarize_scores(
                    reader(filepath), as_error_rate=y_func in error_rate_y_funcs
                )
            except Exception as e:
                print(f"  Error loading {method} {y_func} n={n}: {e}")


def load_simulation_results():
    """Load all simulation results from pickle and mat files.

    Returns:
        Nested dict ``results_data[method][y_func][n] -> {'mean', 'se'}`` with
        ``n`` given as a string ('200', '500', '1000'). Entries start as NaN
        and stay NaN when the corresponding file is missing or unreadable.
        For 'Y3' and 'Y4' the mean of the pickled scores is converted to
        ``1 - mean`` (accuracy -> MCR). RS-ES is only loaded for 'Y1' and 'Y2'.
    """
    methods = ['CKDR-m*', 'CKDR*', 'LC-Lasso', 'clr-Kernel', 'clr-RF', 'RS-ES']
    y_funcs = ['Y1', 'Y2', 'Y3', 'Y4']
    sample_sizes = ['200', '500', '1000']

    # Initialize the structure with NaN placeholders.
    results_data = {
        method: {
            y_func: {n: {'mean': np.nan, 'se': np.nan} for n in sample_sizes}
            for y_func in y_funcs
        }
        for method in methods
    }

    # Pickle-backed methods, loaded in table order (CKDR-m* uses m=None, CKDR* uses m="cv").
    pickle_sources = [
        ('CKDR-m*', "./results/simulation/predictions/{n}_{y_func}_None.pickle"),
        ('CKDR*', "./results/simulation/predictions/{n}_{y_func}_cv.pickle"),
        ('LC-Lasso', "./results/simulation/lc_lasso_results/{n}_{y_func}.pickle"),
        ('clr-Kernel', "./results/simulation/clr_kernel_results/{n}_{y_func}.pickle"),
        ('clr-RF', "./results/simulation/clr_rf_results/{n}_{y_func}.pickle"),
    ]
    for method, path_template in pickle_sources:
        _load_method_results(
            results_data, method, y_funcs, sample_sizes, path_template,
            _read_pickle_scores, error_rate_y_funcs=('Y3', 'Y4'),
        )

    # RS-ES results come from MATLAB files and exist only for Y1 and Y2.
    _load_method_results(
        results_data, 'RS-ES', ['Y1', 'Y2'], sample_sizes,
        "./results/simulation/rs_es_results/mse_rs_n{n}_{y_func}.mat",
        _read_rs_es_scores,
    )

    return results_data

# Load all results into results_data[method][y_func][n] -> {'mean', 'se'};
# entries whose file could not be loaded remain NaN.
results_data = load_simulation_results()

# Function to find the minimum value (optimal) for each setting and sample size
def find_optimal_values(results_data):
    """Return the smallest available mean per setting and sample size.

    Args:
        results_data: nested dict ``results_data[method][y_func][n]['mean']``
            covering 'Y1'..'Y4' and n in '200', '500', '1000'.

    Returns:
        ``optimal_values[y_func][n]``: the minimum mean across all methods
        whose mean is not NaN, or ``None`` when every method is NaN there.
    """
    optimal_values = {}
    for y_func in ['Y1', 'Y2', 'Y3', 'Y4']:
        optimal_values[y_func] = {}
        for n in ['200', '500', '1000']:
            means = [per_method[y_func][n]['mean'] for per_method in results_data.values()]
            # NaN marks "no result for this method"; it must not take part in min().
            available = [value for value in means if not np.isnan(value)]
            optimal_values[y_func][n] = min(available) if available else None
    return optimal_values

# Smallest mean per (setting, sample size); the table generator bolds the cell that matches it.
optimal_values = find_optimal_values(results_data)

# Function to format value with standard error and bold if optimal
def format_value_with_se(mean_val, se_val, is_optimal, precision=3):
    """Render one table cell as ``mean (se)``.

    Returns ``"--"`` when either number is NaN. Both numbers are printed with
    ``precision`` decimals, and the whole cell is wrapped in ``\\textbf{...}``
    when ``is_optimal`` is truthy.
    """
    if any(np.isnan(number) for number in (mean_val, se_val)):
        return "--"

    cell = "{:.{digits}f} ({:.{digits}f})".format(mean_val, se_val, digits=precision)
    return "\\textbf{" + cell + "}" if is_optimal else cell

# Generate LaTeX table with standard errors
def _latex_metric_rows(results_data, optimal_values, metric, settings,
                       sample_sizes=('200', '500', '1000')):
    """Build the body rows for one metric block (e.g. every MSE row).

    ``settings`` is a sequence of ``(y_func, setting_name)`` pairs. The first
    row of the block carries a ``\\multirow`` spanning the whole block for the
    metric label, and the first row of each setting carries a ``\\multirow``
    for the setting name; all other rows leave those two columns empty.
    """
    metric_span = len(settings) * len(sample_sizes)
    setting_span = len(sample_sizes)
    rows = []
    for i, (y_func, setting_name) in enumerate(settings):
        for j, n in enumerate(sample_sizes):
            if j == 0:
                metric_cell = f"\\multirow{{{metric_span}}}{{*}}{{{metric}}}" if i == 0 else ""
                row = f"{metric_cell} & \\multirow{{{setting_span}}}{{*}}{{{setting_name}}} & {n}"
            else:
                row = f" &  & {n}"

            # One cell per method, bolding the one equal to the best mean.
            for method in results_data.keys():
                mean_val = results_data[method][y_func][n]['mean']
                se_val = results_data[method][y_func][n]['se']
                # The NaN check short-circuits first, so a None optimum
                # (every method missing) is never used in the subtraction.
                is_optimal = not np.isnan(mean_val) and abs(mean_val - optimal_values[y_func][n]) < 1e-6
                row += " & " + format_value_with_se(mean_val, se_val, is_optimal)

            row += " \\\\ "
            rows.append(row)
    return rows


def generate_latex_table_with_se(results_data, optimal_values):
    """Return the LaTeX source of the simulation results table.

    Rows are grouped into an MSE block (Y1 -> (I), Y2 -> (II)) and an MCR
    block (Y3 -> (III), Y4 -> (IV)), one row per sample size and one column
    per method in ``results_data`` (in its key order). The best (smallest)
    mean of each row is bolded.

    The generated code uses ``\\multirow`` and ``\\toprule``/``\\midrule``/
    ``\\bottomrule``, so the target document needs the ``multirow`` and
    ``booktabs`` packages.
    """
    latex_lines = [
        # Table header
        "\\begin{table}[htbp]",
        "\\centering",
        "\\caption{Simulation Results: MSE for Settings (I) and (II), MCR for Settings (III) and (IV). Values are mean (standard error).}",
        "\\label{tab:simulation_results}",
        "\\begin{tabular}{llr" + "r" * len(results_data) + "}",
        "\\toprule",
        # Header row
        "Metric & Setting & $n$ & " + " & ".join(results_data.keys()) + " \\\\ ",
        "\\midrule",
    ]

    # MSE section (Y1 and Y2)
    latex_lines += _latex_metric_rows(
        results_data, optimal_values, "MSE", [('Y1', '(I)'), ('Y2', '(II)')]
    )
    latex_lines.append("\\midrule")

    # MCR section (Y3 and Y4)
    latex_lines += _latex_metric_rows(
        results_data, optimal_values, "MCR", [('Y3', '(III)'), ('Y4', '(IV)')]
    )

    # Table footer
    latex_lines += ["\\bottomrule", "\\end{tabular}", "\\end{table}"]

    return "\n".join(latex_lines)

# Generate the LaTeX table
latex_table = generate_latex_table_with_se(results_data, optimal_values)

print("LaTeX Table Code:")
print("=" * 60)
print(latex_table)
print("=" * 60)

# Also save to a file for easy copying. Create the output folder first so the
# cell does not fail with FileNotFoundError when results/tables does not exist yet.
table_path = 'results/tables/simulation_predictions.tex'
os.makedirs(os.path.dirname(table_path), exist_ok=True)
with open(table_path, 'w') as f:
    f.write(latex_table)

Loading CKDR-m* results...
Loading CKDR* results...
Loading LC-Lasso results...
Loading clr-Kernel results...
Loading clr-RF results...
Loading RS-ES results...
LaTeX Table Code:
\begin{table}[htbp]
\centering
\caption{Simulation Results: MSE for Settings (I) and (II), MCR for Settings (III) and (IV). Values are mean (standard error).}
\label{tab:simulation_results}
\begin{tabular}{llrrrrrrr}
\toprule
Metric & Setting & $n$ & CKDR-m* & CKDR* & LC-Lasso & clr-Kernel & clr-RF & RS-ES \\ 
\midrule
\multirow{6}{*}{MSE} & \multirow{3}{*}{(I)} & 200 & 0.018 (0.000) & \textbf{0.017 (0.000)} & 0.032 (0.000) & 0.125 (0.002) & 0.316 (0.004) & 0.020 (0.000) \\ 
 &  & 500 & 0.013 (0.000) & 0.013 (0.000) & 0.019 (0.000) & 0.090 (0.001) & 0.281 (0.002) & \textbf{0.012 (0.000)} \\ 
 &  & 1000 & 0.012 (0.000) & 0.012 (0.000) & 0.017 (0.000) & 0.079 (0.000) & 0.262 (0.001) & \textbf{0.011 (0.000)} \\ 
 & \multirow{3}{*}{(II)} & 200 & \textbf{0.070 (0.003)} & 0.082 (0.008) & 0.164 (0.005) & 0.185 (0.005