In [58]:
import numpy as np

def find_simulation_with_params(
    target_params: dict,
    data_root: str = '/user_data/ajliang/Quijote',
    num_simulations: int = 2000,
) -> dict:
    """Compute per-parameter relative errors between a target and every simulation.

    For each simulation directory ``{data_root}/LH{index:04d}`` the file
    ``params.npy`` is loaded, its last three entries are discarded, and the
    remaining entries are compared position-by-position against
    ``target_params``.

    Args:
        target_params: Mapping of parameter name -> target value. The
            iteration order of the mapping must match the order of the
            entries stored in ``params.npy`` because values are paired by
            position.
        data_root: Directory containing the ``LH####`` simulation folders.
        num_simulations: Number of simulations to scan, starting at index 0.

    Returns:
        A dict keyed by the zero-padded four-digit simulation index. Each
        value is a dict mapping parameter name to
        ``|target - simulated| / |target|``.

    Raises:
        FileNotFoundError: If a simulation's ``params.npy`` is missing
            (propagated from ``np.load``).
    """
    simulation_to_rel_error = {}
    for simulation in range(num_simulations):
        simulation_index = str(simulation).zfill(4)
        params_path = f'{data_root}/LH{simulation_index}/params.npy'
        # drop the last three params, which are time dependent
        simulation_params = np.load(params_path)[:-3]

        simulation_to_rel_error[simulation_index] = {
            # Normalise by the magnitude of the target so the relative error
            # stays non-negative even when the target value is negative.
            param_name: abs(target_param_val - simulation_params[i]) / abs(target_param_val)
            for i, (param_name, target_param_val) in enumerate(target_params.items())
        }

    return simulation_to_rel_error

In [59]:
# Parameter values the backward model was trained on. Key order matters:
# find_simulation_with_params pairs these with simulation values by position.
backward_model_fake_data_params = {
    'omega_m': 0.3,
    'omega_b': 0.05,
    'h': 0.7,
    'n_s': 0.965,
    'sigma_8': 0.799998,
}

# Relative error of every simulation with respect to the values above.
simulation_to_rel_error = find_simulation_with_params(
    target_params=backward_model_fake_data_params,
)

In [60]:
def sort_key(simulation_and_rel_error):
    """Score a ``(simulation, rel_error)`` pair for ranking.

    The score is the sum of the ``'omega_m'`` and ``'sigma_8'`` entries of
    the pair's relative-error dict; any other entries are ignored.
    """
    _simulation, errors = simulation_and_rel_error
    return errors['omega_m'] + errors['sigma_8']

# Rank the (simulation, rel_error) pairs from largest to smallest combined
# omega_m + sigma_8 relative error, so index 0 is the furthest from the target.
sorted_simulations = list(simulation_to_rel_error.items())
sorted_simulations.sort(key=sort_key, reverse=True)

In [61]:
# Position of each named parameter within a simulation's params.npy array.
param_name_to_index = {
    'omega_m': 0,
    'omega_b': 1,
    'h': 2,
    'n_s': 3,
    'sigma_8': 4,
}

# Sample evenly spaced ranks across the sorted list. The list is sorted from
# largest to smallest error, so rank 0 is reported as the 100th percentile.
num_ood_simulations = 11
percentile_gap = 100 / (num_ood_simulations - 1)
indices = np.round(
    np.linspace(0, len(sorted_simulations) - 1, num_ood_simulations)
).astype(int)

print(f"Backward model is trained on params: {backward_model_fake_data_params}")
print()

for i, ind in enumerate(indices):
    percentile = 100 - i * percentile_gap
    simulation, error = sorted_simulations[ind]

    # Render each relative error as a whole-number percentage string.
    rel_error = {name: f"{fraction:.0%}" for name, fraction in error.items()}

    print(f"The {percentile}th percentile OOD Quijote simulation is LH{simulation} with: ")
    print(f"    * relative difference: {rel_error}")

    # Reload the raw parameters (minus the last three entries) for display.
    actual_params = np.load(f'/user_data/ajliang/Quijote/LH{simulation}/params.npy')[:-3]
    actual_params = {
        name: actual_params[position]
        for name, position in param_name_to_index.items()
    }
    print(f"    * actual params: {actual_params}")
    print()


Backward model is trained on params: {'omega_m': 0.3, 'omega_b': 0.05, 'h': 0.7, 'n_s': 0.965, 'sigma_8': 0.799998}

The 100.0th percentile OOD Quijote simulation is LH1045 with: 
    * relative difference: {'omega_m': '66%', 'omega_b': '13%', 'h': '15%', 'n_s': '6%', 'sigma_8': '25%'}
    * actual params: {'omega_m': 0.4969, 'omega_b': 0.04369, 'h': 0.8053, 'n_s': 0.9097, 'sigma_8': 0.6003}

The 90.0th percentile OOD Quijote simulation is LH1416 with: 
    * relative difference: {'omega_m': '55%', 'omega_b': '14%', 'h': '7%', 'n_s': '10%', 'sigma_8': '18%'}
    * actual params: {'omega_m': 0.1363, 'omega_b': 0.04279, 'h': 0.7483, 'n_s': 1.0659, 'sigma_8': 0.6535}

The 80.0th percentile OOD Quijote simulation is LH0715 with: 
    * relative difference: {'omega_m': '64%', 'omega_b': '37%', 'h': '26%', 'n_s': '3%', 'sigma_8': '2%'}
    * actual params: {'omega_m': 0.4907, 'omega_b': 0.06853, 'h': 0.5179, 'n_s': 0.9323, 'sigma_8': 0.8195}

The 70.0th percentile OOD Quijote simulation is L

In [62]:
# Print the five entries at the head of sorted_simulations, i.e. the ones
# with the largest combined omega_m + sigma_8 relative error.
for i in range(5):
    simulation, error = sorted_simulations[i]
    print(f"The #{i + 1} most OOD simulation is LH{simulation} with: ")

    # Render each relative error as a whole-number percentage string.
    error = {name: f"{fraction:.0%}" for name, fraction in error.items()}
    print(f"    * relative difference: {error}")

    # Reload the raw parameters (minus the last three entries) for display.
    actual_params = np.load(f'/user_data/ajliang/Quijote/LH{simulation}/params.npy')[:-3]
    actual_params = {
        name: actual_params[position]
        for name, position in param_name_to_index.items()
    }
    print(f"    * actual params: {actual_params}")
    print()

The #1 most OOD simulation is LH1045 with: 
    * relative difference: {'omega_m': '66%', 'omega_b': '13%', 'h': '15%', 'n_s': '6%', 'sigma_8': '25%'}
    * actual params: {'omega_m': 0.4969, 'omega_b': 0.04369, 'h': 0.8053, 'n_s': 0.9097, 'sigma_8': 0.6003}

The #2 most OOD simulation is LH0590 with: 
    * relative difference: {'omega_m': '66%', 'omega_b': '3%', 'h': '3%', 'n_s': '23%', 'sigma_8': '24%'}
    * actual params: {'omega_m': 0.1019, 'omega_b': 0.05165, 'h': 0.6771, 'n_s': 1.1857, 'sigma_8': 0.9921}

The #3 most OOD simulation is LH1988 with: 
    * relative difference: {'omega_m': '65%', 'omega_b': '30%', 'h': '13%', 'n_s': '23%', 'sigma_8': '24%'}
    * actual params: {'omega_m': 0.4945, 'omega_b': 0.06513, 'h': 0.6117, 'n_s': 1.1905, 'sigma_8': 0.6043}

The #4 most OOD simulation is LH0785 with: 
    * relative difference: {'omega_m': '65%', 'omega_b': '23%', 'h': '27%', 'n_s': '1%', 'sigma_8': '24%'}
    * actual params: {'omega_m': 0.1053, 'omega_b': 0.06153, 'h': 0.8