In [1]:
import numpy as np
from typing import Tuple, List, Dict, Any, Optional

In [4]:
def generate_dummy_data(
    probe: str,
    n_samples: int = 10000,
    seed: Optional[int] = None,
) -> Tuple[np.ndarray, np.ndarray, List[str], np.ndarray]:
    """Generate dummy training data for testing.

    Draws ``n_samples`` parameter vectors uniformly from a fixed box and
    evaluates a closed-form toy spectrum for each of them. The spectra are
    placeholders with a plausible shape, not physical predictions.

    Args:
        probe: One of 'cmb_tt', 'cmb_ee', 'cmb_te', 'cmb_pp' (all four share
            the same toy spectrum), 'mpk_lin' or 'mpk_boost'.
        n_samples: Number of parameter vectors (rows) to generate.
        seed: Optional seed for a private random generator, for reproducible
            output. When None (default) the draws come from the global
            ``np.random`` state, in column order, exactly as before, so a
            preceding ``np.random.seed(...)`` still controls the result.

    Returns:
        A tuple ``(params, spectra, parameters, modes)``:
            params: array of shape (n_samples, 6), one column per name in
                ``parameters``.
            spectra: array of shape (n_samples, len(modes)).
            parameters: list of the six parameter names, in column order.
            modes: multipoles 2..2508 for the CMB probes, or 420
                log-spaced k values between 1e-4 and 1e2 for the
                matter-power probes.

    Raises:
        ValueError: If ``probe`` is not one of the supported names.
    """
    # Uniform sampling box for each probe family; dict order == column order.
    if probe in ['cmb_tt', 'cmb_ee', 'cmb_te', 'cmb_pp']:
        is_cmb = True
        prior_box = {
            'omega_b': (0.019, 0.025),
            'omega_cdm': (0.10, 0.14),
            'h': (0.64, 0.74),
            'tau_reio': (0.04, 0.12),
            'n_s': (0.92, 1.00),
            'ln10^{10}A_s': (2.9, 3.3),
        }
    elif probe in ['mpk_lin', 'mpk_boost']:
        is_cmb = False
        prior_box = {
            'omega_b': (0.019, 0.025),
            'omega_cdm': (0.10, 0.14),
            'h': (0.64, 0.74),
            'n_s': (0.92, 1.00),
            'ln10^{10}A_s': (2.9, 3.3),
            'z': (0.0, 3.0),
        }
    else:
        raise ValueError(f"Unknown probe: {probe}")

    # Both the np.random module and a Generator expose uniform(low, high, size).
    rng = np.random if seed is None else np.random.default_rng(seed)

    parameters = list(prior_box)
    params = np.column_stack(
        [rng.uniform(low, high, n_samples) for low, high in prior_box.values()]
    )

    # Look columns up by name (as (n_samples, 1) views, ready to broadcast
    # against the mode axis) instead of by hard-coded position: the column
    # order differs between the two probe families.
    column = {name: params[:, j, None] for j, name in enumerate(parameters)}

    # 'ln10^{10}A_s' is the *natural* log of 1e10 * A_s, so invert it with
    # exp, not with a power of ten.
    A_s = np.exp(column['ln10^{10}A_s']) * 1e-10
    tilt = column['n_s'] - 1

    if is_cmb:
        # CMB modes (ell)
        modes = np.arange(2, 2509)
        ell = modes.astype(float)

        # Simplified CMB-like spectrum: tilted power law with exponential
        # damping. The multiplications are done in place so that only one
        # (n_samples, n_modes) array is ever alive.
        spectra = (ell / 100) ** tilt
        spectra *= A_s
        spectra *= np.exp(-ell / 1000) * 1e12  # sample-independent factor
    else:
        # k modes
        modes = np.logspace(-4, 2, 420)  # k in h/Mpc
        one_plus_z = 1 + column['z']

        if probe == 'mpk_lin':
            # Simple matter power spectrum: tilted power law scaled by (1 + z)^-2.
            spectra = (modes / 0.05) ** tilt
            spectra *= A_s * one_plus_z ** (-2) * 1e4
        else:  # mpk_boost
            # Simple boost factor, linear in k and (1 + z).
            spectra = 1 + 0.1 * modes * one_plus_z

    return params, spectra, parameters, modes

In [5]:
# Configuration for the dummy data set.
probe = 'mpk_lin'  # One of 'cmb_tt', 'cmb_ee', 'cmb_te', 'cmb_pp', 'mpk_lin', 'mpk_boost'
n_samples = 5000  # Number of training samples
n_test = 100      # Number of test samples

# generate_dummy_data draws from NumPy's global random state; seed it so that
# re-running the notebook reproduces the same samples.
np.random.seed(0)

params, spectra, parameters, modes = generate_dummy_data(probe, n_samples)
print(params.shape, spectra.shape, modes.shape)

(5000, 6) (5000, 420) (420,)


In [2]:
# Read every array out of the .npz archive into a plain dict. np.load returns
# a lazy NpzFile that keeps the file open, so copy the arrays out inside a
# `with` block and let it close the handle (the same path is written to later
# in this notebook).
with np.load('./training_data/training_data_disco_eb.npz') as npz_file:
    training_data = {key: npz_file[key] for key in npz_file.files}

# Convenience aliases for the two arrays inspected in the following cells.
cosmo_params_samples = training_data['cosmo_params_samples']
Pks = training_data['power_spectra']

In [5]:
# Sanity checks on the loaded arrays (parameters first, spectra second):
# line 1 reports whether either array contains a NaN,
# line 2 reports whether every entry is strictly positive.
print(*[np.isnan(arr).any() for arr in (cosmo_params_samples, Pks)])
print(*[(arr > 0).all() for arr in (cosmo_params_samples, Pks)])

False False
True True


In [14]:
# Hand-written list of the six parameter names.
# NOTE(review): presumably the column order of cosmo_params_samples — confirm.
cosmo_params_names = ['omega_b', 'omega_cdm', 'h', 'n_s', 'ln10^{10}A_s', 'z']
# Display the names stored in the loaded archive for a by-eye comparison with
# the list above (nothing here asserts that the two agree).
list(training_data['cosmo_params_names'])

['omega_b', 'omega_cdm', 'h', 'n_s', 'ln10^{10}A_s', 'z']

In [15]:
# Show which arrays are present in the training-data dict.
training_data.keys()

dict_keys(['cosmo_params_samples', 'k_modes', 'power_spectra', 'cosmo_params_names'])

In [10]:
# Write every entry of training_data to the .npz file, one array per key.
# NOTE(review): this is the same path the data was loaded from, so the original
# file is overwritten in place — confirm that is intended before re-running.
np.savez('./training_data/training_data_disco_eb.npz', **training_data)

In [17]:
# Use the first 9000 samples of the loaded set from here on
# (presumably the remainder is held out for testing — confirm).
params = training_data['cosmo_params_samples'][:9000]
spectra = training_data['power_spectra'][:9000]
# NOTE: this is a NumPy array of names, whereas generate_dummy_data returns a
# plain list for `parameters`.
parameters = training_data['cosmo_params_names']
modes = training_data['k_modes']
print(*(arr.shape for arr in (params, spectra, modes)))

(9000, 6) (9000, 256) (256,)


In [20]:
# Check the container type of `modes` (displays numpy.ndarray for the loaded data).
type(modes)

numpy.ndarray