In [None]:
import numpy as np

import matplotlib.pyplot as plt
import matplotlib
from matplotlib import pylab
%config InlineBackend.figure_format = 'retina'
matplotlib.rcParams['figure.dpi'] = 80
textsize = 'x-large'
params = {'legend.fontsize': 'large',
          'figure.figsize': (5, 4),
         'axes.labelsize': textsize,
         'axes.titlesize': textsize,
         'xtick.labelsize': textsize,
         'ytick.labelsize': textsize}
pylab.rcParams.update(params)

We can read in our training data from a `.npy` file:

In [None]:
# Path of the training set, relative to the notebook's working directory.
train_fn = 'data/cosmology_train.npy'
# allow_pickle=True is required because the array holds Python dicts (an object
# array). NOTE(review): unpickling can execute arbitrary code, so only load
# .npy files that come from a trusted source.
data_train = np.load(train_fn, allow_pickle=True)

The file stores an array of dictionaries, one per datapoint, each containing `x` (input parameters), `y` (output statistics), and labels.

In [None]:
# One array element per datapoint, so the array length is the sample count.
n_train = len(data_train)
print("Number of datapoints:", n_train)
# Peek at the first record to see which fields every dictionary provides.
print("Dict keys:", data_train[0].keys())

The cosmology dataset contains cosmological parameters ($\Omega_m$, $\sigma_8$, $\Omega_b$) as inputs, and the correlation function $\xi(r)$ as output. The correlation function is measured at 10 separation values $r$.

In [None]:
# Separations r at which the correlation function is tabulated:
# 10 evenly spaced values from 50 to 140, endpoints included.
r_vals = np.linspace(50, 140, num=10)

# Collect the per-datapoint entries into arrays with one row per training sample.
xs_train = np.array([sample['x'] for sample in data_train])
ys_train = np.array([sample['y'] for sample in data_train])

print("Number of input parameters:", xs_train.shape[1])
print("Number of output values:", ys_train.shape[1])

We can plot our training set of statistics:

In [None]:
fig, ax = plt.subplots(figsize=(8, 6))
# Draw the curves in a random order so that line colors are not tied to the
# order in which the samples are stored. The fixed seed keeps the figure
# identical across Restart & Run All; permutation() returns a shuffled copy
# along the first axis, so ys_train itself is left untouched.
ys_train_plot = np.random.default_rng(42).permutation(ys_train)
# Transpose so that each column (one sample) is drawn as its own line.
ax.plot(r_vals, ys_train_plot.T, alpha=0.8)
ax.set_xlabel('$r$')
# The trailing ';' keeps the Text(...) repr out of the cell output.
ax.set_ylabel(r'$\xi(r)$');

Let's do the same for our test set:

In [None]:
# Read the test set the same way as the training set above.
test_fn = 'data/cosmology_test.npy'
# allow_pickle=True is needed because the records are Python dicts;
# NOTE(review): only load pickled .npy files from a trusted source.
data_test = np.load(test_fn, allow_pickle=True)

# One array element per datapoint, so the array length is the sample count.
n_test = len(data_test)
print("Number of datapoints:", n_test)
print("Dict keys:", data_test[0].keys())

# Collect the per-datapoint entries into arrays with one row per test sample.
xs_test = np.array([sample['x'] for sample in data_test])
ys_test = np.array([sample['y'] for sample in data_test])
print("Number of input parameters:", xs_test.shape[1])
print("Number of output values:", ys_test.shape[1])

In [None]:
# Test-set statistics on a fresh figure, using the explicit Axes interface.
fig, ax = plt.subplots(figsize=(8, 6))
# Transpose so that each column (one sample) is drawn as its own line.
ax.plot(r_vals, ys_test.T, alpha=0.8)
ax.set_xlabel('$r$')
ax.set_ylabel(r'$\xi(r)$')

We can also check out the parameter space. We plot the testing and training points used:

**TODO:** add the plot of the training and test points in parameter space.