# Fussing with GP: Gaussian Process regression of tau_eff vs. redshift

In [1]:
# imports
import os
from pkg_resources import resource_filename
import numpy as np

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

#import lbg_da


from bokeh import plotting
from bokeh import models
from bokeh.models.glyphs import Patch

# Plot stuff

In [3]:
def set_fontsize(p, fsz):
    '''
    Set the font size of the axis labels and tick labels of a Bokeh plot.

    Parameters
    ----------
    p : Bokeh plot class
      Plot whose x- and y-axis text properties are modified in place
    fsz : int or float
      Font size, in points
    '''
    # '{:d}' raises ValueError for floats; '{:g}' accepts ints and floats
    # alike and renders whole numbers without a trailing '.0' (16.0 -> '16pt')
    font_size = '{:g}pt'.format(fsz)
    for axis in (p.xaxis, p.yaxis):
        axis.axis_label_text_font_size = font_size
        axis.major_label_text_font_size = font_size

# Load the data

In [7]:
#path = resource_filename('lbg_da', 'data/tau_eff')

## Monzon+19

In [8]:
# Data directory: defaults to the original local checkout but can be
# redirected with the LBG_DA_TAU_EFF_DIR environment variable, so the
# notebook is not tied to one machine's absolute path
TAU_EFF_DIR = os.environ.get('LBG_DA_TAU_EFF_DIR',
                             '/Users/jsmonzon/lbg_da/data/tau_eff')
# Measurements (rows 0 and 1 are used below as z and tau_eff) and their covariance
m19 = np.load(os.path.join(TAU_EFF_DIR, 'tau_data.npy'))
m19_cov = np.load(os.path.join(TAU_EFF_DIR, 'covariance.npy'))

In [9]:
# Sanity check: dimensions of the covariance matrix
np.shape(m19_cov)

(88, 88)

In [10]:
# Per-point uncertainty: square root of the covariance diagonal
# (off-diagonal terms are not used)
m19_var = m19_cov.diagonal()
m19_sig = np.sqrt(m19_var)

### Plot

In [11]:
# Quick look at the Monzon+19 measurements on their own
plotting.output_notebook()

fig_kwargs = dict(title='tau_eff Evolution', x_axis_label='z',
                  y_axis_label='tau_eff', y_range=(0., 0.6))
p = plotting.figure(**fig_kwargs)

# Row 0 of m19 goes on the 'z' axis, row 1 on the 'tau_eff' axis
# NOTE(review): newer Bokeh releases replace `legend=` with `legend_label=` -- confirm installed version
m19_z, m19_tau = m19[0,:], m19[1,:]
p.circle(m19_z, m19_tau, size=20, color="navy", alpha=0.5, legend='Monzon+19')
p.legend.location = "bottom_left"

#set_fontsize(p, 16)

plotting.show(p)

## Literature

In [21]:
# Literature compilations live in the 'literature' sub-directory of the
# tau_eff data directory. The directory defaults to the original local
# checkout but can be redirected with the LBG_DA_TAU_EFF_DIR environment
# variable, so the notebook is not tied to one machine's absolute path
TAU_EFF_DIR = os.environ.get('LBG_DA_TAU_EFF_DIR',
                             '/Users/jsmonzon/lbg_da/data/tau_eff')
LIT_DIR = os.path.join(TAU_EFF_DIR, 'literature')
s03 = np.load(os.path.join(LIT_DIR, 'Schaye_2003.npy'))
k05 = np.load(os.path.join(LIT_DIR, 'Kirkman_2005.npy'))
t17 = np.load(os.path.join(LIT_DIR, 'Thomas_2017.npy'))
b13 = np.load(os.path.join(LIT_DIR, 'Becker_2013.npy'))

### Plot

In [23]:
# Compare Monzon+19 against the pooled literature values
plotting.output_notebook()

p = plotting.figure(x_axis_label='z', y_axis_label='tau_eff',
                    title='tau_eff Evolution', y_range=(0., 0.6))

p.circle(m19[0,:], m19[1,:], size=10, color="navy", alpha=0.9, legend='Monzon+19')

# All literature samples are drawn together as a single gray glyph
lit_sets = (s03, k05, t17, b13)
lit_z = np.concatenate([dset[0,:] for dset in lit_sets])
lit_tau = np.concatenate([dset[1,:] for dset in lit_sets])
p.circle(lit_z, lit_tau, size=5, color="gray", alpha=0.5, legend='Literature')

p.legend.location = "bottom_right"

#set_fontsize(p, 16)

plotting.show(p)

# Game on

## Prep the data

In [24]:
#
# Pool every data set into flat arrays, in the order s03, k05, t17, m19, b13.
# Rows 0/1/2 of each literature array supply z / tau / sigma; for m19 the
# sigma comes from the covariance diagonal (m19_sig) instead of a row 2.
pooled = (s03, k05, t17, m19, b13)
all_z = np.concatenate([dset[0,:] for dset in pooled])
all_tau = np.concatenate([dset[1,:] for dset in pooled])
all_sig = np.concatenate([dset[2,:] for dset in (s03, k05, t17)]
                         + [m19_sig, b13[2,:]])

In [25]:
# Keep only the measurements at z < 4; the same mask is applied to all three arrays
gdz = all_z < 4.
cut = gdz
all_z, all_tau, all_sig = [arr[cut] for arr in (all_z, all_tau, all_sig)]

In [26]:
# Feature matrix as a single column (n_samples, 1), the layout passed to gp.fit
X = all_z[:, np.newaxis]
X.shape

(157, 1)

In [27]:
# Centre the targets on zero; mean_tau is added back when plotting the prediction
mean_tau = all_tau.mean()
norm_tau = all_tau - mean_tau

In [28]:
# Multiplicative rescaling applied to z and tau before the fit.
# 1. means no rescaling; 100. was tried earlier and immediately overridden,
# so those dead assignments are dropped.
scale_z = 1.
scale_tau = 1.

## Prep the GP

In [29]:
# Kernel: constant amplitude times an RBF; the tuples are the bounds within
# which the optimizer may vary each hyper-parameter
kernel = C(1.0, (1e-3, 1e3)) * RBF(0.1, (1e-2, 1e2))
# alpha is the per-point noise variance of the *training targets*. The targets
# are norm_tau*scale_tau, so the uncertainties must carry the same scale factor
# (identical to all_sig**2 while scale_tau == 1).
# random_state pins the optimizer restarts so a re-run reproduces the same fit.
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9,
                             alpha=(all_sig*scale_tau)**2, random_state=0)

In [30]:
# Fit on the (optionally rescaled) redshifts and mean-subtracted tau values
X_train = X*scale_z
y_train = norm_tau*scale_tau
gp.fit(X_train, y_train)

GaussianProcessRegressor(alpha=array([2.56000000e-04, 2.56000000e-04, 6.76000000e-04, 6.76000000e-04,
       1.08900000e-03, 3.61000000e-04, 3.61000000e-04, 6.76000000e-04,
       7.29000000e-04, 3.24000000e-04, 4.41000000e-04, 3.24000000e-04,
       1.96000000e-04, 4.41000000e-04, 3.61000000e-04, 4.84000000e-04,
       4.00000000e-04, 8.41000000e-04, 6.25000000e-04, 4.84000000e-04,
       8.41000000e-...
       1.52376755e-03, 1.36809762e-04, 1.30156489e-04, 1.25427052e-04,
       1.24761723e-04, 1.32795375e-04, 1.36033808e-04, 1.42532047e-04,
       1.43986606e-04, 1.45409115e-04, 1.51138823e-04, 1.56985005e-04,
       1.71583501e-04, 1.64936560e-04, 1.63773561e-04, 1.77777778e-04,
       1.91050874e-04]),
                         copy_X_train=True, kernel=1**2 * RBF(length_scale=0.1),
                         n_restarts_optimizer=9, normalize_y=False,
                         optimizer='fmin_l_bfgs_b', random_state=None)

## Prediction

In [31]:
# Evaluate the GP on a dense grid of 1000 redshifts between 1.5 and 3.5,
# shaped as a single column; sigma is the predictive standard deviation
z_grid = np.linspace(1.5, 3.5, 1000)
x = z_grid[:, np.newaxis]
y_pred, sigma = gp.predict(x*scale_z, return_std=True)

## Plot

In [21]:
# Plot the GP prediction and its 1-sigma band over the individual measurements
plotting.output_notebook()

p = plotting.figure(title='tau_eff Evolution', x_axis_label='z',
                   y_axis_label='tau_eff', y_range=(0., 1), x_range=(1.5,4.))

# Undo the training transform. The GP was fit to (tau - mean_tau)*scale_tau,
# so divide by scale_tau first and only then add the mean back. sigma is a
# width, so it only needs the rescaling. (The band and the line previously
# used two different expressions that agree only while scale_tau == 1.)
tau_pred = y_pred/scale_tau + mean_tau
tau_sig = sigma/scale_tau

# GP 1-sigma band as a closed polygon: upper edge left-to-right,
# then lower edge right-to-left
xv = x[:,0]
xp = np.hstack((xv, xv[::-1]))
yp = np.hstack((tau_pred+tau_sig, (tau_pred-tau_sig)[::-1]))

source = models.ColumnDataSource(dict(x=xp, y=yp))
glyph = Patch(x="x", y="y", fill_color="lightgray")
p.add_glyph(source, glyph)

# Measurements with error bars. The literature sets carry their sigma in
# row 2; for M19 it comes from the covariance diagonal (m19_sig).
# NOTE(review): b13 enters the fit but is not drawn here -- confirm this is intended
# NOTE(review): newer Bokeh releases replace `legend=` with `legend_label=` -- confirm installed version
data_sets = [(s03, s03[2,:], 'Schaye', 'blue'),
             (k05, k05[2,:], 'Kirkman', 'red'),
             (t17, t17[2,:], 'Thomas', 'green'),
             (m19, m19_sig, 'Monzon', 'black')]
for dset, dsig, ref, clr in data_sets:
    p.circle(dset[0,:], dset[1,:], size=5, color=clr, alpha=0.5, legend=ref)
    # Error bar
    sdict = dict(base=dset[0,:], upper=dset[1,:]+dsig, lower=dset[1,:]-dsig)
    source2 = models.ColumnDataSource(sdict)
    whisk = models.Whisker(source=source2, base="base", upper="upper", lower="lower", level="overlay",
                      line_color=clr, line_width=2)
    p.add_layout(whisk)

# GP mean prediction
p.line(xv, tau_pred, color='black', legend='Prediction')

p.legend.location = "bottom_right"

#set_fontsize(p, 16)

plotting.show(p)