In [1]:
import numpy as np
import scipy
import matplotlib.pyplot as plt
import emcee
import corner
import pickle
%matplotlib widget

### Definition of the SquaredExp kernel and its first + second derivatives

In [2]:
def sq_exp(t1, t2, *, sigma=1.0, lnl=np.log(2.5)):
    """Squared-exponential kernel evaluated between ``t1`` and ``t2``.

    Computes ``sigma**2 * exp(-(t1 - t2)**2 / (2 * l**2))`` with
    ``l = exp(lnl)``. The inputs are combined with plain NumPy arithmetic, so
    scalars and broadcast-compatible arrays both work.

    :param t1: first input coordinate(s).
    :param t2: second input coordinate(s).
    :param sigma: kernel amplitude (keyword-only).
    :param lnl: natural log of the length scale ``l`` (keyword-only).

    :returns: the kernel value(s), with the broadcast shape of ``t1 - t2``.
    """
    length_scale = np.exp(lnl)
    inv_two_l2 = 1 / (2 * length_scale**2)
    exponent = inv_two_l2 * (t1 - t2)**2
    return sigma**2 * np.exp(-exponent)

def sq_exp_cross(t1, t2, *, sigma=1.0, lnl=np.log(2.5)):
    """First-derivative ("cross") term of the squared-exponential kernel.

    Returns ``sigma**2 * (t1 - t2) / l**2 * exp(-(t1 - t2)**2 / (2 * l**2))``
    with ``l = exp(lnl)``. This is the derivative of ``sq_exp`` with respect to
    ``t2`` (equivalently minus the derivative with respect to ``t1``), so it is
    antisymmetric: ``sq_exp_cross(a, b) == -sq_exp_cross(b, a)``.

    The expression is evaluated from the difference ``t1 - t2`` rather than
    from the expanded polynomial ``-t1**2/2 + t1*t2 - t2**2/2``. The expanded
    form subtracts terms of size ``t**2`` and loses all precision when the
    time stamps are large compared with their separation.

    :param t1: first input coordinate(s).
    :param t2: second input coordinate(s).
    :param sigma: kernel amplitude (keyword-only).
    :param lnl: natural log of the length scale ``l`` (keyword-only).

    :returns: the cross-term value(s), with the broadcast shape of ``t1 - t2``.
    """
    l2 = np.exp(lnl) ** 2
    dt = t1 - t2
    return sigma**2 * dt * np.exp(-dt**2 / (2 * l2)) / l2

def sq_exp_grad(t1, t2, *, sigma=1.0, lnl=np.log(2.5)):
    """Mixed second derivative of the squared-exponential kernel.

    Returns ``sigma**2 * (l**2 - (t1 - t2)**2) / l**4 *
    exp(-(t1 - t2)**2 / (2 * l**2))`` with ``l = exp(lnl)``, i.e. the
    derivative of ``sq_exp`` with respect to both ``t1`` and ``t2``.

    The expression is evaluated from the difference ``t1 - t2`` rather than
    from the expanded polynomial in ``t1`` and ``t2``. The expanded form
    cancels catastrophically when the time stamps are large compared with
    their separation.

    :param t1: first input coordinate(s).
    :param t2: second input coordinate(s).
    :param sigma: kernel amplitude (keyword-only).
    :param lnl: natural log of the length scale ``l`` (keyword-only).

    :returns: the derivative-kernel value(s), with the broadcast shape of
        ``t1 - t2``.
    """
    l2 = np.exp(lnl) ** 2
    dt2 = (t1 - t2) ** 2
    return sigma**2 * (l2 - dt2) * np.exp(-dt2 / (2 * l2)) / l2**2

def sample_gp(random, K, size=None):
    """Draw zero-mean multivariate-normal samples with covariance ``K``.

    :param random: object exposing ``multivariate_normal(mean, cov, size=...)``
        (for example a ``numpy.random.Generator``).
    :param K: square covariance matrix; its first dimension sets the length of
        the zero mean vector.
    :param size: forwarded unchanged to ``multivariate_normal``.

    :returns: whatever ``random.multivariate_normal`` returns.
    """
    zero_mean = np.zeros(K.shape[0])
    draws = random.multivariate_normal(zero_mean, K, size=size)
    return draws

### Definition of the diagonal and off-diagonal terms for the covariance block matrix

In [3]:
def K_11(t_1, t_2, p):
    """Upper-left (first series with itself) covariance block.

    Evaluates ``A**2 * sq_exp + B**2 * sq_exp_grad`` between ``t_1`` and
    ``t_2``. The two cross terms (``A*B`` times ``sq_exp_cross`` in each
    argument order) are omitted because they are equal and opposite.
    Presumably this models the first series as ``A*G + B*dG/dt`` for a latent
    GP ``G`` -- confirm against the model write-up.

    :param t_1: row coordinates. When both ``t_1`` and ``t_2`` are NumPy
        arrays they are reshaped to ``(N, 1)`` and ``(1, M)`` so the result is
        an ``(N, M)`` matrix.
    :param t_2: column coordinates.
    :param p: sequence ``(A, B, C, D, lnl)``; only ``A``, ``B`` and ``lnl`` are
        used here.

    :returns: the covariance value(s).
    """
    both_arrays = isinstance(t_1, np.ndarray) and isinstance(t_2, np.ndarray)
    if both_arrays:
        # Column vector against row vector -> full pairwise grid.
        t_1, t_2 = t_1[:, None], t_2[None, :]

    amp, deriv_amp, _, _, lnl = p

    kernel_part = amp**2 * sq_exp(t_1, t_2, lnl=lnl)
    deriv_part = deriv_amp**2 * sq_exp_grad(t_1, t_2, lnl=lnl)
    return kernel_part + deriv_part
    
def K_12(t_1, t_2, p):
    """Upper-right (first series against second series) covariance block.

    Evaluates::

        A*C * sq_exp(t_1, t_2) + A*D * sq_exp_cross(t_1, t_2)
        + B*C * sq_exp_cross(t_2, t_1) + B*D * sq_exp_grad(t_1, t_2)

    Note the swapped argument order in the third term.

    :param t_1: row coordinates. When both ``t_1`` and ``t_2`` are NumPy
        arrays they are reshaped to ``(N, 1)`` and ``(1, M)`` so the result is
        an ``(N, M)`` matrix.
    :param t_2: column coordinates.
    :param p: sequence ``(A, B, C, D, lnl)``.

    :returns: the covariance value(s).
    """
    both_arrays = isinstance(t_1, np.ndarray) and isinstance(t_2, np.ndarray)
    if both_arrays:
        # Column vector against row vector -> full pairwise grid.
        t_1, t_2 = t_1[:, None], t_2[None, :]

    A, B, C, D, lnl = p

    kernel_part = A*C * sq_exp(t_1, t_2, lnl=lnl)
    cross_fwd = A*D * sq_exp_cross(t_1, t_2, lnl=lnl)
    cross_rev = B*C * sq_exp_cross(t_2, t_1, lnl=lnl)
    deriv_part = B*D * sq_exp_grad(t_1, t_2, lnl=lnl)

    return kernel_part + cross_fwd + cross_rev + deriv_part

def K_21(t_1, t_2, p):
    """Lower-left (second series against first series) covariance block.

    Evaluates::

        C*A * sq_exp(t_1, t_2) + C*B * sq_exp_cross(t_1, t_2)
        + D*A * sq_exp_cross(t_2, t_1) + D*B * sq_exp_grad(t_1, t_2)

    Note the swapped argument order in the third term.

    :param t_1: row coordinates. When both ``t_1`` and ``t_2`` are NumPy
        arrays they are reshaped to ``(N, 1)`` and ``(1, M)`` so the result is
        an ``(N, M)`` matrix.
    :param t_2: column coordinates.
    :param p: sequence ``(A, B, C, D, lnl)``.

    :returns: the covariance value(s).
    """
    both_arrays = isinstance(t_1, np.ndarray) and isinstance(t_2, np.ndarray)
    if both_arrays:
        # Column vector against row vector -> full pairwise grid.
        t_1, t_2 = t_1[:, None], t_2[None, :]

    A, B, C, D, lnl = p

    kernel_part = C*A * sq_exp(t_1, t_2, lnl=lnl)
    cross_fwd = C*B * sq_exp_cross(t_1, t_2, lnl=lnl)
    cross_rev = D*A * sq_exp_cross(t_2, t_1, lnl=lnl)
    deriv_part = D*B * sq_exp_grad(t_1, t_2, lnl=lnl)

    return kernel_part + cross_fwd + cross_rev + deriv_part
    
def K_22(t_1, t_2, p):
    """Lower-right (second series with itself) covariance block.

    Evaluates ``C**2 * sq_exp + D**2 * sq_exp_grad`` between ``t_1`` and
    ``t_2``. The two cross terms (``C*D`` times ``sq_exp_cross`` in each
    argument order) are omitted because they are equal and opposite.

    :param t_1: row coordinates. When both ``t_1`` and ``t_2`` are NumPy
        arrays they are reshaped to ``(N, 1)`` and ``(1, M)`` so the result is
        an ``(N, M)`` matrix.
    :param t_2: column coordinates.
    :param p: sequence ``(A, B, C, D, lnl)``; only ``C``, ``D`` and ``lnl`` are
        used here.

    :returns: the covariance value(s).
    """
    both_arrays = isinstance(t_1, np.ndarray) and isinstance(t_2, np.ndarray)
    if both_arrays:
        # Column vector against row vector -> full pairwise grid.
        t_1, t_2 = t_1[:, None], t_2[None, :]

    _, _, amp, deriv_amp, lnl = p

    kernel_part = amp**2 * sq_exp(t_1, t_2, lnl=lnl)
    deriv_part = deriv_amp**2 * sq_exp_grad(t_1, t_2, lnl=lnl)
    return kernel_part + deriv_part

### Load in time series data, use only certain sections

In [4]:
var_path = "example data/tau_0.050"

# Flux series: keep samples 500..2499 of the stored arrays.
t_flux = np.load(var_path + "_t.npy")[500:2500]
flux = np.load(var_path + "_f.npy")[500:2500]
flux_err = np.load(var_path + "_ferr.npy")[500:2500]

# Convert to relative flux about the mean, scaled by 1e3; the errors get the
# same scaling.
mu = np.mean(flux)
flux = (flux / mu - 1) * 1e3
flux_err = flux_err * 1e3 / mu

# Full RV series: samples 700..1749 of the same time grid.
t_rad_full = np.load(var_path + "_t.npy")[700:1750]
rv_full = np.load(var_path + "_rv.npy")[700:1750]
rv_err_full = np.load(var_path + "_rverr.npy")[700:1750]

# Draw `n_rv_cadence` distinct RV epochs at random (seeded for reproducibility)
rng_seed = 100 #404
n_rv_cadence = 20

random = np.random.default_rng(rng_seed)
inds = np.sort(random.choice(np.arange(len(t_rad_full)), size=n_rv_cadence, replace=False))

# Alternative: evenly spaced epochs
#inds = np.arange(0, len(t_rad_full), 225)

t_rad = t_rad_full[inds]
rv = rv_full[inds]
rv_err = rv_err_full[inds]

# "RV *" entries describe the full RV series; "Sampled RV *" entries describe
# the sub-sample. "RV Error" therefore stores rv_err_full so that it lines up
# with "RV Time"/"RV" instead of duplicating "Sampled RV Error".
trained_data_dic = {"Flux Time":t_flux, "Flux":flux, "Flux Error":flux_err,
                    "RV Time":t_rad_full, "RV":rv_full, "RV Error":rv_err_full,
                    "Sampled RV Time":t_rad, "Sampled RV":rv, "Sampled RV Error":rv_err,
                    "RNG Seed":rng_seed, "N RV":n_rv_cadence}

In [5]:
def time_series_data(save=True, data_dict_fn=None, data_dict=None):
    """Save or load a pickled data dictionary under ``Two Latent GPs Tests/``.

    :param save: when true, pickle ``data_dict`` to the file; otherwise read
        the file and return its contents.
    :param data_dict_fn: file name relative to ``Two Latent GPs Tests/``.
        Required, even though it defaults to ``None`` for signature
        compatibility.
    :param data_dict: object to pickle; required when ``save`` is true and
        ignored when loading.

    :returns: ``None`` when saving, the unpickled object when loading.

    :raises TypeError: if ``data_dict_fn`` is not given.
    :raises ValueError: if ``save`` is true but ``data_dict`` is ``None``.
    """
    if data_dict_fn is None:
        # Fail with a clear message instead of the str + None concatenation error.
        raise TypeError("time_series_data() requires data_dict_fn "
                        "(a file name inside 'Two Latent GPs Tests/')")
    if save and data_dict is None:
        # Refuse to write a file that only contains a pickled None.
        raise ValueError("data_dict must be provided when save=True")

    data_dict_fn = "Two Latent GPs Tests/" + data_dict_fn

    if save:
        with open(data_dict_fn, 'wb') as handle:
            pickle.dump(data_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
        return None

    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(data_dict_fn, 'rb') as handle:
        return pickle.load(handle)
    
#time_series_data(data_dict_fn='f500-3000_rv700-1750_nrv{0}_seed{1}.pickle'.format(n_rv_cadence, rng_seed), data_dict=trained_data_dic)
#time_series_data(data_dict_fn='f0-1000_rv500-2000_nrv7_equal_spaced.pickle'.format(), data_dict=trained_data_dic)

In [6]:
def cov_mat(params, t_f, t_rv):
    """Assemble the joint 2x2 block covariance for one latent GP.

    The blocks are laid out as ``[[K_11, K_12], [K_12.T, K_22]]``, with the
    ``t_f`` points first and the ``t_rv`` points second along both axes. The
    lower-left block is taken as the transpose of the upper-right one rather
    than being evaluated separately.

    :param params: kernel parameters passed through to ``K_11``, ``K_12`` and
        ``K_22``.
    :param t_f: coordinates of the first series.
    :param t_rv: coordinates of the second series.

    :returns: square matrix of size ``len(t_f) + len(t_rv)``.
    """
    upper_left = K_11(t_f, t_f, params)
    upper_right = K_12(t_f, t_rv, params)
    lower_right = K_22(t_rv, t_rv, params)

    return np.block([
        [upper_left, upper_right],
        [upper_right.T, lower_right],
    ])

In [7]:
def log_like(r, K):
    """
    Pulled from Dan's notebook, updated with Cholesky decomposition
    https://github.com/dfm/gp/blob/main/solutions.ipynb

    The multivariate Gaussian ln-likelihood (up to a constant) for the
    vector ``r`` given a covariance matrix ``K``:
    ``-0.5 * (r^T K^-1 r + ln det K)``. The ``N * ln(2 pi)`` term is omitted.

    :param r: ``(N,)``   The residual vector with ``N`` points.
    :param K: ``(N, N)`` The square (``N x N``) covariance matrix.

    :returns lnlike: ``float`` The Gaussian ln-likelihood, or ``-inf`` when
        ``K`` contains NaN/inf entries or is not positive definite.

    """
    # cho_factor rejects NaN/inf input with a ValueError. Treat such a matrix
    # like any other invalid covariance so an optimiser or sampler can simply
    # step away from it instead of aborting.
    if not np.all(np.isfinite(K)):
        return -np.inf

    # Cholesky decomposition, roughly 2x faster than solve + slogdet.
    # For more info, check out: https://math.stackexchange.com/questions/3158303/using-cholesky-decomposition-to-compute-covariance-matrix-determinant
    try:
        cho_decomp = scipy.linalg.cho_factor(K)
    except np.linalg.LinAlgError:
        # K is not positive definite.
        return -np.inf

    # ln det K = 2 * sum(ln diag(L)) for K = L L^T.
    log_det_cov = 2 * np.sum(np.log(np.diag(cho_decomp[0])))
    return -0.5 * (np.dot(r, scipy.linalg.cho_solve(cho_decomp, r)) + log_det_cov)

In [8]:
def gp_neg_log_prob(params, t_f, t_rv, y, y_err):
    """Negative ln-likelihood of ``y`` under the sum of two latent-GP covariances.

    No mean function is applied: ``y`` itself is passed to ``log_like`` as the
    residual vector.

    :param params: ``params[0]`` is the log of the extra diagonal ("jitter")
        term, ``params[1:6]`` parametrise the first GP and ``params[6:]`` the
        second.
    :param t_f: coordinates of the first series.
    :param t_rv: coordinates of the second series.
    :param y: stacked observations passed to ``log_like``.
    :param y_err: per-point uncertainties; ``y_err**2`` is added to the
        diagonal together with ``exp(params[0])``.

    :returns: ``-log_like(y, K)``.
    """
    extra_diag = np.exp(params[0])
    first_gp, second_gp = params[1:6], params[6:]

    # Total covariance = first latent GP + second latent GP.
    K = cov_mat(first_gp, t_f, t_rv) + cov_mat(second_gp, t_f, t_rv)

    # Observational variance plus jitter on the diagonal only.
    diag_rows, diag_cols = np.diag_indices_from(K)
    K[diag_rows, diag_cols] += y_err**2 + extra_diag

    return -log_like(y, K)

### Optimize model parameters and Sample Parameters

In [9]:
def minimize_gp_kernel(y, p0=None, bounds=None):
    """Fit the jitter and the two sets of kernel parameters by minimising ``gp_neg_log_prob``.

    Uses the notebook-level ``t_flux``, ``t_rad``, ``flux_err`` and ``rv_err``
    as the training inputs and uncertainties.

    :param y: stacked observations (flux followed by the sampled RVs).
    :param p0: optional starting vector ``[ln jitter, A, B, C, D, lnl,
        A, B, C, D, lnl]``; defaults to the values used previously.
    :param bounds: optional bounds forwarded to ``scipy.optimize.minimize``.
        With the default ``None`` the optimisation is unconstrained, exactly
        as before (the previous bounds list was built but never passed on).
        An earlier experiment used, per parameter::

            [(np.log(1e-3**2), np.log(1e2**2)),
             (0.01, 100.0), (-1.0, 1.0), (-250.0, 250.0), (-250.0, 250.0), np.log((0.5, 5)),
             (0.01, 100.0), (-1.0, 1.0), (-250.0, 250.0), (-250.0, 250.0), np.log((0.5, 5))]

    :returns: the ``scipy.optimize.OptimizeResult`` of the fit.
    """
    if p0 is None:
        p0 = np.array([np.log(1.0**2.), 0.5, -0.4, 0.7, 5.0, np.log(2.3),
                       0.5, -0.4, 0.7, 5.0, np.log(2.3)])

    # Same stacking order as y: flux uncertainties first, then RV.
    y_err = np.concatenate((flux_err, rv_err))

    result = scipy.optimize.minimize(gp_neg_log_prob, p0,
                                     args=(t_flux, t_rad, y, y_err),
                                     bounds=bounds)

    return result

In [10]:
# Stack the observations flux-first, RV-second. `y` is also read as a
# notebook-level global by predict().
y = np.concatenate((flux, rv))
res = minimize_gp_kernel(y)

print(res)

      fun: -2552.4412351024075
 hess_inv: array([[ 1.81458864e-02,  1.13068135e-03, -2.40525297e-03,
         1.96437171e-01,  8.32688241e-02,  1.27067694e-02,
        -7.98456834e-03,  4.62868616e-02,  1.60997290e+00,
         1.15649425e+00,  9.60654674e-03],
       [ 1.13068135e-03,  1.13025247e-03, -4.41307520e-04,
         3.63905771e-03, -2.54498581e-02,  3.69234662e-03,
        -4.20891371e-03,  1.86990638e-02,  4.25050886e-01,
         4.25978732e-01,  3.39465682e-03],
       [-2.40525297e-03, -4.41307520e-04,  1.38747903e-03,
        -3.40359613e-03, -4.89815116e-02, -6.53106326e-03,
         3.81750719e-03, -2.70643487e-02, -6.39845140e-01,
        -5.22434264e-01, -5.28711339e-03],
       [ 1.96437171e-01,  3.63905771e-03, -3.40359613e-03,
         3.08008880e+01,  6.72087652e+00,  5.61981968e-02,
        -1.18166776e-01,  8.39420853e-01,  9.29514294e+00,
         1.25353598e+01,  2.21774389e-01],
       [ 8.32688241e-02, -2.54498581e-02, -4.89815116e-02,
         6.72087652

### Plot model and data

In [11]:
def cov_mat_test(params, t_test_f, t_test_rv, t_train_f, t_train_rv):
    """Assemble the test-versus-train block covariance for one latent GP.

    Rows run over the test points (first series, then second series) and
    columns over the training points in the same order. The matrix is not
    square in general, so all four blocks are evaluated explicitly.

    :param params: kernel parameters passed through to the ``K_ij`` functions.
    :param t_test_f: test coordinates of the first series.
    :param t_test_rv: test coordinates of the second series.
    :param t_train_f: training coordinates of the first series.
    :param t_train_rv: training coordinates of the second series.

    :returns: matrix with ``len(t_test_f) + len(t_test_rv)`` rows and
        ``len(t_train_f) + len(t_train_rv)`` columns.
    """
    top_row = [K_11(t_test_f, t_train_f, params),
               K_12(t_test_f, t_train_rv, params)]
    bottom_row = [K_21(t_test_rv, t_train_f, params),
                  K_22(t_test_rv, t_train_rv, params)]

    return np.block([top_row, bottom_row])

def trained_cov(p):
    """Training covariance for parameter vector ``p``, including the noise diagonal.

    Sums the ``cov_mat`` of the two latent GPs (``p[1:6]`` and ``p[6:]``) on
    the notebook-level training grids ``t_flux`` / ``t_rad``. It then adds the
    squared uncertainties (``flux_err`` followed by ``rv_err``) plus
    ``exp(p[0])`` to the diagonal.

    :param p: full parameter vector ``[ln jitter, 5 kernel params, 5 kernel params]``.

    :returns: square training covariance matrix.
    """
    first_gp, second_gp = p[1:6], p[6:]
    cov_train = cov_mat(first_gp, t_flux, t_rad) + cov_mat(second_gp, t_flux, t_rad)

    # Per-point measurement variance plus the shared jitter term.
    noise_var = np.concatenate((flux_err, rv_err))**2 + np.exp(p[0])
    diag_rows, diag_cols = np.diag_indices_from(cov_train)
    cov_train[diag_rows, diag_cols] += noise_var

    return cov_train

def predict(p, n_test=1000):
    """GP predictive mean and variance for both series on regular test grids.

    Conditions on the notebook-level training data (``y`` at ``t_flux`` /
    ``t_rad``). Predictions are made on ``n_test`` evenly spaced points
    spanning ``t_flux`` for the first series and ``t_rad_full`` for the
    second.

    The training covariance is Cholesky-factored once, without any
    ``overwrite_*`` flags, and the factor is reused for both the mean and the
    variance. The overwrite flags allow SciPy to destroy its inputs in place:
    here that would be the global ``y`` and the covariance needed again for
    the variance, making repeated calls and later cells unreliable.

    :param p: full parameter vector ``[ln jitter, 5 kernel params, 5 kernel params]``.
    :param n_test: number of test points per series.

    :returns: ``(mu, var, t_test_flux, t_test_rv)``. ``mu`` and ``var`` have
        length ``2 * n_test``: the first ``n_test`` entries belong to the
        first series and the rest to the second.
    """
    cov_train = trained_cov(p)

    # Factor once and leave both cov_train and y untouched.
    factor = scipy.linalg.cho_factor(cov_train)
    alpha = scipy.linalg.cho_solve(factor, y)

    t_test_flux = np.linspace(np.min(t_flux), np.max(t_flux), n_test)
    t_test_rv = np.linspace(np.min(t_rad_full), np.max(t_rad_full), n_test)

    # Prior covariance among the test points (only its diagonal is needed).
    cov_test_only = (cov_mat(p[1:6], t_test_flux, t_test_rv)
                     + cov_mat(p[6:], t_test_flux, t_test_rv))

    # Cross-covariance between test points (rows) and training points (columns).
    cov_test = (cov_mat_test(p[1:6], t_test_flux, t_test_rv, t_flux, t_rad)
                + cov_mat_test(p[6:], t_test_flux, t_test_rv, t_flux, t_rad))

    mu = np.dot(cov_test, alpha)

    # var_i = k(x_i, x_i) - k_i^T K^-1 k_i, computed for all test points at once.
    var = np.diag(cov_test_only).copy()
    inv_cov_test = scipy.linalg.cho_solve(factor, cov_test.T)
    var -= np.sum(cov_test.T * inv_cov_test, axis=0)

    return mu, var, t_test_flux, t_test_rv

In [12]:
# NOTE: this rebinds the notebook-level name `mu`, which the data-loading cell
# used for the mean flux; from here on `mu` is the GP predictive mean that
# plot_data_and_model() reads.
mu, var, t_test_flux, t_test_rv = predict(res.x)

In [16]:
# Quick look at the predictive mean (NumPy abbreviates the long array).
print(mu)

[-4.04904629 -4.07630418 -4.09417736 ... -9.37080224 -7.98634223
 -6.58358691]


In [15]:
def plot_data_and_model(p0=None, save_fn=None):
    """Plot both data series together with the GP prediction and its 1-sigma band.

    Reads the notebook-level data (``t_flux``, ``flux``, ``flux_err``,
    ``t_rad``, ``rv``, ``t_rad_full``, ``rv_full``, ``rv_err_full``) and the
    current prediction (``mu``, ``var``, ``t_test_flux``, ``t_test_rv``).
    The first ``len(t_test_flux)`` entries of ``mu``/``var`` are drawn in the
    top panel and the remainder in the bottom panel.

    :param p0: accepted for backward compatibility; not used.
    :param save_fn: optional path; when given the figure is also written there
        (``bbox_inches='tight'``, ``dpi=400``). For example
        ``"Two Latent GPs Tests/SqExp/f500-2500_rv700-1750_nrv{0}_seed{1}.png".format(n_rv_cadence, rng_seed)``.

    :returns: ``None``; the figure is shown with ``plt.show()``.
    """
    fig, ax = plt.subplots(nrows=2, figsize=(12, 6), gridspec_kw={'hspace':0.4})

    n_flux_test = len(t_test_flux)
    mu_flux, mu_rv = mu[:n_flux_test], mu[n_flux_test:]
    sd_flux, sd_rv = np.sqrt(var[:n_flux_test]), np.sqrt(var[n_flux_test:])

    # --- data -------------------------------------------------------------
    ax[0].scatter(t_flux, flux, color='black', s=5.0, label='LC w/ errors')
    ax[0].plot(t_flux, flux-flux_err, color='grey', label='True LC', zorder=9)

    rv_train = ax[1].scatter(t_rad, rv, color='orange', s=100.0, alpha=0.9, zorder=10, marker='*', label='Trained RVs')
    rv_data = ax[1].scatter(t_rad_full, rv_full, color='black', s=5.0, label='RVs w/ errors')
    true_rv, = ax[1].plot(t_rad_full, rv_full-rv_err_full, color='grey', label='True RVs')

    # --- GP mean (raw strings keep the mathtext backslashes intact) --------
    ax[0].plot(t_test_flux, mu_flux, lw=2.0, color='C0', ls='-', label=r'GP$_\mathrm{LC}$', zorder=10)
    rv_gp, = ax[1].plot(t_test_rv, mu_rv, lw=2.0, color='C0', ls='-', zorder=1, label=r'GP$_\mathrm{RV}$')

    # --- GP +/- 1 sigma band ----------------------------------------------
    ax[0].fill_between(t_test_flux, y1=mu_flux-sd_flux, y2=mu_flux+sd_flux, color='C0', alpha=0.5)
    ax[1].fill_between(t_test_rv, y1=mu_rv-sd_rv, y2=mu_rv+sd_rv, color='C0', alpha=0.25)

    # --- axes ---------------------------------------------------------------
    ax[0].set_ylabel(r"Norm. Flux (ppt)")
    ax[1].set_ylabel(r"RV (m s$^{-1}$)")

    ax[0].set_xlabel(r"Time (day)")
    ax[1].set_xlabel(r"Time (day)")

    ax[0].set_xlim([min(t_flux), max(t_flux)])
    ax[1].set_xlim([min(t_rad), max(t_rad)])

    ax[0].set_ylim([min(flux)-2.0, max(flux)+2.5])
    ax[1].set_ylim([min(rv_full)-175., max(rv_full)+150.])

    # --- legends ------------------------------------------------------------
    ax[0].legend(fontsize=12, markerscale=2.0)
    # A second ax.legend() call replaces the first legend, so the first one is
    # re-attached explicitly with add_artist.
    rv_l1 = ax[1].legend([rv_train, rv_data], ["Trained RVs", "RVs w/ errors"], fontsize=12, markerscale=1.5, loc='lower right')
    ax[1].legend([rv_gp, true_rv], [r"GP$_\mathrm{RV}$", "True RV"], fontsize=12, markerscale=1.5, loc='lower left')
    ax[1].add_artist(rv_l1)

    plt.tight_layout()

    if save_fn is not None:
        plt.savefig(save_fn, bbox_inches='tight', dpi=400)

    plt.show()
    
# The p0 argument is accepted but not used by plot_data_and_model, so the
# commented-out call below draws the same figure as the bare call.
#plot_data_and_model(p0 = np.array([0.1, 0.04, 0.07, 5.0, 20.0, 2.0]))
plot_data_and_model()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  plt.tight_layout()


In [14]:
# Close the current figure.
plt.close()