In [1]:
%matplotlib notebook

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import celerite
from celerite import terms
import emcee
import corner
from scipy import signal
from scipy import stats
from IPython.display import display, Math, Image
import os
from astropy.io import fits
import pandas as pd
from astropy.timeseries import LombScargle

  '{0}.{1}.{2}'.format(*version.hdf5_built_version_tuple)


In [3]:
dir = os.getcwd()
dir

'C:\\Users\\Marc\\Documents\\repo\\stage_m2\\src'

In [4]:
par_dir = os.path.dirname(dir)
par_dir

'C:\\Users\\Marc\\Documents\\repo\\stage_m2'

In [5]:
### Définition de la likelihood ###
def log_likelihood(params,gp,time,flux):
    gp.set_parameter_vector(params)
    ll = gp.log_likelihood(flux)
    return ll if np.isfinite(ll) else -np.inf

### Définition du prior (uniforme) ###
def log_prior(params,bnds):
    ### params[0] = Log S0, params[1] = Log Q, params[2] = log w0, params[3] = offset
    if ((-30 < params[0] < 30) and (bnds[1][0] < params[1] < bnds[1][1]) and (bnds[2][0] < params[2] < bnds[2][1]) and (-1000 < params[3] < 1000)) :
        return 0
    return -np.inf

### Définition du posterior ###
def log_probability(params,gp,time,flux):
    bnds = gp.get_parameter_bounds()
    lp = log_prior(params,bnds)
    if not np.isfinite(lp):
        return -np.inf
    return lp + log_likelihood(params,gp,time,flux) if np.isfinite(lp) else -np.inf

    ### MCMC ###
def run_mcmc(time,flux,gp,mcmc_params) :
    nwalkers = mcmc_params[0]
    nburnsteps = mcmc_params[1]
    nprogsteps = mcmc_params[2]
    
    initial = gp.get_parameter_vector()
    ndim = len(initial)
    sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability,args=(gp,time,flux))

    print("Running burn-in...")
    p0 = initial + 1e-4 * np.random.randn(nwalkers, ndim)
    p0, lp, _ = sampler.run_mcmc(p0, nburnsteps,progress=True)

    print("Running production...")
    sampler.reset()
    sampler.run_mcmc(p0, nprogsteps,progress=True)
    print("Finished")
    
    likelihood = gp.log_likelihood(flux)
    af = sampler.acceptance_fraction
    
    print("Mean acceptance fraction:", np.mean(af))
    print("parameter_dict:\n{0}\n".format(gp.get_parameter_dict()))
    print("Final MCMC log likelihood: {0}".format(gp.log_likelihood(flux)))
    tau = sampler.get_autocorr_time()
    print(tau)
    return(sampler,likelihood)

In [6]:
def gp_building(param,time,rv,error) :
    bounds = dict(log_S0= (None,None), log_Q = (-5,10), log_omega0 = (5.4,6)) ### Bornes des hyperparamètres
    kernel = terms.SHOTerm(log_S0=1., log_Q=param[1], log_omega0=param[2],bounds=bounds) ### Choix de la fonction de covariance, ici SHO
    
    gp = celerite.GP(kernel,fit_mean=True) ### Création du GP, fit_mean = True ---> Offset
    gp.compute(time,yerr=error) ### Création de la matrice de covariance, à faire obligatoirement !
 
    print("Initial log likelihood: {0}".format(gp.log_likelihood(rv)))
    print("parameter_dict:\n{0}\n".format(gp.get_parameter_dict()))
    
    return gp

In [7]:
def GP_RV (photometry_names,mcmc_params,normalize,indiv) :
    
    ### photometry_names = noms des photométries à tester
    ### initial_params = valeurs initiales des hyperparamètres du GP
    ### normalize = utilisation de la photométrie normalisée ou brute
    ### mcmc_params = [nb de walkers, nb de burn-in steps, nb de production steps]
    ### indiv = Nuits individuelles ou groupes
    
    ### Données RV
    rv_data = np.loadtxt(par_dir + "/data/rv_betapic_carine.txt",usecols=(0,1,2))
    rv_time = rv_data[:,0]
    rv_full = rv_data[:,1]
    rv_error_full = rv_data[:,2]
    
    ### Bornes des nuits ou groupes
    if indiv == True :
        bnds=np.array([542.51,542.61,544.551,544.554,545.519,545.522,546.49,546.576,780.69,780.74,799.67,799.76,829.62,829.71,847.61,847.672,853.58,853.68,878.57,878.65,913.49,913.59,1131.71,1131.76,1131.83,1131.88,1170.67,1170.74,1519.75,1519.84,1565.63,1565.72,1567.67,1567.76,1597.58,1597.67,2569.77,2569.85,2580.79,2580.85,2583.82,2583.89,2584.84,2584.88,2685.67,2685.74,2694.53,2694.6,2706.52,2706.59,2707.58,2707.64,2773.46,2773.53,2980.82,2980.86,2982.83,2982.87,2988.69,2988.75,3040.55,3043.83,3333.85,3333.89,3344.73,3344.77,3492.46,3494.54,3666.80,3666.85,3668.74,3668.81,3711.69,3711.75,3712.68,3712.75,3713.84,3713.9,3768.51,3768.79,3769.71,3769.78,3770.55,3770.61,3771.67,3771.73,3772.59,3772.66,3773.64,3773.71,3774.75,3774.81,3849.46,3849.56,3905.44,3905.46,4007.8,4007.9,4035.67,4035.92,4036.68,4036.92,4038.66,4038.91,4039.66,4039.88,4064.71,4064.82,4093.5,4093.89,4094.57,4094.9,4095.52,4095.9,4206.48,4206.64,4207.5,4207.64,4208.48,4208.63,4226.47,4226.58,4227.48,4227.58,4228.45,4228.58,4229.46,4229.58,4230.46,4230.57,4231.46,4231.57,4238.47,4238.55,4239.5,4239.54,4242.45,4242.54,4243.45,4243.54,4244.45,4244.53,4525.52,4525.67,4786.61,4786.88,4787.62,4787.78,4789.61,4789.64,4827.60,4827.86,4828.58,4828.85,4829.6,4829.85,4905.5,4905.59,4906.5,4906.58,4908.51,4908.58,4909.52,4909.57,4921.49,4921.68,4922.49,4922.62,4923.49,4923.68])
   
    else :
        bnds = np.array([540,550,780,781,799,800,829,830,847,854,913,914,1131,1132,1170,1171,1519,1520,1565,1568,1597,1598,2569,2570,2580,2585,2685,2686,2694,2695,2706,2708,2980,2989,3344,3345,3666,3669,3711,3713,3768,3775,3849,3850,4007,4008,4035,4040,4064,4065,4093,4096,4206,4209,4226,4245,4525,4526,4786,4790,4827,4830,4905,4910,4921,4924])

    
    for name in photometry_names :
        
       
        if ((normalize == False) & (name == "TESS_ALL")) :
            break
        
        ### Récupération des postérieurs des HP du GP entraîné sur la photométrie
        ### Création d'un fichier txt pour stocker les postérieurs des HP du GP sur les RV
        elif normalize == True :
            print(name)
            gp_photo_data = np.genfromtxt(par_dir + "/data/Photometry/GP_photometry_data/" + name + "_norm_photometry_posteriors.txt",usecols=1, delimiter = ' ')
            file = open(par_dir + "/data/RV/GP_RV_data/" + name + "_norm_rv_posteriors.txt","w")
            
        else : 
            print(name)
            gp_photo_data = np.genfromtxt(par_dir + "/data/Photometry/GP_photometry_data/" + name + "_photometry_posteriors.txt",usecols=1, delimiter = ' ')
            file = open(par_dir + "/data/RV/GP_RV_data/" + name + "_rv_posteriors.txt","w")
            
        file.write("Time LogS0 LogQ Logw0 Offset Inc-S0 Inc+S0 Inc-Q Inc+Q Inc-w0 Inc+w0 Inc-Offset Inc+Offset Likelihood\n") 
        
        
        ### Pour chaque nuit ou chaque groupe
        for j in range(np.int(len(bnds)/2)) :
            
            index = np.ndarray.flatten(np.argwhere((rv_time>bnds[j*2])&(rv_time<bnds[(j*2)+1]))) ### Isolement du groupe/nuit
            
            
            ### faire le sorting des rv/rv_errors
            
            rv = rv_full[index]*1000 ### en m/s
            rv_error = rv_error_full[index]*1000 ### en m/s
            sort_index = np.ndarray.flatten(np.argwhere(np.sort(rv_time[index])))
            rv = rv[sort_index]
            rv_error = rv_error[sort_index]
            time = np.sort(rv_time[index])
            
            rv_time_plot = np.mean(time) ### Moyenne du groupe/nuit
            print(name+"/ %.2f\n"%rv_time_plot)
            photo_posteriors = gp_photo_data[:3] ### Récupération des HP photométrie
            
            gp = gp_building(photo_posteriors,time,rv,rv_error) ### Création du GP avec les posteriors du GP photométrie
            sampler,likelihood = run_mcmc(time,rv,gp,mcmc_params) ### MCMC

            ## Posteriors ###
            flat_samples = sampler.get_chain(discard=5000,thin = 1,flat=True) ### Récupération des samples, on jette les samples du burn-in
            
            ### Labels pour les corner plots
            labels = ["ln S0","ln Q","ln $\omega_0$ [day$^{-1}$]","Offset [m/s]"]

            posteriors = np.array([])
            posteriors_errors = np.array([])
            output_posteriors = np.array([]) 
            output_data = np.array([])
            for i in range(flat_samples.shape[1]):

                mcmc = np.percentile(flat_samples[:, i], [16, 50, 84]) ### On récupère les valeurs de 1 sigma 
                value = np.percentile(flat_samples[:, i], [50]) ### On récupère la mediane
                q = np.diff(mcmc) ### Incertitude
                txt = "\mathrm{{{3}}} = {0:.3f}_{{-{1:.3f}}}^{{{2:.3f}}}"
                txt = txt.format(mcmc[1], q[0], q[1], labels[i])
                display(Math(txt))
                posteriors = np.append(posteriors,value) ### On stocke la médiane de chaque hyperparamètre
                posteriors_errors = np.append(posteriors_errors,q) ### On stocke les incertitudes de chaque hyperparamètre

            
            output_posteriors = np.append(output_posteriors,posteriors)
            output_posteriors = np.append(output_posteriors,posteriors_errors)
            output_posteriors = np.append(output_posteriors,likelihood)
            output_data = np.append(output_data,rv_time_plot)
            output_data = np.append(output_data,output_posteriors)

            np.savetxt(file,output_data.reshape(1, output_data.shape[0]),fmt='%10s',delimiter=' ')
            
            ### Corner plots
            if normalize == True :

                fig = corner.corner(flat_samples, labels=labels, truths=[None,photo_posteriors[1],photo_posteriors[2],None],quantiles=[0.5],show_titles=True,title_fmt = '.3f',levels=(0.68,0.95,0.997))
                axes = np.array(fig.axes).reshape((flat_samples.shape[1], flat_samples.shape[1]))
                print(posteriors)
                for i in range(flat_samples.shape[1]):
                    ax = axes[i,i]
                    ax.fill_between((posteriors[i] - posteriors_errors[i*2],posteriors[i] + posteriors_errors[i*2+1]),len(flat_samples), color='gray', alpha=0.3, edgecolor="none",interpolate=True)
                plt.savefig(par_dir + "/plots/%s/GP_RV/Posteriors_RV/%.2f_Posteriors_MCMC_%s_norm.png"%(name,rv_time_plot,name),bbox_inches='tight')
                plt.show()

            else :
                fig = corner.corner(flat_samples, labels=labels, truths=[None,None,np.log(2*np.pi*24*60/30.4)],quantiles=[0.5],show_titles=True,title_fmt = '.3f',levels=(0.68,0.95,0.997))
                axes = np.array(fig.axes).reshape((flat_samples.shape[1], flat_samples.shape[1]))
                for i in range(flat_samples.shape[1]):
                    ax = axes[i, i]
                    ax.fill_between([posteriors[i] - posteriors_errors[i*2],posteriors[i] + posteriors_errors[i*2+1]],len(flat_samples), color='gray', alpha=0.3, edgecolor="none",interpolate=True)
                plt.savefig(par_dir + "/plots/%s/GP_RV/Posteriors_RV/%.2f_Posteriors_MCMC_%s.png"%(name,rv_time_plot,name),bbox_inches='tight')
                plt.close()
        file.close()
                

In [8]:
### Calcul des résidus + périodogrammes
def rv_periodo_residus (photometry_names,periods,normalize,indiv) :
    
    ### photometry_names = noms des photométries à tester
    ### periods = périodes pour le périodogramme
    ### normalize = utilisation de la photométrie normalisée ou brute
    ### indiv = Nuits individuelles ou groupes
    
    ### Données RV
    rv_data = np.loadtxt(par_dir + "/data/rv_betapic_carine.txt",usecols=(0,1,2))
    rv_time = rv_data[:,0]
    rv_full = rv_data[:,1]
    rv_error_full = rv_data[:,2]

    for name in photometry_names :

        if ((normalize == False) & (name == "TESS_ALL")) :
            break
        
        ### On récupère les posterieurs HP du GP des RV entraîné sur la photométrie
        elif normalize == True :
            print(name)
            gp_rv_data = np.genfromtxt(par_dir + "/data/RV/GP_RV_data/" + name + "_norm_rv_posteriors.txt",usecols=(0,1,2,3,4),delimiter = ' ')
            
        else : 
            print(name)
            gp_rv_data = np.genfromtxt(par_dir + "/data/RV/GP_RV_data/" + name + "rv_posteriors.txt",usecols=(0,1,2,3,4), delimiter = ' ')
        
        ### Bornes des nuits ou groupes
        if indiv == True :
            bnds=np.array([542.51,542.61,544.551,544.554,545.519,545.522,546.49,546.576,780.69,780.74,799.67,799.76,829.62,829.71,847.61,847.672,853.58,853.68,878.57,878.65,913.49,913.59,1131.71,1131.76,1131.83,1131.88,1170.67,1170.74,1519.75,1519.84,1565.63,1565.72,1567.67,1567.76,1597.58,1597.67,2569.77,2569.85,2580.79,2580.85,2583.82,2583.89,2584.84,2584.88,2685.67,2685.74,2694.53,2694.6,2706.52,2706.59,2707.58,2707.64,2773.46,2773.53,2980.82,2980.86,2982.83,2982.87,2988.69,2988.75,3040.55,3043.83,3333.85,3333.89,3344.73,3344.77,3492.46,3494.54,3666.80,3666.85,3668.74,3668.81,3711.69,3711.75,3712.68,3712.75,3713.84,3713.9,3768.51,3768.79,3769.71,3769.78,3770.55,3770.61,3771.67,3771.73,3772.59,3772.66,3773.64,3773.71,3774.75,3774.81,3849.46,3849.56,3905.44,3905.46,4007.8,4007.9,4035.67,4035.92,4036.68,4036.92,4038.66,4038.91,4039.66,4039.88,4064.71,4064.82,4093.5,4093.89,4094.57,4094.9,4095.52,4095.9,4206.48,4206.64,4207.5,4207.64,4208.48,4208.63,4226.47,4226.58,4227.48,4227.58,4228.45,4228.58,4229.46,4229.58,4230.46,4230.57,4231.46,4231.57,4238.47,4238.55,4239.5,4239.54,4242.45,4242.54,4243.45,4243.54,4244.45,4244.53,4525.52,4525.67,4786.61,4786.88,4787.62,4787.78,4789.61,4789.64,4827.60,4827.86,4828.58,4828.85,4829.6,4829.85,4905.5,4905.59,4906.5,4906.58,4908.51,4908.58,4909.52,4909.57,4921.49,4921.68,4922.49,4922.62,4923.49,4923.68])
   
        else :
            bnds = np.array([540,550,780,781,799,800,829,830,847,854,913,914,1131,1132,1170,1171,1519,1520,1565,1568,1597,1598,2569,2570,2580,2585,2685,2686,2694,2695,2706,2708,2980,2989,3344,3345,3666,3669,3711,3713,3768,3775,3849,3850,4007,4008,4035,4040,4064,4065,4093,4096,4206,4209,4226,4245,4525,4526,4786,4790,4827,4830,4905,4910,4921,4924])

        gp_rv_data = gp_rv_data[1:,:]
        rv_time_plot = gp_rv_data[:,0]
        
        ### Création des fichiers pour stocker les résidus et les périodogrammes
        if normalize == True :
            file_periodo = open(par_dir + "/data/RV/Periodo/Periodo_" + name + "_norm.txt","w")
            file_periodo_residus = open(par_dir + "/data/RV/Periodo/Periodo_Residus_" + name + "_norm.txt","w")
            file_residus = open(par_dir + "/data/RV/Résidus/Residus_" + name + "_norm.txt","w")

        else :
            file_periodo = open(par_dir + "/data/RV/Periodo/Periodo_" + name + ".txt","w")
            file_periodo_residus = open(par_dir + "/data/RV/Periodo/Periodo_Residus_" + name + ".txt","w")
            file_residus = open(par_dir + "/data/RV/Résidus/Residus_" + name + ".txt","w")
            
        file_periodo.write("Period Power\n")
        file_periodo_residus.write("Period Power\n")
        file_residus.write("Time Residus STD\n")
         
            
        for j,t in zip(range(np.int(len(bnds)/2)),range(len(rv_time_plot))) :
            
            print(rv_time_plot[t])
            index = np.ndarray.flatten(np.argwhere((rv_time>bnds[j*2])&(rv_time<bnds[(j*2)+1]))) ### Sélection des nuits/groupes
            rv = rv_full[index]*1000 ### en m/s
            time = np.sort(rv_time[index])
            rv_error = rv_error_full[index]*1000 ### en m/s
            gp_rv_posteriors = gp_rv_data[t,1:5] ### Posteriors des RV pour la nuit/groupe

            gp = gp_building(gp_rv_posteriors,time,rv,rv_error) ### Création du GP
            mean, pred_var = gp.predict(rv, time, return_var=True) ### Prédiction 
            pred_std = np.sqrt(pred_var)
            
            residus = rv - mean ### Calcul des résidus sans l'offset
            residus = residus + gp_rv_posteriors[-1] ### Ajout de l'offset     

            
            np.savetxt(file_residus,np.column_stack((time,residus,pred_std)),delimiter=' ') ### Sauvegarde des résidus

        
        file_residus.close()

        
        ### Récupération des résidus 
        if normalize == True :
            file_residus_data = np.loadtxt(par_dir + "/data/RV/Résidus/Residus_" + name + "_norm.txt",usecols=(0,1,2),skiprows=1)

        else :
            file_residus_data = np.loadtxt(par_dir + "/data/RV/Résidus/Residus_" + name + ".txt",usecols=(0,1),skiprows=1)
        
        freqs = 1/periods
        time_res = file_residus_data[:,0]
        rv_res = file_residus_data[:,1]
        
        ### Périodogrammes ###
        rv_power = LombScargle(rv_time,rv_full - np.mean(rv_full),rv_error_full).power(freqs) ### Périodogramme Data RV en j-1
        residus_power = LombScargle(time_res,rv_res - np.mean(rv_res)).power(freqs) ### Périodogramme Résidus en j-1

        np.savetxt(file_periodo,np.column_stack((periods,rv_power)),delimiter=' ') 
        np.savetxt(file_periodo_residus,np.column_stack((periods,residus_power)),delimiter=' ') 
        
        file_periodo.close()
        file_periodo_residus.close()
        

In [22]:
photometry_names = ["TESS_s4","TESS_s5","TESS_s6","TESS_s7","TESS_s32","TESS_s33","TESS_s34","TESS_ALL"]
GP_RV(photometry_names,mcmc_params=[256,5000,15000],normalize=True,indiv=False)

  0%|          | 0/5000 [00:00<?, ?it/s]

TESS_ALL
TESS_ALL/ 3712.31

Initial log likelihood: -840.9258355125321
parameter_dict:
OrderedDict([('kernel:log_S0', 1.0), ('kernel:log_Q', 2.4064048052780627), ('kernel:log_omega0', 5.749413859765567), ('mean:value', 0.0)])

Running burn-in...


100%|██████████| 5000/5000 [03:36<00:00, 23.13it/s]
  0%|          | 0/20000 [00:00<?, ?it/s]

Running production...


100%|██████████| 20000/20000 [14:25<00:00, 23.10it/s]


Finished
Mean acceptance fraction: 0.5849488281249999
parameter_dict:
OrderedDict([('kernel:log_S0', 0.8441335898464828), ('kernel:log_Q', 3.635465743207418), ('kernel:log_omega0', 5.692709120248325), ('mean:value', 123.67387264539842)])

Final MCMC log likelihood: -718.339530410964
[48.09402646 52.10002051 47.61425247 47.62186685]


<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Javascript object>

[  2.33919583   3.94953327   5.71312192 137.59024572]


In [12]:
photometry_names = ["TESS_s4","TESS_s5","TESS_s6","TESS_s7","TESS_s32","TESS_s33","TESS_s34","TESS_ALL"]
periods = np.arange(1,20000,0.01) #en jours
rv_periodo_residus(photometry_names,periods,normalize=True,indiv=True)

test
test2
TESS_ALL
544.4022638109756
Initial log likelihood: -226.24102802931645
parameter_dict:
OrderedDict([('kernel:log_S0', 1.0), ('kernel:log_Q', 2.94870891947027), ('kernel:log_omega0', 5.730017299912728), ('mean:value', -0.07938312208384131)])

780.7150648285715
Initial log likelihood: -15.549154565022475
parameter_dict:
OrderedDict([('kernel:log_S0', 1.0), ('kernel:log_Q', 4.558224270558764), ('kernel:log_omega0', 5.671389750547355), ('mean:value', -0.06245353383695509)])

799.7134983384615
Initial log likelihood: -14.62068916410811
parameter_dict:
OrderedDict([('kernel:log_S0', 1.0), ('kernel:log_Q', 4.1979725708322775), ('kernel:log_omega0', 5.573459272190918), ('mean:value', -0.06822580464590754)])

829.6662701111111
Initial log likelihood: -189.30724251891806
parameter_dict:
OrderedDict([('kernel:log_S0', 1.0), ('kernel:log_Q', 3.36772846821317), ('kernel:log_omega0', 5.689101808011294), ('mean:value', -0.200606306369966)])

851.3270640846154
Initial log likelihood: -106.2