In [1]:
%matplotlib notebook

In [2]:
import autograd.numpy as np
import matplotlib.pyplot as plt
import celerite
from celerite import terms
import emcee
import corner
from scipy import signal
from IPython.display import display, Math, Image
import os
import pandas as pd
import glob

In [3]:
dir = os.getcwd()
dir

'/home/marc/Stages/Stage_M2/src'

In [4]:
par_dir = os.path.dirname(dir)
par_dir

'/home/marc/Stages/Stage_M2'

In [18]:
def log_likelihood(params,time,flux):
        gp.set_parameter_vector(params)
        ll = gp.log_likelihood(flux)
        return ll if np.isfinite(ll) else -np.inf

### Définition de la likelihood ###
def log_likelihood(params,time,flux):
    gp.set_parameter_vector(params)
    ll = gp.log_likelihood(flux)
    return ll if np.isfinite(ll) else -np.inf

### Définition du prior (uniforme) ###
def log_prior(params):
    if ((-30 < params[0] < 20) and (-10 < params[1] < 10) and (5 < params[2] < 7)) :
        return 0
    return -np.inf

### Définition du posterior ###
def log_probability(params,time,flux):
    lp = log_prior(params)
    if not np.isfinite(lp):
        return -np.inf
    return lp + log_likelihood(params,time,flux) if np.isfinite(lp) else -np.inf

    ### MCMC ###
def run_mcmc(time,flux,gp) :
    initial = gp.get_parameter_vector()
    ndim, nwalkers = len(initial), 128
    sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability,args=(time,flux))

    print("Running burn-in...")
    p0 = initial + 1e-8 * np.random.randn(nwalkers, ndim)
    p0, lp, _ = sampler.run_mcmc(p0, 100,progress=True)

    print("Running production...")
    sampler.reset()
    sampler.run_mcmc(p0, 1000,progress=True)
    print("Finished")
    
    likelihood = gp.log_likelihood(flux)
    af = sampler.acceptance_fraction
    
    print("Mean acceptance fraction:", np.mean(af))
    print("parameter_dict:\n{0}\n".format(gp.get_parameter_dict()))
    print("Final MCMC log likelihood: {0}".format(gp.log_likelihood(flux)))
    return(sampler,likelihood)

In [6]:
def gp_building(param,time,flux,error) :
    bounds = dict(log_S0= (None,None), log_Q = (-10,10), log_omega0 = (5,7))
    #kernel = terms.SHOTerm(log_S0=7., log_Q=2.5, log_omega0=5.)
    kernel = terms.SHOTerm(log_S0=param[0], log_Q=param[1], log_omega0=param[2],bounds=bounds)


    gp = celerite.GP(kernel,mean=np.mean(flux))
    gp.compute(time,yerr=error)
 
    print("Initial log likelihood: {0}".format(gp.log_likelihood(flux)))
    print("parameter_dict:\n{0}\n".format(gp.get_parameter_dict()))
    
    return gp

In [7]:
### DATA ###
photometry_data = {"TESS_18_19" : "/data/TESS_18_19_data.dat"}#,
#        "TESS_20_0" : "/data/TESS_20_0_data.fits",
#        "TESS_20_1" : "/data/TESS_20_1_data.fits",
#        "TESS_21" : "/data/TESS_21_data.fits"}

sectors_names = {"TESS_18_19" : ["All"]}#,"Sector1","Sector2","Sector3"]}#,
#                 "TESS_20_0" : ["All","Sector1","Sector2"],
#                 "TESS_20_1" : ["All","Sector1","Sector2"],
#                 "TESS_21" : ["All","Sector1","Sector2"]}

photometry_sectors = {"TESS_18_19" : {"All" : np.array([1437,1517])}}#,
#                                      "Sector1" : np.array([1437,1468]),
#                                       "Sector2" : np.array([1468,1491]),
#                                       "Sector3" : np.array([1491,1517])},
#                       "TESS_20_0" : {"All" : np.array([2174,2200]),
#                                      "Sector1" : np.array([2174,2186]),
#                                      "Sector2" : np.array([2186,2200])},
#                       "TESS_20_1" : {"All" : np.array([2200,2228]),
#                                      "Sector1" : np.array([2200,2214]),
#                                      "Sector2" : np.array([2214,2228])},
#                       "TESS_21" : {"All" : np.array([2228,2254]),
#                                      "Sector1" : np.array([2228,2242]),
#                                      "Sector2" : np.array([2242,2254])}}


In [8]:
### Params ###
initial_params = [1,2.5,5.8]
mask_dumps = np.array([])
start_dumps = np.array([1441.024,1444.026,1447.026,1450.026,1451.552,1454.588,1457.588,1460.588,1463.588,1468.379,1471.505,1474.630,1481.276,1484.401,1487.526,1491.627,1494.797,1497.922,1501.046,1504.702,1507.859,1510.984,1514.109])
end_dumps = np.array([1441.033,1444.033,1447.033,1450.033,1451.555,1454.595,1457.595,1460.595,1463.595,1468.387,1471.512,1474.637,1481.283,1484.408,1487.533,1491.634,1494.807,1497.931,1501.051,1504.705,1507.872,1510.995,1514.121])

In [9]:
### Creating dataframe ###
columns = ["Photometry","Sector","Likelihood","Log S0","Log Q","Log w0","Inc - S0","Inc + S0","Inc - Q","Inc + Q","Inc - w0","Inc + w0"]
rows = photometry_data.keys()
df = pd.DataFrame(columns=columns)

for key in photometry_data.keys() :
    for name in sectors_names[key] :
        df2 = pd.DataFrame([[key,name],[]],columns=['Photometry','Sector'])
        df = df.append(df2.loc[0])
df.set_index(["Photometry","Sector"],inplace=True)
df.sort_index(inplace=True)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Likelihood,Log S0,Log Q,Log w0,Inc - S0,Inc + S0,Inc - Q,Inc + Q,Inc - w0,Inc + w0
Photometry,Sector,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
TESS_18_19,All,,,,,,,,,,


In [19]:
for photometry in photometry_data.keys() :
    file = photometry_data[photometry]
    
    for sector in photometry_sectors[photometry].keys() :
        
        if photometry == "TESS_18_19" :
            data = np.loadtxt(par_dir + file,usecols=(0,1))

            time = data[:,0]
            flux = data[:,1]
            flux_error = 4.55e-5

            for i in range(len(start_dumps)) :
                dumps_indexes = np.argwhere((time>start_dumps[i])&(time<end_dumps[i]))
                mask_dumps = np.append(mask_dumps,dumps_indexes)

            time = np.delete(time,mask_dumps.astype(int))
            flux = np.delete(flux,mask_dumps.astype(int))
              
        else :
            hdul = fits.open(par_dir + file)
            data = hdul[1].data
            time = data['TIME']
            flux = data['PDCSAP_FLUX']
            flux_error = data['PDCSAP_FLUX_ERR']

        index = np.argwhere((time > photometry_sectors[photometry][sector][0]) & (time < photometry_sectors[photometry][sector][1]))
        time = np.ndarray.flatten(time[index])
        flux = np.ndarray.flatten(flux[index])
        
        if photometry != "TESS_18_19" :
            flux_error = np.ndarray.flatten(flux_error[index])
        
        gp = gp_building(initial_params,time,flux,flux_error)

        ### Minimize ###
        from scipy.optimize import minimize

        def neg_log_like(params, y, gp):
            gp.set_parameter_vector(params)
            return -gp.log_likelihood(y)
        # extract our initial guess at parameters
        # from the celerite kernel and put it in a 
        # vector:
        p0 = gp.get_parameter_vector()
        bounds = gp.get_parameter_bounds()

        # run optimization:
        results = minimize(neg_log_like, p0, method='L-BFGS-B',bounds=bounds,args=(flux, gp))
        print(results)
        print("Final log-likelihood: {0}".format(-results.fun))
        print(np.exp(results.x))
        print("Parameters: {0}".format(gp.get_parameter_dict()))
        gp.set_parameter_vector(results.x)

        sampler,likelihood = run_mcmc(time,flux,gp)
        df.loc[(photometry,sector),['Likelihood']] = likelihood
        
        ## Posteriors ###
        flat_samples = sampler.get_chain(flat=True)
        labels = ["log(S0)","log(Q)","log(w0)"]

        posteriors = np.array([])
        posteriors_errors = np.array([])
#         fig = corner.corner(flat_samples, labels=labels, truths=[None,None,np.log(2*np.pi*24*60/30.4)])
#         plt.title("Posteriors_MCMC_%s_%s"%(photometry,sector))
#         plt.savefig(par_dir + "/plots/%s/%s/Posteriors_MCMC_%s_%s.pdf"%(photometry,sector,photometry,sector),bbox_inches='tight')
#         plt.close()  

#         x = np.linspace(np.min(time),np.max(time),15000)
#         pred_mean, pred_var = gp.predict(flux, x, return_var=True)
#         pred_std = np.sqrt(pred_var)

#         color = "#ff7f0e"
#         plt.figure()
#         plt.scatter(time,flux,s=1)
#         plt.plot(x, pred_mean, color=color)
#         plt.fill_between(x, pred_mean+pred_std, pred_mean-pred_std, color=color, alpha=0.3, edgecolor="none",interpolate=True)
#         plt.xlabel("x")
#         plt.ylabel("y")
#         plt.ylim(np.min(flux),np.max(flux))
#         plt.title("Fit_MCMC_%s_%s"%(photometry,sector))
#         plt.savefig(par_dir + "/plots/%s/%s/Fit_MCMC_%s_%s.pdf"%(photometry,sector,photometry,sector),bbox_inches='tight')
#         plt.close() 

        for i in range(flat_samples.shape[1]):

            mcmc = np.percentile(flat_samples[:, i], [16, 50, 84])
            value = np.percentile(flat_samples[:, i], [50])
            q = np.diff(mcmc)
            txt = "\mathrm{{{3}}} = {0:.3f}_{{-{1:.3f}}}^{{{2:.3f}}}"
            txt = txt.format(mcmc[1], q[0], q[1], labels[i])
            display(Math(txt))
            posteriors = np.append(posteriors,value)
            posteriors_errors = np.append(posteriors_errors,q)
            
        df.loc[(key,sector),"Log S0":"Log w0"] = posteriors
        df.loc[(key,sector),"Inc - S0" : "Inc + w0"] = posteriors_errors

Initial log likelihood: -145850.463689708
parameter_dict:
OrderedDict([('kernel:log_S0', 1.0), ('kernel:log_Q', 2.5), ('kernel:log_omega0', 5.8)])

      fun: -315003.5232299846
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([  0.50640661, -13.07344072,  17.18290161])
  message: 'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 140
      nit: 17
     njev: 35
   status: 0
  success: True
        x: array([-21.05789122,   2.1494279 ,   5.76454753])
Final log-likelihood: 315003.5232299846
[7.15606113e-10 8.57994844e+00 3.18794766e+02]
Parameters: OrderedDict([('kernel:log_S0', -21.057891221376707), ('kernel:log_Q', 2.149427904079407), ('kernel:log_omega0', 5.764547539956216)])
Running burn-in...


100%|██████████| 100/100 [01:41<00:00,  1.02s/it]


Running production...


100%|██████████| 1000/1000 [17:22<00:00,  1.04s/it]

Finished
Mean acceptance fraction: 0.6458671874999999
parameter_dict:
OrderedDict([('kernel:log_S0', -21.076197463939796), ('kernel:log_Q', 2.1746148710045565), ('kernel:log_omega0', 5.769057767513927)])

Final MCMC log likelihood: 314999.7612855668





<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

In [21]:
df.to_csv(par_dir + '/data/Posteriors_photometries.csv')