# TOTEMS - Tidal Orbital decay Timing Extrapolation & Modelling Software

In [1]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

#Import curve fitting (Which is what we need for tidal decay)
from scipy.optimize import curve_fit

#Import pylightcurve, used for BJD HJD conversions - thanks to Angelos Tsiaras
import pylightcurve as plc

In [2]:
%matplotlib notebook
# if this isn't in a separate cell, sometimes doesn't work right
# magic commands are weird, to say the least.




First function: gets the relevant data for the exoplanet from the [http://var2.astro.cz/ETD/](Exoplanet Transit
Database). Use this if you don't
have any archive data of your own to use, though really any data from a recent paper will probably be better.

In [3]:
def get_etd_data(ra, dec, url):
    """Imports the archive data from ETD. Uses the RA & Dec of a target to convert HJD_UTC to BJD_TDB.
    Inputs:
    - ra, the RA in hh mm ss.ss
    - dec, the Declination in hh mm ss.ss
    - url, the URL of datafile
    Outputs:
    - no, the number of the entry in ETD's list
    - bjd, t_mid converted to BJD_TDB
    - mid_err, the uncertainty in t_mid
    - epoch, the epoch number for the transit based on the t_0 and period on ETD.
    - dq, ETD's Data Quality measurement. 1 is best, 5 is worst.
    """
    # Get the period and mid-time @ epoch 0 automatically from ETD
    # Yes, this is absolutely appalling. I am open to suggestions on better ways to do this!

    p_str, t_str = np.loadtxt(url, dtype=str, encoding='unicode_escape', delimiter=', ', skiprows=2, max_rows=1)

    for possibility in p_str.split():
        try:
            str(float(possibility))
            p_0 = possibility
        except ValueError:
            pass # if its not the number, move on

    for possibility in t_str.split():
        try:
            str(float(possibility))
            t_0 = possibility
        except ValueError:
            pass # if its not the number, move on

    # Now for the actual transit data: use genfromtxt as there may be missing values.
    # Import the data into a series of arrays using numpy loadtxt. Note we skip the first 5 rows and only use
    # certain columns, because there's lots of extra stuff we aren't using
    no,hjd,mid_err,epoch,dq = np.genfromtxt(url, encoding='unicode_escape', delimiter=';', skip_header=5, missing_values='', filling_values=-1, usecols=(0,1,2,3,10), unpack=True)

    # convert from the truncated HJD to the full HJD
    hjd = hjd + 2400000

    # assuming ETD uses HJD_UTC, convert to BJD_TDB.
    # of course, it might be worth checking that each individual data point specified is ACTUALLY HJD,
    # as I don't think ETD vets for those kinds of mistakes

    ra_dec_string = ra+" "+dec #concatenate into one string for the hh mm ss.ss to deg convert function
    ra, dec = plc.ra_dec_string_to_deg(ra_dec_string) # convert from hh mm ss.ss to degrees

    # Convert to BJD_TDB. N.B. we assume uncertainty remains the same.
    bjd = [] # make a blank array to store BJD in
    for val in tqdm(hjd):
        bjd.append(plc.hjd_utc_to_bjd_tdb(ra, dec, val)) # do the actual converting

    # return the data arrays. Notice this means you end up with five arrays, where each of the five elements is an
    # array for number, BJD, uncertainty, epoch, DQ. Also, two constants, the period and time at epoch 0.
    # Yes, this is a dreadful situation to be in. A high priority todo is to rewrite these into "datapoint" objects
    # where each object stores a mid-transit time and uncertainty, its number, epoch, and DQ.
    return no, bjd, mid_err,epoch,dq, p_0, t_0

In [4]:
def filter_etd_data(no, bjd, mid_err, epoch, dq, filter_dq):
    """Filters ETD data. Checks uncertainty exists. Returns only datapoints with DQ equal or better than specified
    value. N.B. DQ 1 is best, 5 is worst.
    Inputs:
    - no
    - bjd
    - mid_err
    - epoch
    - dq
    - filter_dq
    Outputs:
    - filteredData, the filtered data array (of arrays). Contains three arrays: BJD mid-time, uncertainty, and epoch.
    """

    # Let's filter for "good" data. I don't trust data without a tmid uncertainty, so check it exists/isn't 0
    good_data=[[],[],[],[],[]]

    for i in tqdm(range(0, len(no))):
        if mid_err[i] > 0:
            good_data[0].append(no[i])
            good_data[1].append(bjd[i])
            good_data[2].append(mid_err[i])
            good_data[3].append(epoch[i])
            good_data[4].append(dq[i])

    # the plan: one array, contains three arrays - these three arrays are for mid, mid_err, epoch
    # we don't bother preserving no., it was just imported to be used basically as a debugging tool
    # and dq is pointless once we've filtered. This also makes the addition of non-ETD data far easier
    filtered_data = [[],[],[]]

    for i in tqdm(range(0, len(good_data[1]))): # for each entry in the data
        if good_data[4][i] <= filter_dq: # check the DQ vs the specified DQ argument. If better...

            # ...then add the data to each of the three arrays in filteredData that we care about
            filtered_data[0].append(good_data[1][i])
            filtered_data[1].append(good_data[2][i])
            filtered_data[2].append(good_data[3][i])

    return filtered_data # return the data array containing only the transits that's been filtered by DQ

## Statistics functions

These functions are for the $\chi^2$ comparison. I sincerely doubt that I haven't accidentally reinvented the wheel - functions for this probably already exist. However, it's so simple, I've not exactly wasted hours on these.

Equations are from "Measurements and Their Uncertainties: A Practical Guide to Modern Error Analysis: Hughes and
Hase 2010".

$$ \chi^2 = \sum_i{\frac{y_i-y(x_i)}{\alpha_i^2}}$$

$\nu$ is the degrees of freedom: the number of datapoints minus the number of fitted parameters. We divide $\chi^2$ by
 $\nu$ to obtain the reduced chi-squared, as follows:

$$ \chi^2_\text{reduced} = \frac{\chi^2}{\nu} $$

In [5]:
def chi_squared(y,yx,alpha):
    """Calculates the unreduced chi squared.
    Inputs:
    - y, the actual observed y value (the y_i in the formula)
    - yx, the y value of the fitted line (the y(x) in the formula )
    - alpha, the error bar for y, in the same units as y
    Outputs:
    - chi2, the unreduced chi squared value
    """

    chi2=0
    for i in tqdm(range(0,len(y))):
        chi2 += ((y[i]-yx[i])**2) / (alpha[i]**2)
    return chi2

def reduced_chi_squared(y,yx,alpha,m):
    """Calculates the reduced chi squared from the raw data: just chisq divided by degrees of freedom
    Degree of freedom is just number of observations n - number of fitted parameters m
    where n is just the number of y (or y(x) or x) values
    Inputs:
    - y, the actual observed y value (the y_i in the formula)
    - yx, the y value of the fitted line (the y(x) in the formula )
    - alpha, the error bar for y, in the same units as y
    - m, the number of fitted parameters.
    Outputs:
    - chi2, the unreduced chi squared value
    """
    n = len(y) # number of observations/datapoints
    dof = n-m # degree of freedom, m is fitted params
    chi2 = chi_squared(y,yx,alpha) # get the unreduced chi squared
    reduced_chi2 = chi2/dof # reduce it
    return reduced_chi2

In [6]:
# Working example for WASP-12b, using ETA data:

ra = "06 30 32.79"
dec = "+29 40 20.20"
url = "http://var2.astro.cz/ETD/ascii-etd.php?id=246&STARNAME=WASP-12&PLANET=b&PER=1.0914222&EPOCH=2454508.97605"

# Get the data from ETD
no, bjd, mid_err, epoch, dq, wasp12b_p0, wasp12b_t0 = get_etd_data(ra, dec, url)

# Filter the data - let's use DQ 4, just to exclude only the worst data
DQ_filter = 4
wasp12b_data = filter_etd_data(no, bjd, mid_err, epoch, dq, DQ_filter)

# So, now we have the period and mid-time @ epoch 0: p_0 and t_0
# and we have the mid-times, uncertainties, and epochs, stored in wasp12b_data

# print(wasp12b_p0)
# print(wasp12b_t0)
# print(wasp12b_data[0])
# print(wasp12b_data[1])
# print(wasp12b_data[2])

100%|██████████| 260/260 [00:03<00:00, 80.59it/s]
100%|██████████| 260/260 [00:00<00:00, 145247.61it/s]
100%|██████████| 255/255 [00:00<00:00, 260801.64it/s]


[2458930.3322150814, 2458906.3069253056, 2458895.392665395, 2458884.47817548, 2458883.389555486, 2458883.387165486, 2458872.470795561, 2458871.3825555695, 2458871.3791355696, 2458860.4658756373, 2458859.375955643, 2458848.461685704, 2458837.550155758, 2458561.4191047084, 2458538.5077344007, 2458537.414204386, 2458537.407634386, 2458536.3170643724, 2458536.3129943716, 2458513.4022540255, 2458501.396543836, 2458501.392013836, 2458501.3916738355, 2458490.4828236564, 2458490.477903656, 2458489.387803637, 2458489.3834536374, 2458479.568743471, 2458478.4770034514, 2458467.5641532624, 2458457.7342530857, 2458455.5545930457, 2458411.896052222, 2458396.6175619145, 2458396.615161915, 2458214.3469873583, 2458178.3302862793, 2458167.4160459414, 2458157.5943356357, 2458131.400314797, 2458109.5717340778, 2458096.474133639, 2458087.7458433453, 2458076.8293529726, 2457832.3508739634, 2457809.4350430444, 2457809.431273044, 2457775.6011016755, 2457774.506521631, 2457773.415161587, 2457772.3273715423, 24