<img src="https://github.com/seap-udea/xearch/blob/main/resources/xearch-logo.png?raw=true" align="left">

## Required packages

In [1]:
# External modules
import spiceypy as spy
import numpy as np
import matplotlib.pyplot as plt
from astropy import constants as c
from astropy.time import Time
import pandas as pd
import pickle
from datetime import datetime
from tqdm import tqdm
from IPython.display import display,HTML

# Constants
G = c.G.value
Msun = c.M_sun.value
Mearth = c.M_earth.value
Mjup = c.M_jup.value
Rsun = c.R_sun.value
Rearth = c.R_earth.value
Rjup = c.R_jup.value
Mjup2earth = Mjup/Mearth
Rjup2earth = Rjup/Rearth

# Units
K = 1 # Kelvin
days = 86400
JD = days # Julian days
BJD = days # Bessel Julian days
years = 365.25*days # Julian years
au = c.au.value 
deg = np.pi/180
rad = 1/deg
Gyr = 1e9*years
dex = 1

# Useful routines
def sex2dec(angle):
    h, m, s = map(float, angle.split())
    sgn = np.sign(h)
    return sgn*(abs(h) + m/60 + s/3600)

def date2jd(date, format='%y/%m/%d'):
    dt = datetime.strptime(date, format)
    t = Time(dt)
    jd = t.jd
    return jd

def print_df(df):
    display(HTML(df.to_html()))


## Read database

In [60]:
import xml.etree.ElementTree as ET, urllib.request, gzip, io
#url = "https://github.com/OpenExoplanetCatalogue/oec_gzip/raw/master/systems.xml.gz";suffix='open'
url = "https://github.com/OpenExoplanetCatalogue/oec_gzip/raw/master/systems-exoplanetarchive.xml.gz";suffix='nasa'
#url = "https://github.com/OpenExoplanetCatalogue/oec_gzip/raw/master/systems-exoplaneteu.xml.gz";suffix='eu'

oec = ET.parse(gzip.GzipFile(fileobj=io.BytesIO(urllib.request.urlopen(url).read())))

In [61]:
# Output mass and radius of all planets 
for planet in oec.findall(".//planet"):
    name = planet.findtext("name") 
    if 'Proxima' in name:
        print(name)
        print(planet.findtext("mass"))
    pass

Proxima Cen b
0.00337


In [62]:
binary_properties = dict(
    # General
    name = ['binary_name',str,0],
    # Position
    separation = ['sep_bin',float,1,au], 
    positionangle = ['pa_bin',float,1,deg],
    # Orbital elements
    semimajoraxis = ['a_bin',float,1],
    eccentricity = ['e_bin',float,1],
    inclination = ['I_bin',float,1],
    periastron = ['w_bin',float,1,deg],
    ascendingnode = ['W_bin',float,1,deg],
    meananomaly = ['M_bin',float,1],
    meanlongitude = ['L_bin',float,1,deg],
    period = ['period_bin',float,1,days],
    epoch = ['et_bin',float,1,BJD],
    # Times
    maximumrvtime = ['tk_bin',float,1,BJD],
    # Magnitudes
    magB = ['magB_bin',float,1,dex],
    magV = ['magV_bin',float,1,dex],
    magI = ['magI_bin',float,1,dex],
    magR = ['magR_bin',float,1,dex],
    magJ = ['magJ_bin',float,1,dex],
    magH = ['magH_bin',float,1,dex],
    magK = ['magK_bin',float,1,dex],
) # Other: nstars

system_properties = dict(
    # General
    name = ['system_name',str,0],
    # Position
    declination = ['declination',sex2dec,1,deg],
    rightascension = ['rightascension',sex2dec,1,deg],
    distance = ['distance',float,1],
) # Other: nplanets

star_properties = dict(
    # General
    name = ['star_name',str,0],
    # Physical
    mass = ['star_mass',float,1,Msun],
    radius = ['star_radius',float,1,Rsun],   
    temperature = ['star_temperature',float,1,K],
    age = ['star_age',float,1,Gyr],
    # Spectral
    metallicty = ['metallicity',float,1,dex], # Relative to sun
    spectraltype = ['spectraltype',str,0],
    # Magnitudes
    magB = ['magB',float,1,dex],
    magV = ['magV',float,1,dex],
    magI = ['magI',float,1,dex],
    magR = ['magR',float,1,dex],
    magJ = ['magJ',float,1,dex],
    magH = ['magH',float,1,dex],
    magK = ['magK',float,1,dex],
)

planet_properties = dict(
    # General
    name = ['planet_name',str,0],
    # description = ['description',str,0],
    discoverymethod = ['method',str,0],
    # Times
    istransiting = ['istransiting',int,1],
    discoveryyear = ['dicovery',int,1,years],
    #lastupdate = ['update',date2jd,1,JD],
    lastupdate = ['update',str,0],
    # Physical    
    mass = ['mass',float,Mjup2earth,Mjup],
    radius = ['radius',float,Rjup2earth,Rjup],
    temperature = ['temperature',float,1,K],
    age = ['age',float,1,Gyr],
    # Position
    impactparameter = ['b',float,1],
    separation = ['separation',float,1,au],
    # Orbital elements
    semimajoraxis = ['a',float,1],
    eccentricity = ['e',float,1],
    inclination = ['I',float,1],
    periastron = ['w',float,1,deg],
    ascendingnode = ['W',float,1,deg],
    meananomaly = ['M',float,1],
    meanlongitude = ['L',float,1,deg],
    period = ['period',float,1,days],
    epoch = ['et',float,1,BJD],
    # Times
    transittime = ['tt',float,1,BJD],
    periastrontime = ['tp',float,1],
    maximumrvtime = ['tk',float,1,BJD],
    # Other
    spinorbitalignment = ['so',float,1,deg],
)
# Columns compilation
columns = []
for properties in system_properties,star_properties,planet_properties: #,binary_properties:
    for key,item in properties.items():
        columns.append(item[0])
print(f"Number of columns: {len(columns)}")

Number of columns: 42


Getting data:

In [63]:
nsystems = 0
nallplanets = 0 
systems = dict()

exoplanets_list = []
for system in tqdm(oec.findall('.//system')):
    name = system.findtext('name')
    if name is None:
        continue
    nsystems += 1
    
    systems[name] = dict()
    exoplanet = dict()

    system_dict = dict()
    # System properties
    for key,item in system_properties.items():
        value = system.findtext(key)
        if (value is not None) and (value != ''):
            finalvalue = item[1](value)
            if item[1] != str:
                finalvalue *= item[2]
        else:
            finalvalue = None
        systems[name][item[0]] = finalvalue
        system_dict[item[0]] = finalvalue

    # Stars
    nstars = 0
    systems[name]['stars'] = []
    for star in system.findall('.//star'):
        star_name = star.findtext('name') 
        if star_name is None:
            continue
        nstars += 1
        star_dict = dict()
        for key,item in star_properties.items():
            value = star.findtext(key)
            if (value is not None) and (value != ''):
                finalvalue = item[1](value)
                if item[1] != str:
                    finalvalue *= item[2]
            else:
                finalvalue = None

            star_dict[item[0]] = finalvalue
        systems[name]['stars'].append(star_dict)
        system_dict.update(star_dict)

    systems[name]['nstars'] = nstars
    
    # Planets
    nplanets = 0
    systems[name]['planets'] = []
    for planet in system.findall('.//planet'):
        planet_name = planet.findtext('name') 
        if planet_name is None:
            continue
        nallplanets += 1
        nplanets += 1
        planet_dict = dict()
        for key,item in planet_properties.items():
            value = planet.findtext(key)
            if (value is not None) and (value != ''):
                finalvalue = item[1](value)
                if item[1] != str:
                    finalvalue *= item[2]
            else:
                finalvalue = None

            planet_dict[item[0]] = finalvalue
            exoplanet[item[0]] = finalvalue

        systems[name]['planets'].append(planet_dict)
        system_dict_planet = system_dict.copy()
        system_dict_planet.update(planet_dict)

        exoplanets_list.append(system_dict_planet)

    systems[name]['nplanets'] = nplanets
    exoplanet['nplanets'] = nplanets

exoplanets = pd.DataFrame(exoplanets_list)
for names in ['system_name','star_name','planet_name']:
    exoplanets[names] = exoplanets[names].str.replace('&ouml;', 'o') # Fixing some encoding issues

# Save dictionary data
with open(f"systems-{suffix}.pickle", 'wb') as file:
    pickle.dump(systems, file)

# Save dataframe
exoplanets.to_csv(f"exoplanets-{suffix}.csv",index=False)

print_df(exoplanets.describe())

100%|██████████| 4164/4164 [00:00<00:00, 24729.98it/s]


Unnamed: 0,declination,rightascension,distance,star_mass,star_radius,star_temperature,star_age,magV,magI,magJ,magH,magK,istransiting,dicovery,mass,radius,temperature,a,e,I,w,W,period,tt
count,5599.0,5599.0,5578.0,5604.0,5368.0,5389.0,9.0,5380.0,3373.0,5363.0,5378.0,5384.0,4434.0,5602.0,5583.0,5588.0,4140.0,5311.0,4831.0,4297.0,1668.0,9.0,5359.0,4427.0
mean,19.585125,15.775684,696.624924,0.950917,1.552338,5430.075238,4.57,12.636057,13.871267,10.991279,10.62003,10.505977,1.0,2016.101214,443.079009,5.690246,910.664857,6.649941,0.076416,86.688931,118.886523,77.73291,79843.49,2455900.0
std,35.844526,6.045221,1106.373068,0.423961,4.304881,1776.997636,0.0,3.101634,1.417113,2.983875,3.031439,3.042773,0.0,6.070814,2371.863835,5.302558,460.005472,134.118667,0.150427,11.20809,119.2336,42.291484,5493977.0,1510.289
min,-88.121111,0.0,1.30119,0.01,0.01,415.0,4.57,0.872,7.713,-2.095,-2.775,-3.044,1.0,1781.0,0.002466,0.184252,34.0,0.0044,0.0,-0.000543,-233.0,-5.112604,0.09070629,2440619.0
25%,-7.82625,12.234444,112.467,0.79,0.78,4945.0,4.57,10.8375,13.111,9.0965,8.754,8.55225,1.0,2014.0,4.036421,1.782228,568.0,0.0533,0.0,86.89,28.45,49.71321,4.474399,2454969.0
50%,39.883056,19.061944,407.1005,0.95,0.96,5557.0,4.57,13.466,14.16,11.891,11.539,11.431,1.0,2016.0,8.708498,2.779827,819.5,0.1038,0.0,88.755,104.0885,76.672615,11.6065,2455005.0
75%,45.434583,19.571944,852.0465,1.09,1.25,5900.0,4.57,15.009,14.951,13.376,12.941,12.84725,1.0,2020.0,162.092487,11.789045,1156.25,0.30315,0.09,89.67,210.0,110.30168,41.75644,2457083.0
max,85.736667,23.998333,8500.0,10.94,109.46,57000.0,4.57,44.61,19.3077,25.34,32.34,33.11,1.0,2024.0,89627.61066,77.341967,4050.0,7506.0,0.95,176.092,395.341,131.786359,402000000.0,2460640.0


In [64]:
def filter(dataframe, conditions=[]):
    cond = pd.Series([True]*len(dataframe))
    for condition in conditions:
        column = condition[0]
        value = condition[1]
        if isinstance(value,list):
            cond = (dataframe[column]>=value[0])&(dataframe[column]<=value[1])&cond
        else:
            cond = (dataframe[column] == value)&cond
    
    return dataframe[cond]

In [65]:
filter(exoplanets,[('system_name','ups And')])

Unnamed: 0,system_name,declination,rightascension,distance,star_name,star_mass,star_radius,star_temperature,star_age,metallicity,...,w,W,M,L,period,et,tt,tp,tk,so
5385,ups And,41.403889,1.613333,13.4054,ups And,1.3,1.56,6156.77,,,...,324.9,,,,4.617033,,2454083.853,,,
5386,ups And,41.403889,1.613333,13.4054,ups And,1.3,1.56,6156.77,,,...,241.7,,,,241.258,,2455133.7,,,
5387,ups And,41.403889,1.613333,13.4054,ups And,1.3,1.56,6156.77,,,...,258.82,,,,1276.46,,2453360.6,,,


In [66]:
print_df(filter(exoplanets,[('method','RV'),('e',[0.5,1])]).dropna(subset=['radius']).head(10))

Unnamed: 0,system_name,declination,rightascension,distance,star_name,star_mass,star_radius,star_temperature,star_age,metallicity,spectraltype,magB,magV,magI,magR,magJ,magH,magK,planet_name,method,istransiting,dicovery,update,mass,radius,temperature,age,b,separation,a,e,I,w,W,M,L,period,et,tt,tp,tk,so
5,16 Cyg B,50.516944,19.697778,21.1397,16 Cyg B,1.08,1.13,5750.0,,,G3V,,6.215,,,4.993,4.695,4.651,16 Cyg B b,RV,1.0,1996.0,97/07/01,565.734564,13.450777,,,,,1.66,0.68,,82.74,,,,798.5,,2456937.32,,,
69,BD-17 63,-16.227222,0.0,34.4603,BD-17 63,0.74,0.69,4714.0,,,K5V,,9.62,8.86964,,7.574,7.027,6.914,BD-17 63 b,RV,,2008.0,09/03/01,1620.924874,12.890328,,,,,1.34,0.54,,112.2,,,,655.6,,,,,
78,BD+48 740,48.93,2.716111,666.589,BD+48 740,1.09,10.33,4534.0,,,K3III,,8.69,,,6.385,5.791,5.645,BD+48 740 b,RV,,2018.0,18/05/01,540.308291,13.450777,,,,,1.7,0.76,,100.0,,,,733.0,,,,,
82,BD+63 1405,63.67,18.093333,38.0473,BD+63 1405,0.82,0.8,5000.0,,,K0,,8.96,,,7.303,6.895,6.78,BD+63 1405 b,RV,,2021.0,21/07/01,3305.415429,12.441969,,,,,2.06,0.88,5.974,94.25,,,,1198.48,,,,,
103,CoRoT-20,0.0,6.514722,844.061,CoRoT-20,1.14,1.37,4947.5,,,G2V,,14.63,,,12.991,12.652,12.513,CoRoT-20 c,RV,,2018.0,18/11/01,5403.082912,12.217789,,,,,2.9,0.6,,65.0,,,,1675.0,,,,,
200,GJ 1061,-44.514444,3.600278,3.67278,GJ 1061,0.12,0.16,2953.0,,,M5.5V,,12.7,,,7.523,7.015,6.61,GJ 1061 d,RV,,2020.0,20/03/01,1.639995,1.154525,,,,,0.054,0.53,,157.0,,,,13.031,,,,,
223,GJ 2056,-24.892222,7.201389,28.4434,GJ 2056,0.62,0.71,4069.95,,,M0V:,,10.37,,,7.704,7.065,6.89,GJ 2056 b,RV,,2020.0,20/10/01,16.199714,4.169741,,,,,0.283,0.72,,58.0,,,,69.971,,,,,
230,GJ 3021,-79.851389,0.0,17.5559,GJ 3021,0.9,0.9,5540.0,,,G6V,,6.59,,,5.366,4.99,4.859,GJ 3021 b,RV,,2000.0,01/08/01,1071.08173,13.114507,350.0,,,,0.49,0.511,,290.7,,,,133.71,,,,,
238,GJ 3222,-40.076111,3.393056,18.23817,GJ 3222,0.89,0.93,5257.46,,,K0V,,6.9,,,5.333,4.948,4.767,GJ 3222 b,RV,,2022.0,22/09/01,11.441823,3.396321,,,,,0.091,0.929,,290.895,,,,10.66669,,,,,
307,GJ 724,-13.3825,18.6825,16.9658,GJ 724,0.53,0.52,3799.0,,,M1.0V,,10.638,,,7.397,6.728,6.546,GJ 724 b,RV,,2023.0,23/10/01,10.748957,3.273022,611.0,,,,0.04685,0.577,,33.2,,,,5.101284,,,,,


In [67]:
systems = pickle.load(open(f"signals/systems-{suffix}.pickle", 'rb'))

FileNotFoundError: [Errno 2] No such file or directory: 'signals/systems-nasa.pickle'