In [43]:
'''
    Read Exoplot Data 
    
    Ruizhi Zhan modified from Daniel Koll's script
'''
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Working directory captured when this cell runs; every data path is resolved against it.
root = os.getcwd()


def root_prefixed(p):
    """Return the path `p` joined onto `root`.

    Parameters
    ----------
    p : str
        Path relative to `root`, e.g. 'data/exoplots_data.csv'.

    Returns
    -------
    str
        The result of os.path.join(root, p).

    Notes
    -----
    `root` is looked up when the function is called, not when it is defined,
    so re-assigning `root` later changes what this returns.
    """
    return os.path.join(root, p)

In [44]:
# Load the complete planet table from the CSV stored under the working directory.
csv_path = root_prefixed('data/exoplots_data.csv')
df = pd.read_csv(csv_path)

# Announce the preview, then leave df.head() as the final expression so the
# notebook renders the first rows as a table.
print('Here are the first few rows of the data from "data/exoplots_data.csv": \n')
df.head()

Here are the first few rows of the data from "data/exoplots_data.csv": 



Unnamed: 0,name,hostname,IC,disposition,period,rade,rade_est,rade_err1,rade_err2,radj,...,st_teff,st_log_lum,Jmag,Kmag,ra,dec,flag_tran,flag_kepler,flag_k2,url
0,OGLE-2016-BLG-1227L b,OGLE-2016-BLG-1227L,,Confirmed,,,13.947316,,,,...,,,,,265.597125,-33.759778,False,False,False,https://exoplanetarchive.ipac.caltech.edu/over...
1,Kepler-276 c,Kepler-276,,Confirmed,31.884,2.9,,1.27,-0.28,0.258721,...,5779.0,-0.089,14.234,13.838,293.568197,39.036312,True,True,False,https://exoplanetarchive.ipac.caltech.edu/over...
2,Kepler-829 b,Kepler-829,,Confirmed,6.883376,2.11,,0.44,-0.18,0.188242,...,5698.0,0.04,13.699,13.29,282.332831,42.463813,True,True,False,https://exoplanetarchive.ipac.caltech.edu/over...
3,K2-283 b,K2-283,,Confirmed,1.921036,3.52,,0.16,-0.16,0.314034,...,5060.0,-0.524,12.579,11.983,13.194368,9.692918,True,False,True,https://exoplanetarchive.ipac.caltech.edu/over...
4,Kepler-477 b,Kepler-477,,Confirmed,11.119907,2.07,,0.18,-0.12,0.184673,...,5240.0,-0.372,12.274,11.729,288.067445,42.355305,True,True,False,https://exoplanetarchive.ipac.caltech.edu/over...


In [45]:
# Print the column names, followed by a hand-written legend describing them.
print("\nHere are the column names of the data from \"data/exoplots_data.csv\": ")
print(df.columns)
# NOTE: the legend below is maintained by hand and does not describe every
# column in df.columns (e.g. the radj/masse/massj *_err1 / *_err2 columns).
# Keep the names in it in sync with df.columns: the stellar temperature
# column is 'st_teff'.
print('''
name: planet name
hostname: host star name
IC: Input Catalog number (prior for Kepler survey)
disposition: Confirmed or Candidate
period: orbital period, unit: day
rade: real planet radius, unit: Earth radius
rade_est: estimated planet radius (based on mass), unit: Earth radius
rade_err1: upper error of rade, unit: Earth radius
rade_err2: lower error of rade, unit: Earth radius
radj: real planet radius, unit: Jupiter radius
radj_est: estimated planet radius (from rade_est), unit: Jupiter radius
masse: real planet mass, unit: Earth mass
masse_est: estimated planet mass (based on radius), unit: Earth mass
massj: real planet mass, unit: Jupiter mass
tran_depth_ppm: transit depth, unit: ppm
tran_dur_hr: transit duration, unit: hour
semi_au: semi-major axis, unit: AU
eccen: eccentricity
insol: insolation flux, unit: Earth flux
distance_pc: distance of the system, unit: parsec
year_discovered: year of discovery
year_confirmed: year of confirmation
discoverymethod: method of discovery ['Microlensing' 'Transit' 'Radial Velocity' 
   'Eclipse Timing Variations' 'Astrometry' 'Imaging' 'Transit Timing Variations' 
   'Orbital Brightness Modulation' 'Pulsar Timing' 'Disk Kinematics' 'Pulsation Timing Variations']
facility: e.g. 'James Webb Space Telescope (JWST)'
st_mass: stellar mass, unit: Solar mass
st_rad: stellar radius, unit: Solar radius
st_teff: stellar effective temperature, unit: K
st_log_lum: stellar log luminosity, unit: Solar luminosity
Jmag, Kmag: J band and K band magnitude
ra, dec: Right Ascension and DEClination
flag_tran: detected via transit or not [True/False]
flag_kepler: detected by Kepler or not [True/False]
flag_k2: detected by Kepler extended mission (K2) or not [True/False]
url: point you to on clicking
'''
)


Here are the column names of the data from "data/exoplots_data.csv": 
Index(['name', 'hostname', 'IC', 'disposition', 'period', 'rade', 'rade_est',
       'rade_err1', 'rade_err2', 'radj', 'radj_est', 'radj_err1', 'radj_err2',
       'masse', 'masse_est', 'masse_err1', 'masse_err2', 'massj', 'massj_est',
       'massj_err1', 'massj_err2', 'tran_depth_ppm', 'tran_dur_hr', 'semi_au',
       'eccen', 'insol', 'distance_pc', 'year_discovered', 'year_confirmed',
       'discoverymethod', 'facility', 'st_mass', 'st_rad', 'st_teff',
       'st_log_lum', 'Jmag', 'Kmag', 'ra', 'dec', 'flag_tran', 'flag_kepler',
       'flag_k2', 'url'],
      dtype='object')

name: planet name
hostname: host star name
IC: Input Catalog number (prior for Kelper survey)
disposition: Confirmed or Candidate
period: orbital period, unit: day
rade: real planet radius, unit: Earth radius
rade_est: estimated planet radius (based on mass), unit: Earth radius
rade_err1: upper error of rade, unit: Earth radius
rade_err2:

In [47]:
def _count_and_merge(table, measured_col, estimated_col, merged_col, plural, singular):
    """Report availability of a quantity and add a best-available column.

    Prints how many rows have a measured value, an estimated value, and at
    least one of the two, then writes `merged_col` into `table` in place:
    the measured value where present, otherwise the estimate, otherwise NaN.

    Parameters
    ----------
    table : pandas.DataFrame
        Frame holding `measured_col` and `estimated_col`; modified in place.
    measured_col, estimated_col : str
        Names of the measured and estimated columns.
    merged_col : str
        Name of the column to create (or overwrite) with the merged values.
    plural, singular : str
        Words used in the printed labels (e.g. 'radii' / 'radius').
    """
    has_measured = table[measured_col].notna()
    has_estimated = table[estimated_col].notna()

    print(f"Planets with real {plural}: ", has_measured.sum())
    print(f"Planets with estd. {plural}: ", has_estimated.sum())
    # OR the masks rather than adding the two counts, so a planet that carries
    # both a measured and an estimated value is counted only once.
    print(f"Planets with real or estd. {plural}: ", (has_measured | has_estimated).sum())

    # Measured value wins; the estimate only fills the gaps.
    table[merged_col] = table[measured_col].fillna(table[estimated_col])
    print(f"(** Nr of planets with some kind of {singular}: {table[merged_col].notna().sum()})")


print("Total planets: ", len(df))
# first, many planets only have estimated mass or radius
# --> use best of (data, estimate) to get rid of nans
_count_and_merge(df, 'rade', 'rade_est', 'rade_merged', plural='radii', singular='radius')

print("\n")
# ---
_count_and_merge(df, 'masse', 'masse_est', 'masse_merged', plural='mass', singular='mass')

print("\n")
# also check other parameters
print("Planets with stellar radii: ", df['st_rad'].notna().sum())
print("Planets with stellar mass: ", df['st_mass'].notna().sum())
print("Planets with stellar temp: ", df['st_teff'].notna().sum())
print("\n")
print("Planets with orbital periods: ", df['period'].notna().sum())
print("Planets with eccentricity: ", df['eccen'].notna().sum())

print("\n")
print("Confirmed planet:", (df['disposition'] == 'Confirmed').sum())

Total planets:  13586
Planets with real radii:  11710
Planets with estd. radii:  1354
Planets with real or estd. radii:  13064
(** Nr of planets with some kind of radius: 13064)


Planets with real mass:  2570
Planets with estd. mass:  10494
Planets with real or estd. mass:  13064
(** Nr of planets with some kind of mass: 13064)


Planets with stellar radii:  12871
Planets with stellar mass:  12184
Planets with stellar temp:  12815


Planets with orbital periods:  13227
Planets with eccentricity:  4826


Comfirmed planet: 5616


In [42]:
# and check the data source
# Build one mask for "flagged for Kepler or K2" and count rows through it.
# Adding the per-flag counts would count a row twice if both flags were set;
# the two approaches agree whenever the flags are mutually exclusive.
# NOTE(review): assumes flag_kepler / flag_k2 / flag_tran are boolean columns
# without missing values -- confirm against the CSV.
from_kepler_or_k2 = df['flag_kepler'] | df['flag_k2']

print("Planets detected by Kepler and K2: ", from_kepler_or_k2.sum())
print("Planets detected by Kepler and K2 with real mass: ",
      (from_kepler_or_k2 & df['masse'].notna()).sum())
print("Planets detected by Kepler and K2 with real radius: ",
      (from_kepler_or_k2 & df['rade'].notna()).sum())
print("Planets detected by Kepler and K2 via transit: ",
      (from_kepler_or_k2 & df['flag_tran']).sum())

Planets detected by Kepler and K2:  6336
Planets detected by Kepler and K2 with real mass:  490
Planets detected by Kepler and K2 with real radius:  6128
Planets detected by Kepler and K2 via transit:  6279


In [10]:
# Rows that carry both a measured radius and a measured mass.
has_radius = ~np.isnan(df['rade'])
has_mass = ~np.isnan(df['masse'])
has_radius_and_mass = has_radius & has_mass
print("Planets with BOTH real radius and mass: ", np.sum(has_radius_and_mass))

# Of those, the ones whose measured radius is below 2 Earth radii.
is_below_2_re = df['rade'] < 2
print("Planets with BOTH real radius and mass, that are smaller than 2Re: ",
      np.sum(has_radius_and_mass & is_below_2_re))

Planets with BOTH real radius and mass:  1216
Planets with BOTH real radius and mass, that are smaller than 2Re:  175
