In [None]:
# Development conveniences: reload edited modules automatically before each cell runs.
%load_ext autoreload
%autoreload 2
# Use verbose tracebacks when a cell raises.
%xmode verbose

In [None]:
# from ExoRM.get_data import get_data
# get_data()

# from ExoRM.initialize_model import initialize_model
# initialize_model(kwargs = {'cores': 4}) # change as needed

# Uncomment the calls above to initialize / update the model

In [None]:
from ExoRM import read_rm_data, ForecasterRM, preprocess_data, ExoRM

# Model instance shared by every analysis cell below.
# NOTE(review): load_trace() presumably restores the fitted trace produced by
# initialize_model (see the commented-out setup cell) — confirm against the ExoRM package.
erm = ExoRM()
erm.load_trace()

import numpy
import matplotlib.pyplot as plot
import matplotlib
import pandas
import seaborn
import math
import time

# Toggle: when True, later cells write their figures / csv tables to files.
save = False

# Base look: the matplotlib 'seaborn-v0_8-paper' style sheet first, then the seaborn theme.
plot.style.use('seaborn-v0_8-paper')
seaborn.set_theme(style = 'white', context = 'paper')

# Project-specific overrides, applied after the theme calls above.
matplotlib.rcParams.update({
    'figure.figsize': [4, 3],
    'axes.labelsize': 10,  # Axis label font size
    'font.family': 'serif',
    'mathtext.fontset': 'cm',
    'figure.dpi': 500,
    'figure.constrained_layout.use': True,
    'lines.markersize': 1.2,  # Default marker size (scatter size)
    'lines.linewidth': 2,  # Default line width
})

# Output directory used by the `save` branches in later cells.
path = 'Paper Material/ExoRM'

# Read and preprocess the radius-mass table, keeping only the columns used below.
kept_columns = ['name', 'radius', 'mass', 'density', 'error_score']
data = preprocess_data(read_rm_data())[kept_columns]

data

In [None]:
columns = ['radius', 'mass']

# log10 radius and mass; later cells reuse `x` and `y`.
x = numpy.log10(data['radius'])
y = numpy.log10(data['mass'])

# The density estimate is drawn from the untransformed columns; log_scale = True
# puts both axes on a log scale.
kde_options = dict(fill = True, cmap = 'Blues', levels = 20, cbar = True, log_scale = True)
ax = seaborn.kdeplot(data, x = 'radius', y = 'mass', **kde_options)
# seaborn.scatterplot(numpy.log10(data[columns]), x = 'radius', y = 'mass', s = 5, color = 'black', zorder = 2)
# plot.gca().set_aspect('auto')

# plot.xlim(-0.3, 1.6)
# plot.ylim(-0.75, 4.25)

plot.title('Exoplanet Radius-Mass Distribution')
plot.xlabel('Radius (R$_{\\oplus}$)')
plot.ylabel('Mass (M$_{\\oplus}$)')

# if save: plot.savefig(f'{path}/Figure 1.jpeg')

plot.show()

In [None]:
# Dense grid in log10 radius, padded by 0.1 dex on either side of the data range.
xs = numpy.linspace(x.min() - 0.1, x.max() + 0.1, 1000)

# NOTE(review): predict_full_linear is fed linear radii and its outputs are plotted
# directly as masses, while ForecasterRM.forecaster is fed log10 radii and its output
# is exponentiated before plotting — confirm these unit conventions in the ExoRM package.
ms, lower, upper = erm.predict_full_linear(10 ** xs)
ms2 = ForecasterRM.forecaster(xs)

# Each artist carries its own label, so the legend cannot be mis-assigned if the
# drawing order changes (a bare list of labels is matched to artists by position only).
plot.scatter(10 ** x, 10 ** y, label = 'target')
plot.plot(10 ** xs, ms, color = 'C1', label = 'ExoRM')
plot.plot(10 ** xs, 10 ** ms2, '--', color = 'C2', label = 'Forecaster')

# The band between `lower` and `upper` is left unlabelled so it stays out of the legend.
plot.fill_between(10 ** xs, lower, upper, color = 'C1', alpha = 0.25)

plot.legend()
plot.xlabel('Radius (R$_{\\oplus}$)')
plot.ylabel('Mass (M$_{\\oplus}$)')
plot.title('Exoplanet Mass-Radius Relation')

plot.loglog()

if save: plot.savefig(f'{path}/Figure 2.jpeg')

plot.show()

In [None]:
# Fraction of planets whose log10 mass `y` lies inside the [low, high] band that
# predict_full returns for their log10 radius `x`.
center, band_low, band_high = erm.predict_full(x)
outside_band = (y < band_low) | (y > band_high)
out_error = len(x[outside_band])
1 - (out_error / len(x))

In [None]:
a = numpy.log10(5.51)  # log10 of the density of Earth in g / cm^3

# With mass and radius in Earth units (see the axis labels of the mass-radius figure),
# log10(density) = log10(mass) - 3 * log10(radius); adding `a` rescales to g / cm^3.
# Each artist carries its own label, so the legend does not depend on drawing order.
plot.scatter(10 ** x, 10 ** ((y - 3 * x) + a), label = 'target')

# `ms` holds linear masses (hence the log10), `ms2` is already in log10 — both come
# from the mass-radius figure cell.
ds = (numpy.log10(ms) - 3 * xs) + a
ds2 = (ms2 - 3 * xs) + a
plot.plot(10 ** xs, 10 ** ds, color = 'C1', label = 'ExoRM')
plot.plot(10 ** xs, 10 ** ds2, '--', color = 'C2', label = 'Forecaster')

# Same conversion applied to the band edges; unlabelled so it stays out of the legend.
plot.fill_between(10 ** xs,
                  10 ** (((numpy.log10(lower)) - 3 * xs) + a),
                  10 ** (((numpy.log10(upper)) - 3 * xs) + a),
                  color = 'C1', alpha = 0.25)

plot.legend()
plot.xlabel('Radius (R$_{\\oplus}$)')
plot.ylabel('Density (${\\frac{g}{cm^3}}$)')
plot.title('Calculated Density-Radius Relation')

plot.loglog()

# if save: plot.savefig(f'{path}/Figure 4.jpeg')

plot.show()

In [None]:
columns = ['radius', 'mass']
log_radius_cutoff = numpy.log10(11.1)

# Work on a copy so `data` keeps its untransformed radius / mass values.
log_data = data.copy()
log_data[columns] = numpy.log10(log_data[columns])

# Keep only planets strictly below the radius cutoff, then renumber the rows.
below_cutoff = log_data['radius'] < log_radius_cutoff
p_data = log_data[below_cutoff].copy().reset_index(drop = True)
p_data

In [None]:
def round_to_5_sig(value):
    """Round ``value`` to 5 significant figures; zeros and NaNs pass through unchanged."""
    if value == 0 or math.isnan(value):
        return value
    exponent = int(math.floor(math.log10(abs(value))))
    return round(value, (5 - 1) - exponent)


def pick_winner(row):
    """Return 'ExoRM' when its percent error is strictly smaller, otherwise 'Forecaster'."""
    if row['Percent ExoRM err'] < row['Percent Forecaster err']:
        return 'ExoRM'
    return 'Forecaster'


models = ['ExoRM', 'Forecaster']

# Model predictions and observed values, all converted from log10 to linear units.
p_data['ExoRM'] = 10 ** erm(p_data['radius'])
p_data['Forecaster'] = 10 ** ForecasterRM.forecaster(p_data['radius'])
p_data['l_mass'] = 10 ** p_data['mass']
p_data['l_radius'] = 10 ** p_data['radius']

# Signed residuals: observed minus predicted.
for model in models:
    p_data[f'{model} res'] = p_data['l_mass'] - p_data[model]

# Order rows by name length first, then by name; the helper column is dropped again.
p_data = (
    p_data
    .assign(name_len = p_data['name'].str.len())
    .sort_values(by = ['name_len', 'name'])
    .reset_index(drop = True)
    .drop(columns = ['name_len'])
)

columns = ['l_radius', 'l_mass', 'ExoRM', 'Forecaster', 'ExoRM res', 'Forecaster res']

# Rounding happens before the error metrics below, so they are computed from rounded values.
p_data[columns] = p_data[columns].map(round_to_5_sig)

observed = p_data['l_mass']

# Absolute percent error relative to the observed mass.
for model in models:
    p_data[f'Percent {model} err'] = 100 * ((p_data[model] - observed) / observed).abs()

# Symmetric absolute percent error: the denominator is the mean of observed and predicted.
for model in models:
    p_data[f'SAPE {model}'] = 100 * ((p_data[model] - observed) / ((observed + p_data[model]) / 2)).abs()

# Absolute error in linear mass units.
for model in models:
    p_data[f'{model} lerr'] = (p_data[model] - observed).abs()

p_data['winner'] = p_data.apply(pick_winner, axis = 1)

if save: p_data[['name'] + columns + ['winner']].to_csv(f'{path}/ExoRM_results.csv', index = False)

p_data.head(10)

In [None]:
# Summary statistics of the magnitudes of every error metric (signed residuals included).
error_columns = [
    'Percent ExoRM err', 'Percent Forecaster err',
    'SAPE ExoRM', 'SAPE Forecaster',
    'ExoRM res', 'Forecaster res',
    'ExoRM lerr', 'Forecaster lerr',
]
p_data[error_columns].abs().describe()

In [None]:
# Share of planets won by each model, plus the ExoRM-minus-Forecaster margin.
# The counts are computed once; .get(..., 0) keeps the margin defined even when one
# model never wins (plain ['ExoRM'] / ['Forecaster'] indexing would raise KeyError).
winner_share = p_data['winner'].value_counts() / len(p_data)
winner_share, winner_share.get('ExoRM', 0) - winner_share.get('Forecaster', 0)

In [None]:
# Average change between the two models' outputs for radii below 11.1
# (`x` is log10 radius, so the cutoff is applied in log10 as well).
log_radius_cutoff = numpy.log10(11.1)
_x = x[x < log_radius_cutoff]
model_gap = erm(_x) - ForecasterRM.forecaster(_x)
numpy.mean(model_gap)

In [None]:
# One row per summary statistic, in this order; each row is [ExoRM, Forecaster].
comparison_rows = [
    # 10 ** (mean model output over `_x`)
    [10 ** numpy.mean(erm(_x)), 10 ** numpy.mean(ForecasterRM.forecaster(_x))],
    # mean absolute error
    [p_data['ExoRM lerr'].mean(), p_data['Forecaster lerr'].mean()],
    # largest absolute error
    [p_data['ExoRM lerr'].max(), p_data['Forecaster lerr'].max()],
    # mean absolute percent error
    [p_data['Percent ExoRM err'].mean(), p_data['Percent Forecaster err'].mean()],
    # median absolute percent error
    [p_data['Percent ExoRM err'].median(), p_data['Percent Forecaster err'].median()],
    # mean symmetric absolute percent error
    [p_data['SAPE ExoRM'].mean(), p_data['SAPE Forecaster'].mean()],
]
comparison = pandas.DataFrame(comparison_rows, columns = ['ExoRM', 'Forecaster'])

comparison = comparison.assign(Difference = comparison['ExoRM'] - comparison['Forecaster'])

# Round every entry to 5 significant figures; zeros and NaNs are left as they are.
comparison = comparison.map(
    lambda value: value
    if value == 0 or math.isnan(value)
    else round(value, (5 - 1) - int(math.floor(math.log10(abs(value)))))
)

if save: comparison.to_csv(f'{path}/filtered_comparison.csv', index = False)

comparison

In [None]:
# Long format: one row per (planet, model) with that model's absolute error.
error_label = 'Error (M$_{\\oplus}$)'
p_data_long = pandas.melt(
    p_data,
    value_vars = ['ExoRM lerr', 'Forecaster lerr'],
    var_name = 'Model',
    value_name = error_label
)

# Replace the source column names with the bare model names for the axis ticks.
model_names = {'ExoRM lerr': 'ExoRM', 'Forecaster lerr': 'Forecaster'}
p_data_long['Model'] = p_data_long['Model'].map(model_names)

box_options = dict(hue = 'Model', palette = 'viridis', zorder = 1, whis = 1.5, showfliers = False)
ax = seaborn.boxplot(data = p_data_long, x = 'Model', y = error_label, **box_options)

plot.title('Error by Model (Radius ≤ 11.1 R$_{\\oplus}$)\n(Outliers Not Displayed)')
# plot.yscale('log')

if save: plot.savefig(f'{path}/Figure 3.jpeg')

plot.show()

In [None]:
from scipy.stats import ttest_rel

# Paired t-test on the two models' absolute errors for the same planets.
paired_test = ttest_rel(p_data['ExoRM lerr'], p_data['Forecaster lerr'])
paired_test.statistic, paired_test.pvalue

In [None]:
# Long format: one row per (planet, model) with that model's absolute percent error.
percent_label = 'Absolute Percent Error (%)'
p_data_long = pandas.melt(
    p_data,
    value_vars = ['Percent ExoRM err', 'Percent Forecaster err'],
    var_name = 'Model',
    value_name = percent_label
)

# Replace the source column names with the bare model names for the axis ticks.
model_names = {'Percent ExoRM err': 'ExoRM', 'Percent Forecaster err': 'Forecaster'}
p_data_long['Model'] = p_data_long['Model'].map(model_names)

# Outliers are shown here, and the value axis is logarithmic.
box_options = dict(hue = 'Model', palette = 'viridis', zorder = 1, whis = 1.5, showfliers = True, log_scale = True)
ax = seaborn.boxplot(data = p_data_long, x = 'Model', y = percent_label, **box_options)

plot.title('Percent Error by Model (Radius ≤ 11.1 R$_{\\oplus}$)')

plot.show()

p_data_long

In [None]:
# Fresh, full-width copy of the table (`data` above was cut down to five columns).
exoplanet_data = preprocess_data(read_rm_data())

# Split on publication date using plain string comparison against '2018'.
# NOTE(review): assumes pl_pubdate holds year-first strings — TODO confirm.
pubdate = exoplanet_data['pl_pubdate']
new_exoplanet_data = exoplanet_data[pubdate >= '2018']
old_exoplanet_data = exoplanet_data[pubdate < '2018']
new_exoplanet_data, old_exoplanet_data

In [None]:
# Fraction of all rows that fall in the 2018-or-later group.
new_exoplanet_data.shape[0] / exoplanet_data.shape[0]

In [None]:
# Radius-mass scatter split by publication date (before 2018 vs. 2018 onward).
# Each scatter carries its own label, so the legend does not rely on the order in
# which the two groups are drawn.
plot.scatter(old_exoplanet_data['radius'], old_exoplanet_data['mass'],
             marker = 'o', alpha = 0.5, label = 'Unchanged since 2017')
plot.scatter(new_exoplanet_data['radius'], new_exoplanet_data['mass'],
             marker = 's', alpha = 0.5, label = 'Updated after 2017')

plot.legend()
plot.xlabel('Radius (R$_{\\oplus}$)')
plot.ylabel('Mass (M$_{\\oplus}$)')
plot.title('Best Available Exoplanet Data')

plot.loglog()

if save: plot.savefig(f'{path}/Figure 1.jpeg')

plot.show()

In [None]:
import arviz
# Optional diagnostic, currently disabled: pair plot of the listed variables in erm.trace.
# While the call below stays commented out, this cell creates no new figure.
# arviz.plot_pair(
#     erm.trace,
#     var_names = ['ell', 'eta', 'ell_sigma', 'eta_sigma'],
#     kind = 'kde',
#     marginals = True
# )

plot.show()