# Constructing a small Ephemeris Dataset
Sam Greydanus | 2023
### Setup
Before running this code, you'll need to download the raw ephemeris data for the planets (or other celestial bodies) that you want to plot. To do this, go to [https://ssd.jpl.nasa.gov/horizons/app.html#/](https://ssd.jpl.nasa.gov/horizons/app.html#/). For 'Ephemeris Type', select 'Vector Table'. For 'Target Body', select the body you want (e.g. 'Earth'). For 'Coordinate Center', try using 'Solar System Barycenter (SSB)'. For the time specification, I manually selected a timespan of five years; the default data interval is 1440 minutes (1 day). Once you've chosen your desired settings, click 'Generate Ephemeris' and then, when the results load, click 'Download Results'. This will let you download a .txt file to a local directory. Name the file after the planet, e.g. `earth.txt`. Repeat this process for every planet you want, saving each one to a different text file in the same folder. Once all the .txt files are saved to that folder, you will be ready to run this code.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def load_planet(planet_name, data_dir):
    '''Reads a file named, eg., "earth.txt" with ephemeris data in a vector table format
       downloaded from https://ssd.jpl.nasa.gov/horizons/app.html#/

       Args:
           planet_name: base name of the file, without the ".txt" extension.
           data_dir: directory that holds the file; a trailing separator is optional.

       Returns:
           A NumPy array with one row of parsed floats (x, y, z) per selected table line.

       Raises:
           ValueError: if the $$SOE / $$EOE markers that delimit the table are missing.'''
    import os  # local import so the block does not depend on the file's import cell

    # os.path.join works whether or not data_dir ends with a path separator
    path = os.path.join(data_dir, '{}.txt'.format(planet_name))
    with open(path, 'r') as f:
        text = f.read()

    # Fail loudly instead of slicing with -1 when a marker is absent
    start = text.find('$$SOE')
    if start == -1:
        raise ValueError("'$$SOE' marker not found in {}".format(path))
    end = text.find('$$EOE', start)
    if end == -1:
        raise ValueError("'$$EOE' marker not found in {}".format(path))

    main_data = text[start + 5:end].split('\n')
    # Index 0 is whatever follows $$SOE on its own line. Taking every 4th line
    # from index 2 assumes four lines per record with the X/Y/Z line second
    # -- TODO confirm this matches the table settings used for the download.
    s_xyz = main_data[2::4]

    f_xyz = []
    for line in s_xyz:
        # Each value is the first space-delimited token after an '=' sign
        tokens = [chunk.strip(' ').split(' ')[0] for chunk in line.split('=')[1:]]
        f_xyz.append([float(tok) for tok in tokens])
    return np.asarray(f_xyz)

def get_colnames(names):
    '''Builds the flat list of DataFrame column labels: for every name, in order,
       "<name>_x", "<name>_y" and "<name>_z".'''
    return [body + '_' + axis for body in names for axis in ('x', 'y', 'z')]

def get_colformat(coords):
    '''Flattens a [planets, time, xyz] array into a [time, planets*xyz] table'''
    n_bodies = coords.shape[0]
    # Move the time axis to the front so each output row holds every body's
    # coordinates for a single time step
    time_major = np.transpose(coords, (1, 0, 2))
    return np.reshape(time_major, (-1, n_bodies * 3))

def process_raw_ephemeris(planets, data_dir, last_n_days=None):
    '''Loads raw ephemeris files for a list of planet names, organizes the data in a DataFrame,
       and then saves the DataFrame as a csv in the same directory as the raw files.

       Args:
           planets: list of body names; each is loaded with load_planet and also
               used as the prefix of that body's three columns.
           data_dir: directory holding the raw files; "ephemeris.csv" is written
               there. A trailing separator is optional.
           last_n_days: if given, keep only the final `last_n_days` samples along
               the time axis (one sample per day is assumed -- confirm against the
               interval used for the download). 0 keeps no samples.

       Returns:
           The DataFrame that was written to disk.

       Raises:
           ValueError: if last_n_days is negative.'''
    import os  # local import so the block does not depend on the file's import cell

    coords = np.stack([load_planet(p, data_dir) for p in planets])
    if last_n_days is not None:
        if last_n_days < 0:
            raise ValueError('last_n_days must be non-negative, got {}'.format(last_n_days))
        # An explicit start index avoids the `-0:` pitfall, where a slice that
        # should be empty returns the whole axis instead
        start = max(coords.shape[1] - last_n_days, 0)
        coords = coords[:, start:]
    df = pd.DataFrame(data=get_colformat(coords), columns=get_colnames(planets))
    # os.path.join works whether or not data_dir ends with a path separator
    df.to_csv(os.path.join(data_dir, 'ephemeris.csv'))
    return df

In [None]:
def plot_planets(df, planets, fig=None):
    '''Plots each body's x-y track and marks the last sample of the track with a dot.

       Args:
           df: DataFrame with "<name>_x" and "<name>_y" columns for every name in `planets`.
           planets: list of body names to draw. Names without a preset color below
               use matplotlib's default color cycle.
           fig: optional existing figure to draw on; a new 5x5 figure is created when None.

       Returns:
           The figure that was drawn on.'''
    colors = {'sun':'yellow','venus':'orange','mercury':'pink','earth':'blue','mars':'red'}
    fig = plt.figure(figsize=[5,5], dpi=100) if fig is None else fig
    # Draw through the figure's own axes so a caller-supplied fig is the one
    # that actually receives the plot, whatever pyplot's current figure is
    ax = fig.gca()

    for name in planets:
        x, y = df[name + '_x'], df[name + '_y']
        # Only force a color when one is defined; unknown bodies must not raise KeyError
        style = {'color': colors[name]} if name in colors else {}
        line, = ax.plot(x, y, label=name, **style)
        if len(x) > 0:
            # Reuse the track's color so the end marker matches it
            ax.plot(x.iloc[-1], y.iloc[-1], '.', color=line.get_color())
    ax.set_title("Ephemeris data from JPL's Horizon System")
    ax.legend(fontsize=7, loc='upper right')
    fig.tight_layout()
    return fig
    
# Bodies to load and plot; each name is used to locate '<name>.txt' in data_dir
planets = ['sun', 'venus', 'earth', 'mars']
# Directory holding the raw .txt files; ephemeris.csv is written here as well
data_dir = './data/'
# Parse the raw files, keep the last 365 samples of each body, and save the csv
df = process_raw_ephemeris(planets, data_dir, last_n_days=365)
# Draw the x-y tracks, then display the figure
plot_planets(df, planets)
plt.show()