# Data Filtering and generation

This notebook filters the original data from the forked repo so that only Near Earth Objects (NEO) are kept.
Additionally, it generates new entries from a dataset downloaded from [JPL SBDB](https://ssd.jpl.nasa.gov/tools/sbdb_query.html): potentially hazardous asteroids (PHA) of the Apollo orbit class, ordered by Earth Minimum Orbit Intersection Distance (Earth MOID).

## Library loading

In [81]:
import numpy as np
import pandas as pd
import re

## Data filtering

In [82]:
# Load the four untouched input catalogues (the ".orig" files). The filtered
# results are written to the plain ".csv" names by later cells.
ASTEROIDS, ASTEROIDSX, MOONS, COMETS = (
    pd.read_csv(source)
    for source in (
        "asteroids.csv.orig",
        "asteroids2.csv.orig",
        "moons.csv.orig",
        "comets.csv.orig",
    )
)

In [83]:
# Largest perihelion distance a body may have and still be kept as a Near Earth
# Object. Presumably in AU, matching the `a` and `q` columns — TODO confirm units.
NEO_MAX_PERIHELION = 1.3

# For the asteroid tables the perihelion distance is derived as a * (1 - e)
# (assumes `a` is the semi-major axis and `e` the eccentricity — verify against
# the CSV schema); the comet table already carries it in column `q`.
#
# `numN` is a scratch column that a later cell adds to ASTEROIDS/ASTEROIDSX.
# Dropping it here (a no-op on a fresh top-to-bottom run) keeps this cell safe to
# re-run afterwards: otherwise the helper column would leak into the exported
# CSVs and into the column list used to shape the PHA table.
ASTEROIDS0 = (
    ASTEROIDS[ASTEROIDS.a * (1 - ASTEROIDS.e) <= NEO_MAX_PERIHELION]
    .drop(columns=['numN'], errors='ignore')
)
ASTEROIDSX0 = (
    ASTEROIDSX[ASTEROIDSX.a * (1 - ASTEROIDSX.e) <= NEO_MAX_PERIHELION]
    .drop(columns=['numN'], errors='ignore')
)
COMETS0 = COMETS[COMETS.q <= NEO_MAX_PERIHELION]

In [84]:
# Add a "<number> <name>" label to both asteroid tables, in place. The moon
# filters below match `orbiting` against this label as well as the bare name.
# Rows with a missing `num` (or a missing `name`) end up with NaN in `numN`,
# because the number series is shortened by dropna() and then re-aligned on the
# index when it is added to the name column.
for catalogue in (ASTEROIDS, ASTEROIDSX):
    number_text = catalogue['num'].dropna().astype(int).astype(str)
    catalogue['numN'] = number_text + ' ' + catalogue['name']

In [85]:
# Asteroids from the first catalogue that fail the NEO cut (a * (1 - e) > 1.3).
# Computed once and reused for both matching passes below.
_far_asteroids = ASTEROIDS[ASTEROIDS.a * (1 - ASTEROIDS.e) > 1.3]

# Drop moons whose host (`orbiting`) is one of those asteroids. The host may be
# written either as the bare name or as "<number> <name>", so filter on both.
# Moons whose host is not listed among the rejected asteroids are kept.
# `~` is the boolean-mask negation operator; unary minus on booleans is not
# supported by NumPy and only works on a pandas Series by special-casing.
MOONS0 = MOONS[~MOONS.orbiting.isin(_far_asteroids.name.dropna())]
MOONS1 = MOONS0[~MOONS0.orbiting.isin(_far_asteroids.numN.dropna())]

In [86]:
# Asteroids from the second catalogue that fail the NEO cut (a * (1 - e) > 1.3).
# Computed once and reused for both matching passes below.
_far_asteroids_x = ASTEROIDSX[ASTEROIDSX.a * (1 - ASTEROIDSX.e) > 1.3]

# Same two-pass filter as for the first catalogue: remove moons whose host is a
# rejected asteroid, matched by bare name and then by "<number> <name>".
# `~` is the boolean-mask negation operator; unary minus on booleans is not
# supported by NumPy and only works on a pandas Series by special-casing.
MOONS2 = MOONS1[~MOONS1.orbiting.isin(_far_asteroids_x.name.dropna())]
MOONS3 = MOONS2[~MOONS2.orbiting.isin(_far_asteroids_x.numN.dropna())]

In [94]:
# Write the NEO-only asteroid tables under the plain ".csv" names; index=False
# keeps the pandas row index out of the files.
for neo_frame, target_path in (
    (ASTEROIDS0, 'asteroids.csv'),
    (ASTEROIDSX0, 'asteroids2.csv'),
):
    neo_frame.to_csv(target_path, index=False)

In [88]:
# Export the moons that survived all four host-asteroid filters (MOONS0..MOONS3);
# index=False keeps the pandas row index out of the file.
MOONS3.to_csv('moons.csv', index=False)

In [89]:
# Export the comets kept by the q <= 1.3 filter; index=False keeps the pandas
# row index out of the file.
COMETS0.to_csv('comets.csv', index=False)

## Data generation

In [95]:
# Read the JPL SBDB export, order it by the `moid` column (ascending) and
# renumber the rows 0..n-1 so the index reflects that ranking.
_pha_export = pd.read_csv('jpl_sbdb_PHA_Apollo.csv')
_pha_ranked = _pha_export.sort_values(by=['moid'])
PHA = _pha_ranked.reset_index(drop=True)

In [96]:
# Build `name` from `full_name` by removing a leading run of digits plus the
# whitespace after it. Every value goes through str() first, so a missing
# full_name becomes the literal text 'nan'.
# NOTE(review): the pattern is anchored at the very first character, so a value
# that starts with spaces is left untouched — confirm the export has no padding.
_LEADING_NUMBER = re.compile(r'^\d+\s+')
PHA['name'] = PHA['full_name'].map(
    lambda full_name: _LEADING_NUMBER.sub('', str(full_name))
)

In [97]:
def _already_listed(candidates, catalogue):
    """Flag candidate catalogue numbers that already occur in `catalogue`.

    A value counts as listed when it matches exactly (the plain `isin` test) or
    when both sides are numerically equal after coercion. The second test covers
    the case where one column was read as text and the other as a number — for
    example '433' versus 433.0 — which a plain `isin` never matches. Values that
    are not numeric are only compared exactly.

    Parameters: two pandas Series. Returns a boolean Series indexed like
    `candidates`.
    """
    exact = candidates.isin(catalogue)
    known_numbers = pd.to_numeric(catalogue, errors='coerce').dropna()
    numeric = pd.to_numeric(candidates, errors='coerce').isin(known_numbers)
    return exact | numeric


# Reshape the JPL export to the schema of the existing catalogue: remove the
# JPL-only columns and rename the remaining ones to the catalogue's names.
PHA0 = (
    PHA
    .drop(columns=['full_name', 'pha', 'moid', 'diameter'])
    .rename(columns={'pdes': 'num', 'ma': 'm', 'i': 'inc', 'om': 'omega', 'epoch_mjd': 'epoch'})
)
# Columns the catalogue has but the export does not; filled with placeholders.
PHA0['info'] = np.nan
PHA0['wiki'] = np.nan
PHA0['wikipic'] = ''

# Same columns in the same order as the filtered catalogue, so the rows can be
# appended to its CSV without a header. A column missing from PHA0 raises KeyError.
PHA1 = PHA0[ASTEROIDSX0.columns.tolist()]

# Remove objects that either filtered catalogue already contains, matched on `num`.
PHA2 = PHA1[~_already_listed(PHA1.num, ASTEROIDS0.num)]
PHA3 = PHA2[~_already_listed(PHA2.num, ASTEROIDSX0.num)]

In [98]:
# Rewrite the base file before appending so this cell is safe to re-run: a bare
# append would add the PHA rows again on every execution. On a fresh
# top-to-bottom run the resulting file is the same as with the append alone.
ASTEROIDSX0.to_csv('asteroids2.csv', index=False)
# PHA3 has the same column order as ASTEROIDSX0, so the header is not repeated.
PHA3.to_csv('asteroids2.csv', mode='a', index=False, header=False)