# Retrieve asteroids dataset
Written by Kiyoaki Okudaira<br>
*Kyushu University Hanada Lab / University of Washington / IAU CPS SatHub<br>
(okudaira.kiyoaki.528@s.kyushu-u.ac.jp or kiyoaki@uw.edu)<br>
<br>
Retrieve asteroid data sets from the NASA SENTRY and ESA AEGIS APIs and parse them into Astropy tables.<br>
<br>
**History**<br>
coding 2026-02-03 : 1st coding<br>
<br>
This code is written for University of Washington ASTR 302 WINTER 2026 Project NEO.<br>
(c) 2026 Kiyoaki Okudaira - Kyushu University Hanada Lab (SSDL) / University of Washington / IAU CPS SatHub

### Parameters
**API settings**

In [1]:
# Run-wide switches read by the retrieval and parsing cells below.
# opt_update_dataset: when True the cells query the APIs and rewrite the files under
#                     output_PATH; otherwise the parsing cells reload previously pickled tables.
# ip_min: forwarded unchanged to both NASA SENTRY retrieval calls.
opt_update_dataset = False  # Retrieve latest dataset | bool
ip_min             = None   # Minimum impact probability for searching NASA SENTRY API | float or None

### Import and initial settings
**PATH settings**

In [None]:
# `os` is imported here because this cell runs before the notebook's import cell.
import os

# Project root. Set the PROJECTNEO_BASE_PATH environment variable to run the notebook
# from another checkout; the fallback is the original hard-coded location.
base_PATH = os.environ.get("PROJECTNEO_BASE_PATH", "/Users/kiyoaki/VScode/ASTR302_projectNEO/")
# Directory that receives every JSON / CSV / pickle file written by this notebook.
# os.path.join accepts a base path given with or without a trailing slash.
output_PATH = os.path.join(base_PATH, "data")

**Standard libraries**

In [3]:
import json
import pickle
from astropy.table import Table, vstack
from tqdm.notebook import tqdm
import time
import numpy as np

**Custom libraries**

In [4]:
from src import api_data

### NASA SENTRY Dataset
**Retrieve / import dataset**<br>
API URL : https://ssd-api.jpl.nasa.gov/sentry.api

In [5]:
# Query the NASA SENTRY API and refresh the JSON copy on disk only when an update is requested.
if opt_update_dataset is True:
    data_query_result = api_data.retrieve_NASA_sentry_data(ip_min)
    # ensure_ascii=False makes json.dump emit non-ASCII characters verbatim, so the file
    # encoding is pinned to UTF-8 instead of depending on the platform default.
    with open(f'{output_PATH}/NASA_sentry_data.json', 'w', encoding='utf-8') as json_output:
        json.dump(data_query_result, json_output, ensure_ascii=False, indent=4, sort_keys=False, separators=(',', ': '))

**Retrieve / import summary data**<br>
API URL : https://ssd-api.jpl.nasa.gov/sentry.api

In [6]:
# Query the NASA SENTRY summary and refresh the JSON copy on disk only when an update is requested.
if opt_update_dataset is True:
    summary_query_result = api_data.retrieve_NASA_sentry_summary(ip_min)
    # ensure_ascii=False makes json.dump emit non-ASCII characters verbatim, so the file
    # encoding is pinned to UTF-8 instead of depending on the platform default.
    with open(f'{output_PATH}/NASA_sentry_summary.json', 'w', encoding='utf-8') as json_output:
        json.dump(summary_query_result, json_output, ensure_ascii=False, indent=4, sort_keys=False, separators=(',', ': '))

**Parse dataset**<br>
Parse the query result into an Astropy Table

In [7]:
# Restore the table pickled by an earlier run, or rebuild it from the fresh query result.
if opt_update_dataset is not True:
    with open(f'{output_PATH}/workspace_NASA_table.pkl', "rb") as pkl_in:
        NASA_table = pickle.load(pkl_in)
else:
    NASA_table = api_data.dict_list2table(data_query_result["data"])
    # Export to CSV, then replace the in-memory table with the version read back
    # from that file (presumably so the CSV reader infers column dtypes -- confirm).
    NASA_table.write(f'{output_PATH}/NASA_table.csv', overwrite=True)
    NASA_table = Table.read(f'{output_PATH}/NASA_table.csv')
    # Cache the table so later runs can skip the API query.
    with open(f'{output_PATH}/workspace_NASA_table.pkl', 'wb') as pkl_out:
        pickle.dump(NASA_table, pkl_out)

Add the `v_inf` (velocity at infinity) column to the dataset from the summary data

In [8]:
# Build (or reload) the summary table and, on update runs, copy its v_inf values
# onto the matching rows of NASA_table.
if opt_update_dataset is True:
    NASA_summary_table = api_data.dict_list2table(summary_query_result["data"])
    # Export to CSV, then continue with the table as read back from that file.
    NASA_summary_table.write(f'{output_PATH}/NASA_summary_table.csv', overwrite = True)
    NASA_summary_table = Table.read(f'{output_PATH}/NASA_summary_table.csv')
    with open(f'{output_PATH}/workspace_NASA_summary_table.pkl', 'wb') as f:
        pickle.dump(NASA_summary_table, f)
    # One pass over the summary builds an id -> v_inf lookup, replacing a full
    # np.where scan of the summary table for every NASA_table row.
    # setdefault keeps the first summary row for an id, matching the previous
    # "first match" behaviour.
    v_inf_by_id = {}
    for object_id, v_inf in zip(NASA_summary_table["id"], NASA_summary_table["v_inf"]):
        v_inf_by_id.setdefault(object_id, v_inf)
    # An id missing from the summary raises KeyError here (previously IndexError).
    NASA_table["v_inf"] = [v_inf_by_id[object_id] for object_id in NASA_table["id"]]
    # Overwrite the NASA_table pickle so the cached copy carries the v_inf column.
    with open(f'{output_PATH}/workspace_NASA_table.pkl', 'wb') as f:
        pickle.dump(NASA_table, f)
else:
    # NASA_table is left as loaded from its pickle; only the summary is restored here.
    with open(f'{output_PATH}/workspace_NASA_summary_table.pkl', "rb") as f:
        NASA_summary_table = pickle.load(f)

Display table

In [9]:
# Group the rows by the `des` column so each object forms one group
# (counted via `.groups` in the next cell), then display the grouped table.
NASA_table = NASA_table.group_by("des")
NASA_table

fullname,des,ip,sigma_vi,ps,energy,sigma_mc,method,id,date,ts,v_inf
str24,str10,float64,float64,float64,float64,float64,str4,str8,str13,int64,float64
101955 Bennu (1999 RQ36),101955,1e-07,--,-5.38,1425.0,2.1605,MC,a0101955,2290-09-25.12,--,5.9916984432395
101955 Bennu (1999 RQ36),101955,1e-07,--,-5.36,1420.0,2.0531,MC,a0101955,2281-09-24.57,--,5.9916984432395
101955 Bennu (1999 RQ36),101955,1e-07,--,-5.35,1420.0,0.9476,MC,a0101955,2272-09-24.38,--,5.9916984432395
101955 Bennu (1999 RQ36),101955,1e-07,--,-5.32,1417.0,1.9442,MC,a0101955,2257-09-24.58,--,5.9916984432395
101955 Bennu (1999 RQ36),101955,1e-07,--,-5.32,1420.0,2.7445,MC,a0101955,2254-09-24.85,--,5.9916984432395
101955 Bennu (1999 RQ36),101955,1e-07,--,-5.27,1422.0,2.6204,MC,a0101955,2231-09-25.85,--,5.9916984432395
101955 Bennu (1999 RQ36),101955,1e-07,--,-5.27,1421.0,2.3651,MC,a0101955,2231-09-25.66,--,5.9916984432395
101955 Bennu (1999 RQ36),101955,1e-07,--,-5.27,1420.0,1.4184,MC,a0101955,2231-09-25.59,--,5.9916984432395
101955 Bennu (1999 RQ36),101955,1e-07,--,-5.26,1421.0,1.1506,MC,a0101955,2228-09-24.73,--,5.9916984432395
...,...,...,...,...,...,...,...,...,...,...,...


In [10]:
# Number of groups, i.e. distinct `des` values, in the grouped NASA table.
len(NASA_table.groups)

2066

### ESA AEGIS Dataset
**Retrieve / import risk list**<br>
API URL : https://neo.ssa.esa.int/PSDB-portlet/download?file=esa_risk_list<br>
DOCUMENT : https://neo.ssa.esa.int/computer-access

In [11]:
# Download the ESA AEGIS risk list and keep the raw response text on disk
# only when an update is requested.
if opt_update_dataset is True:
    query_result = api_data.retrieve_ESA_AEGIS_risk_list()
    # Pin the encoding so the saved text does not depend on the platform default.
    with open(f'{output_PATH}/ESA_aegis_data.txt', 'w', encoding='utf-8') as text_output:
        text_output.write(query_result)

**Parse risk list**<br>
Parse the query result into an Astropy Table

In [12]:
# Restore the risk list pickled by an earlier run, or parse the freshly downloaded one.
if opt_update_dataset is not True:
    with open(f'{output_PATH}/workspace_ESA_risk_list.pkl', "rb") as pkl_in:
        ESA_risk_list = pickle.load(pkl_in)
else:
    ESA_risk_list = api_data.parse_ESA_AEGIS_risk_list(query_result)
    # Keep both a CSV export and a pickle of the parsed table.
    ESA_risk_list.write(f'{output_PATH}/ESA_risk_list.csv', overwrite=True)
    with open(f'{output_PATH}/workspace_ESA_risk_list.pkl', 'wb') as pkl_out:
        pickle.dump(ESA_risk_list, pkl_out)

In [13]:
# Display the parsed ESA risk list table.
ESA_risk_list

des,name,diameter_m,diameter_estimated,vi_datetime_utc,ip_max,ps_max,ts,vel_km_s,years_range,ip_cum,ps_cum
str9,str8,float64,bool,str16,float64,float64,object,float64,str9,float64,float64
2023VD3,,14.0,True,2034-11-08 17:08,0.00235,-2.67,0,21.01,2034-2039,0.00235,-2.67
2008JL3,,30.0,True,2027-05-01 09:05,0.000149,-2.73,0,14.01,2027-2122,0.000161,-2.73
1979XB,,500.0,True,2056-12-12 21:38,2.34e-07,-2.82,0,27.54,2056-2113,7.34e-07,-2.7
2000SG344,,40.0,True,2071-09-16 00:54,0.000895,-3.18,0,11.27,2069-2122,0.00282,-2.77
2005QK76,,40.0,True,2030-02-26 08:15,3.01e-05,-3.39,0,22.66,2030-2108,6.42e-05,-3.26
2007KE4,,30.0,True,2029-05-26 00:18,4.27e-05,-3.47,0,15.03,2026-2115,4.37e-05,-3.47
2021GX9,,30.0,True,2032-04-16 21:51,5.03e-05,-3.49,0,20.17,2032-2032,5.03e-05,-3.49
2023DO,,27.0,True,2057-03-23 19:43,0.000489,-3.5,0,13.18,2057-2073,0.0005,-3.49
2024RF10,,11.0,True,2092-09-07 12:39,0.00642,-3.59,0,14.21,2092-2118,0.00766,-3.51
...,...,...,...,...,...,...,...,...,...,...,...


In [14]:
# Number of rows in the ESA risk list.
len(ESA_risk_list)

1904

**Retrieve / import dataset**<br>
API URL : https://neo.ssa.esa.int/PSDB-portlet/download?file=$desig.risk<br>
DOCUMENT : https://neo.ssa.esa.int/computer-access

In [15]:
# Build the per-object ESA table by querying one risk file per risk-list row,
# or restore the table pickled by an earlier run.
if opt_update_dataset is True:
    # Column layout of the combined table.
    names = (
        "des", "name", "diameter_m", "diameter_estimated",
        "vi_datetime_utc", "ip_max", "ps_max",
        "vel_km_s", "years_range", "ip_cum", "ps_cum",
        "date", "mjd", "sigma", "sigimp",
        "dist_re", "width_re", "stretch_re",
        "p_re", "exp_energy_mt", "ps", "ts"
    )
    # Empty object-dtype table that seeds the incremental vstack below.
    ESA_table = Table(names=names, dtype=[object]*len(names))
    for i, row in enumerate(tqdm(ESA_risk_list), start=1):
        # A dedicated name keeps the risk-list response stored in `query_result`
        # intact, so the risk-list parsing cell can still be re-run after this loop.
        object_query_result = api_data.retrieve_ESA_AEGIS_data(row["des"])
        new_rows = api_data.parse_ESA_AEGIS_data(object_query_result, row)
        ESA_table = vstack([ESA_table, new_rows], join_type="outer")
        if i % 8 == 0:
            # Every 8th request: pause 4 s (presumably to go easy on the server -- confirm),
            # then checkpoint the partial table so an interrupted run keeps its progress.
            time.sleep(4)
            with open(f'{output_PATH}/workspace_ESA_table.pkl', 'wb') as f:
                pickle.dump(ESA_table, f)
    # Save the complete table before the CSV round trip below.
    with open(f'{output_PATH}/workspace_ESA_table.pkl', 'wb') as f:
        pickle.dump(ESA_table, f)
    # Export to CSV, replace the in-memory table with the version read back from
    # that file, and overwrite the pickle with it.
    ESA_table.write(f'{output_PATH}/ESA_table.csv', overwrite = True)
    ESA_table = Table.read(f'{output_PATH}/ESA_table.csv')
    with open(f'{output_PATH}/workspace_ESA_table.pkl', 'wb') as f:
        pickle.dump(ESA_table, f)
else:
    with open(f'{output_PATH}/workspace_ESA_table.pkl', "rb") as f:
        ESA_table = pickle.load(f)

Display table

In [16]:
# Group the rows by the `des` column so each object forms one group,
# then display the grouped table.
ESA_table = ESA_table.group_by("des")
ESA_table

des,name,diameter_m,diameter_estimated,vi_datetime_utc,ip_max,ps_max,vel_km_s,years_range,ip_cum,ps_cum,date,mjd,sigma,sigimp,dist_re,width_re,stretch_re,p_re,exp_energy_mt,ps,ts
str9,str8,float64,str5,str16,float64,float64,float64,str9,float64,float64,str14,float64,float64,float64,float64,float64,float64,float64,float64,float64,int64
1979XB,--,500.0,True,2056-12-12 21:38,2.34e-07,-2.82,27.54,2056-2113,7.34e-07,-2.7,2056-12-12.902,72344.902,0.255,0.0,0.22,0.106,3530000.0,2.34e-07,55600.0,-2.82,0
1979XB,--,500.0,True,2056-12-12 21:38,2.34e-07,-2.82,27.54,2056-2113,7.34e-07,-2.7,2065-12-16.463,75635.463,-1.11,0.0,1.1,0.091,35800000.0,2.72e-09,12100.0,-5.39,0
1979XB,--,500.0,True,2056-12-12 21:38,2.34e-07,-2.82,27.54,2056-2113,7.34e-07,-2.7,2101-12-14.204,88781.204,-0.384,0.0,0.61,0.0,197000000.0,3.46e-09,24900.0,-5.3,0
1979XB,--,500.0,True,2056-12-12 21:38,2.34e-07,-2.82,27.54,2056-2113,7.34e-07,-2.7,2113-12-14.753,93164.753,-0.706,0.0,0.25,0.028,1370000.0,4.94e-07,17800.0,-3.33,0
1991BA,--,6.0,True,2032-01-19 17:37,6.15e-07,-7.16,19.92,2026-2115,5.29e-06,-6.74,2026-01-17.658,61057.658,0.225,0.494,1.16,48.969,2520000.0,2.55e-09,0.0197,-8.31,0
1991BA,--,6.0,True,2032-01-19 17:37,6.15e-07,-7.16,19.92,2026-2115,5.29e-06,-6.74,2027-01-19.190,61424.19,-3.669,0.0,0.54,0.013,313000.0,3.45e-09,0.0047,-9.07,0
1991BA,--,6.0,True,2032-01-19 17:37,6.15e-07,-7.16,19.92,2026-2115,5.29e-06,-6.74,2032-01-19.735,63250.735,-1.389,0.0,0.26,0.049,584000.0,6.15e-07,0.00933,-7.16,0
1991BA,--,6.0,True,2032-01-19 17:37,6.15e-07,-7.16,19.92,2026-2115,5.29e-06,-6.74,2036-01-19.840,64711.84,-1.121,0.0,0.44,0.516,5970000.0,7.48e-08,0.0105,-8.23,0
1991BA,--,6.0,True,2032-01-19 17:37,6.15e-07,-7.16,19.92,2026-2115,5.29e-06,-6.74,2038-01-18.946,65441.946,-4.2,0.0,0.66,0.037,273000.0,4.72e-10,0.00415,-10.82,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
