In [1]:
import timeit
from datetime import datetime
import requests
import pandas as pd
import numpy as np
from os import listdir
from os.path import isfile, join
import urllib
import json
from tqdm import tqdm
import pickle

In [2]:
# configurables

# local paths -- the three data locations share one root directory
_leo_all_root = "../../../../siads591 data/space_track_raw/leo_all/"
csv_store_path = _leo_all_root + "csv/"      # raw CSV responses, one sub-folder per object type
pkl_store_path = _leo_all_root + "pkl/"      # gzip-pickled DataFrames, one sub-folder per object type
log_file_path = _leo_all_root + "logs.pkl"   # pickled log DataFrame
cookie_path = "../tmp/cookie.pkl"            # pickled login cookie jar

In [3]:
# setting up default and initial variables

# notebook key -> OBJECT_TYPE value placed in the space-track query URL
object_types = dict(payload='PAYLOAD', rocket_body='ROCKET BODY', debris='DEBRIS')

# columns read from every downloaded CSV
columns = [
    'EPOCH', 'NORAD_CAT_ID', 'MEAN_MOTION', 'ECCENTRICITY', 'INCLINATION',
    'RA_OF_ASC_NODE', 'ARG_OF_PERICENTER', 'MEAN_ANOMALY', 'REV_AT_EPOCH',
    'BSTAR', 'MEAN_MOTION_DOT', 'MEAN_MOTION_DDOT', 'SEMIMAJOR_AXIS',
    'PERIOD', 'APOAPSIS', 'PERIAPSIS',
]

# dtypes passed to pd.read_csv; EPOCH is not listed because it is parsed as a date
_uint32_columns = ('NORAD_CAT_ID', 'REV_AT_EPOCH')
_float64_columns = (
    'INCLINATION', 'RA_OF_ASC_NODE', 'ARG_OF_PERICENTER', 'MEAN_ANOMALY',
    'SEMIMAJOR_AXIS', 'PERIOD', 'APOAPSIS', 'PERIAPSIS', 'MEAN_MOTION',
    'ECCENTRICITY', 'BSTAR', 'MEAN_MOTION_DOT', 'MEAN_MOTION_DDOT',
)
dtypes = {name: np.uint32 for name in _uint32_columns}
dtypes.update({name: np.float64 for name in _float64_columns})

In [4]:
# log file helpers

def save_logs():
    """Pickle the global `logs` DataFrame to `log_file_path`."""
    pd.to_pickle(logs, log_file_path)
    
def log(log_type,url=None,epoch=None,error=None,output=None):
    """Append one event row to the global `logs` DataFrame and persist it.

    Parameters
    ----------
    log_type : str
        Event name stored in the `type` column (for example 'skip' or
        'fetch_success').
    url, epoch, error, output : optional
        Details stored unchanged in the columns of the same name.

    The row is stamped with `np.datetime64('now')` in `created_on`, and the
    whole log is written back to disk through `save_logs()` on every call.
    """
    global logs
    new_log = {'created_on':np.datetime64('now'), 'type':log_type, 'url':url, 'epoch':epoch, 'error':error, 'output':output}
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement and also works on older pandas releases.
    logs = pd.concat([logs, pd.DataFrame([new_log])], ignore_index=True)
    save_logs()
    
# Load the existing log, or start a new one when no log file exists yet.
try:
    logs = pd.read_pickle(log_file_path)
except FileNotFoundError:
    # Only a missing file means "first run". Any other read failure (for
    # example a damaged pickle) is left to surface, because log() below
    # immediately writes the fresh frame over log_file_path.
    logs = pd.DataFrame(columns = ["created_on","type","url","epoch","error","output"])
    logs = logs.astype({'created_on': 'datetime64[ns]'})
    log('log_created')

In [5]:
def get_current_file_count(obj_type):
    """Return the highest file number stored for `obj_type`.

    Looks in `{pkl_store_path}{obj_type}/` for files named `<number>.pkl.gz`
    and returns the largest `<number>` as an int.

    Parameters
    ----------
    obj_type : str
        Sub-folder name under `pkl_store_path` (a key of `object_types`).

    Returns
    -------
    int
        The largest file number, or -1 when the folder is missing or holds no
        numbered `.pkl.gz` file.
    """
    suffix = ".pkl.gz"
    try:
        names = listdir(f'{pkl_store_path}{obj_type}/')
    except OSError:
        # folder not created yet (or unreadable): nothing has been stored
        return -1
    # Ignore names whose stem is not a plain number. One stray file must not
    # make the count fall back to -1, or the next pickle would overwrite file 0.
    numbers = [
        int(name[:-len(suffix)])
        for name in names
        if name.endswith(suffix) and name[:-len(suffix)].isdecimal()
    ]
    return max(numbers, default=-1)

def get_last_epoch(obj_type):
    """Return the last index value of the newest pickle stored for `obj_type`.

    The newest pickle is the one numbered `get_current_file_count(obj_type)`
    under `{pkl_store_path}{obj_type}/`.

    Returns
    -------
    numpy.datetime64
        The last index entry of that file as datetime64[ns], or
        `np.datetime64('1959-01-01')` when it cannot be read (for example when
        nothing has been downloaded yet).
    """
    try:
        newest = get_current_file_count(obj_type)
        df = pd.read_pickle(f"{pkl_store_path}{obj_type}/%05d.pkl.gz" % newest, compression="gzip")
        return df.index[-1:].astype("datetime64[ns]").values[0]
    except Exception:
        # Best effort: any read problem means "start from the beginning".
        # `Exception` (not a bare except) lets Ctrl-C / kernel interrupts
        # through instead of silently restarting from 1959.
        return np.datetime64('1959-01-01')

def get_space_track_api_count(unit="m"):
    """Count the 'fetch_init' rows of `logs` created within the last one `unit`.

    `unit` is a numpy timedelta code, e.g. "m" for one minute or "h" for one hour.
    """
    window_start = np.datetime64('now') - np.timedelta64(1,unit)
    is_recent = logs.created_on > window_start
    is_fetch = logs.type=="fetch_init"
    return len(logs[is_recent & is_fetch])

def space_track_login():
    """Log in to space-track.org and return the cookies of the login response.

    The credentials are read from `../space-track-credentials.json` and posted
    as form data to the `ajaxauth/login` endpoint.

    Returns
    -------
    requests.cookies.RequestsCookieJar or None
        The response cookies when the server answers HTTP 200; otherwise None,
        after a 'login' row with the error has been written to the log.
    """
    with open("../space-track-credentials.json") as json_file:
        credentials = json.load(json_file)
    url = "https://www.space-track.org/ajaxauth/login"
    try:
        # requests waits forever by default; the timeout keeps an unattended
        # run from hanging on a dead connection.
        x = requests.post(url, data = credentials, timeout=30)
    except requests.RequestException as err:
        # network failures follow the same "return None" path as a bad status
        log("login", error=repr(err))
        print("Login failed:",err)
        return None
    # NOTE(review): only the HTTP status is checked here -- confirm that
    # space-track does not also answer 200 for rejected credentials.
    if x.status_code == 200:
        print("Logged in")
        return x.cookies
    else:
        log("login", error=x.status_code)
        print("Login failed with code:",x.status_code)
        return None
    
def generate_url(obj_type, epoch):
    """Build the gp_history query for the time window that starts at `epoch`.

    Returns a tuple `(lower, upper, url)`: the window bounds as
    microsecond-resolution strings and the space-track query URL built from
    them and from `object_types[obj_type]`.
    """
    # step back one tick: we'll get duplicates here, but it is needed in case
    # there are duplicated epochs that got cut off
    window_start = epoch - 1
    start_year = window_start.astype('datetime64[Y]').item().year

    # window length in days, chosen by the year the window starts in
    span_days = 3650  # 1975 and earlier
    for after_year, days in ((2014, 60), (2009, 180), (1975, 365)):
        if start_year > after_year:
            span_days = days
            break
    window_end = window_start + np.timedelta64(span_days,'D')

    l = np.datetime_as_string(window_start, unit='us')
    u = np.datetime_as_string(window_end, unit='us')
    query = f'https://www.space-track.org/basicspacedata/query/class/gp_history/ECCENTRICITY/<0.25/MEAN_MOTION//>11.25/EPOCH/>{l},<{u}/OBJECT_TYPE/{object_types[obj_type]}/orderby/EPOCH asc/limit/100000/format/csv/emptyresult/show'
    return (l, u, query)

def give_me_a_cookie_please():
    """Return a cookie jar for space-track.org, logging in when necessary.

    The jar pickled at `cookie_path` is reused as long as it still holds
    cookies after the expired ones are dropped. Otherwise
    `space_track_login()` is called and a successful result is pickled to
    `cookie_path` for later calls.

    Returns
    -------
    requests.cookies.RequestsCookieJar or None
        The cookie jar, or None (after writing a 'skip' log row) when no
        login could be obtained.
    """
    # NOTE: pickle.load can run arbitrary code; cookie_path must only ever
    # hold a file written by this notebook.
    try:
        with open(cookie_path, "rb") as cookie_file:
            auth_cookie = pickle.load(cookie_file)
    except Exception:
        # no cache yet, or an unreadable one: fall through to a fresh login
        auth_cookie = None
    if isinstance(auth_cookie, requests.cookies.RequestsCookieJar):
        auth_cookie.clear_expired_cookies()
    else:
        auth_cookie = None
    # log in if needed
    if auth_cookie is None or len(auth_cookie) == 0:
        auth_cookie = space_track_login()
        # cache successful logins only; a failed login is not written to disk
        if auth_cookie is not None:
            with open(cookie_path, "wb") as cookie_file:
                pickle.dump(auth_cookie, cookie_file)
    # stop if can't log in
    if auth_cookie is None:
        log("skip", error = 'Cannot log in')
        return None
    return auth_cookie

Before you proceed:
* Create the `space-track-credentials.json` file from template with your own credentials
* Make sure all the directories and file paths are created and correct
* Manually test the fetch cell to make sure things are working first
* If using automatic process, set an appropriate intervalTime
* The automatic process will probably break if a single run takes longer than your interval time; don't use it in that case
* If you don't know what to do or are unsure, ask Tim.

In [6]:
# Deliberate stop: this cell always raises, so "Run All Cells" halts here and
# the download cell further down only runs when it is started by hand.
raise Exception('This exception is to stop the cells below from running when you do "Run All Cells"')

Exception: This exception is to stop the cells below from running when you do "Run All Cells"

In [None]:
# I have commented this out for now since it's never really necessary to use given my internet speed.  Please feel
# free to use it for autorunning the tasks.  Look at the /playground/tim_autorun-notebook/skeleton_autorun_below.ipynb
# if you want the skeleton autorun code.


# %%html
# <script>
#     var intervalTime = 300000; // 5 minutes in ms
#     if (typeof autorun_toggle === 'undefined') {
#         var autorun_toggle = null;
#     }
#     function toggle_autorun() {
#         var btn = document.getElementById("autorun_button");
#         setTimeout(function(){ // add a delay so the selected cell is correctly set here
#             if (autorun_toggle == null) {
#                 var start_index = IPython.notebook.get_selected_index()+1;
#                 var end_index = IPython.notebook.get_cells().length;
#                 console.log("start autorun, start: " + start_index + " end: " + end_index);
#                 IPython.notebook.execute_cell_range(start_index, end_index);
#                 //$('div.input').hide(200);
#                 autorun_toggle = setInterval(function(){
#                     console.log("Run cells below");
#                     //IPython.notebook.execute_cells_below();
#                     IPython.notebook.execute_cell_range(start_index, end_index);
#                 }, intervalTime);
#             } else {
#                 window.clearInterval(autorun_toggle);
#                 console.log("clear autorun");
#                 autorun_toggle = null;
#                 //$('div.input').show(200);
#             }
#         }, 100);
#     }
# </script>
# <button id="autorun_button" onclick="toggle_autorun()">Toggle autorun cells below</button>

In [7]:
# Download loop: for every object type, keep requesting the time window that
# follows the newest stored epoch until the stored epoch stops advancing, the
# request budget is used up, or a login / download fails.
pbar = tqdm(position=0, leave=True)
start_time = timeit.default_timer()
log('run_cell',output=f'Last run local time: {datetime.now()}')
# print(f'Last run local time: {datetime.now()}')

# (connect, read) timeouts in seconds, so an unattended run cannot hang forever
request_timeout = (30, 600)

for obj_type in object_types:
    last_epoch = None
    while True:
        # request budget, counted from the 'fetch_init' rows logged below
        per_minute = get_space_track_api_count("m")
        per_hour = get_space_track_api_count("h")
        if per_minute >= 25 or per_hour >= 275:
            log("skip", error = f'Request limit exceeded: 1m({per_minute}) 1h({per_hour})')
            break
        auth_cookie = give_me_a_cookie_please()
        if auth_cookie is None:
            # give_me_a_cookie_please() has already logged why; don't send an
            # unauthenticated query
            break

        epoch = get_last_epoch(obj_type)
        if last_epoch == epoch:
            # the previous download did not move the newest epoch forward
            log("up_to_date",output=(obj_type,epoch))
            break
        last_epoch = epoch

        start, _, url = generate_url(obj_type,epoch)
        pbar.set_postfix_str(f"downloading {obj_type} - {epoch}")
        # Record the attempt before sending it: these rows are what
        # get_space_track_api_count() counts, so without them the limit
        # check above can never trigger.
        log("fetch_init", url=url, epoch=start)
        try:
            r = requests.get(url, allow_redirects=True, cookies=auth_cookie, timeout=request_timeout)
        except requests.RequestException as err:
            log("fetch_error", url=url, epoch=start, error=repr(err))
            break
        if r.status_code != 200:
            # don't store an error page as if it were CSV data
            log("fetch_error", url=url, epoch=start, error=r.status_code)
            break
        fname = f'{csv_store_path}{obj_type}/{start}.csv'.replace(":","_")
        with open(fname, 'wb') as csv_file:
            csv_file.write(r.content)
        log("fetch_success", url=url, epoch=start, output=fname)
        oname = (f"{pkl_store_path}{obj_type}/%05d.pkl.gz" % (get_current_file_count(obj_type)+1)).replace(":","_")
        pbar.set_postfix_str(f"processing {obj_type} - {epoch}")
        df = pd.read_csv(fname, usecols=columns, parse_dates=['EPOCH'], infer_datetime_format=True, index_col='EPOCH', dtype=dtypes)
        df.to_pickle(oname, compression="gzip")

        log("process_complete", url=url, epoch=start, output=oname)
        pbar.set_postfix_str(f"completed {obj_type} - {epoch}")

        pbar.update(1)

pbar.close()
elapsed = timeit.default_timer() - start_time
print(f"Time used to execute cells: {elapsed} seconds")

132it [1:10:28, 32.39s/it, downloading payload - 2020-01-27T20:10:57.896832000]

Logged in


246it [2:13:54, 40.15s/it, downloading rocket_body - 2009-02-27T07:55:56.352576000]

KeyboardInterrupt: 

In [None]:
# Display the last rows of the log to check how the most recent run ended.
logs.tail()