In [1]:
import timeit
from datetime import datetime
import requests
import pandas as pd
import numpy as np
import os
import urllib
import json
from tqdm import tqdm
import pickle
import gzip
import shutil

In [2]:
# configurables

# local paths -- every data artefact lives under one shared root directory
_space_track_root = "../../../siads591 data/space_track_raw/"
csv_store_path = _space_track_root + "csv/"          # finished <start>-<end>.csv.gz archives
log_file_path = _space_track_root + "logs.pkl"       # pickled log DataFrame
temp_directory = _space_track_root + "tmp/"          # downloads land here before being moved
cookie_path = _space_track_root + "cookie.pkl"       # pickled login cookie jar
# JSON file that is posted as the login form data
space_track_credentials = "./space-track-credentials.json"

In [3]:
# setting up default and initial variables

# CSV columns kept from each download (handed to pd.read_csv as `usecols`)
columns = [
    'GP_ID', 'EPOCH', 'NORAD_CAT_ID',
    'MEAN_MOTION', 'ECCENTRICITY', 'INCLINATION',
    'RA_OF_ASC_NODE', 'ARG_OF_PERICENTER', 'MEAN_ANOMALY',
    'REV_AT_EPOCH', 'BSTAR', 'MEAN_MOTION_DOT', 'MEAN_MOTION_DDOT',
    'SEMIMAJOR_AXIS', 'PERIOD', 'APOAPSIS', 'PERIAPSIS',
]

# Explicit dtypes for every kept column except EPOCH (which is parsed as a date on read).
_uint32_columns = ('GP_ID', 'NORAD_CAT_ID', 'REV_AT_EPOCH')
_float64_columns = (
    'INCLINATION', 'RA_OF_ASC_NODE', 'ARG_OF_PERICENTER', 'MEAN_ANOMALY',
    'SEMIMAJOR_AXIS', 'PERIOD', 'APOAPSIS', 'PERIAPSIS',
    'MEAN_MOTION', 'ECCENTRICITY', 'BSTAR',
    'MEAN_MOTION_DOT', 'MEAN_MOTION_DDOT',
)
dtypes = {
    **dict.fromkeys(_uint32_columns, np.uint32),
    **dict.fromkeys(_float64_columns, np.float64),
}

In [4]:
# log file helpers

def save_logs():
    """Write the module-level `logs` DataFrame to `log_file_path` as a pickle."""
    pd.to_pickle(logs, log_file_path)
    
def log(log_type,url=None,error=None,input_str=None,output_str=None):
    """Append one entry to the global `logs` DataFrame and persist it to disk.

    Parameters
    ----------
    log_type : str
        Event name, stored in the `type` column (e.g. "login", "skip").
    url, error, input_str, output_str : optional
        Free-form details stored in the `url`, `error`, `input` and `output`
        columns respectively; each defaults to None.

    The entry is timestamped with `np.datetime64('now')` in `created_on`, and
    `save_logs()` is called after every append.
    """
    global logs
    new_log = {'created_on':np.datetime64('now'), 'type':log_type, 'url':url, 'input':input_str, 'error':error, 'output':output_str}
    # DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
    # is the supported replacement and also works on older pandas versions.
    logs = pd.concat([logs, pd.DataFrame([new_log])], ignore_index=True)
    save_logs()
    
# Load the existing log, or start a fresh one when no log file exists yet.
try:
    logs = pd.read_pickle(log_file_path)
except FileNotFoundError:
    # Only a missing file means "first run". Any other failure (e.g. an
    # unreadable pickle) is left to propagate, so that the existing log is not
    # silently replaced by an empty one and then overwritten on the next save.
    # Column names match the keys that log() writes for each entry.
    logs = pd.DataFrame(columns = ["created_on","type","url","input","error","output"])
    logs = logs.astype({'created_on': 'datetime64[ns]'})
    log('log_created')

In [5]:
def get_latest_gp_id(store_path=None):
    """Return the GP_ID at which the next download should start.

    Archive files are named ``<start>-<end>.csv.gz``. The result is the largest
    ``<end>`` found plus one, or 1 when there is no usable archive.

    Parameters
    ----------
    store_path : str, optional
        Directory to scan; defaults to the module-level `csv_store_path`.

    Returns
    -------
    int
    """
    if store_path is None:
        store_path = csv_store_path
    suffix = ".csv.gz"
    try:
        names = os.listdir(store_path)
    except OSError:
        # directory missing or unreadable: behave as if nothing was downloaded yet
        return 1

    end_ids = []
    for name in names:
        if not name.endswith(suffix):
            continue
        parts = name[:-len(suffix)].split("-")
        try:
            end_ids.append(int(parts[1]))
        except (IndexError, ValueError):
            # A stray file that does not follow the naming scheme is skipped.
            # It must not reset the whole download back to GP_ID 1.
            continue
    return max(end_ids) + 1 if end_ids else 1

def get_space_track_api_count(unit="m"):
    """Count the "fetch_init" entries in `logs` created within the last one `unit`.

    `unit` is passed straight to `np.timedelta64(1, unit)`, e.g. "m" or "h".
    """
    window_start = np.datetime64('now') - np.timedelta64(1,unit)
    is_recent = logs.created_on > window_start
    is_fetch_init = logs.type=="fetch_init"
    return len(logs[is_recent & is_fetch_init])

def space_track_login():
    """Log in to Space-Track using the JSON file at `space_track_credentials`.

    The parsed JSON is posted as form data to the login endpoint.

    Returns
    -------
    The response's cookie jar when the HTTP status is 200, otherwise None.
    Both outcomes are written to the log; a failure is also printed.
    """
    login_url = "https://www.space-track.org/ajaxauth/login"
    with open(space_track_credentials) as credentials_file:
        credentials = json.load(credentials_file)

    response = requests.post(login_url, data = credentials)
    if response.status_code != 200:
        log("login", error=response.status_code)
        print("Login failed with code:",response.status_code)
        return None

    log("login", output_str=f'{credentials["identity"]} - success')
    return response.cookies
    
def generate_url(gp_id=None, id_span=5000000, limit=100000):
    """Build the gp_history query URL for the next batch of records.

    Parameters
    ----------
    gp_id : int, optional
        First GP_ID to request; defaults to `get_latest_gp_id()`.
    id_span : int, optional
        Width of the requested GP_ID range (the range is gp_id--gp_id+id_span).
        This is the number that previously had to be edited by hand.
    limit : int, optional
        Value placed in the URL's ``limit`` segment.

    Returns
    -------
    tuple
        ``(gp_id, url)``: the starting GP_ID and the query URL (CSV format,
        ordered by GP_ID ascending).
    """
    if gp_id is None:
        gp_id = get_latest_gp_id()
    end_gp_id = gp_id + id_span
    return (gp_id, f'https://www.space-track.org/basicspacedata/query/class/gp_history/GP_ID/{gp_id}--{end_gp_id}/orderby/GP_ID asc/limit/{limit}/format/csv/emptyresult/show')

def give_me_a_cookie_please():
    """Return a cookie jar for Space-Track requests, logging in when needed.

    The jar cached at `cookie_path` is reused if it can be loaded and still
    holds cookies after expired ones are dropped. Otherwise
    `space_track_login()` is called and a successful result is cached.

    Returns
    -------
    requests.cookies.RequestsCookieJar or None
        None when login fails; a "skip" entry is logged in that case.
    """
    # NOTE: pickle.load can execute arbitrary code from the file it reads, so
    # cookie_path must only ever contain files written by this notebook.
    try:
        with open(cookie_path, "rb") as cookie_file:
            auth_cookie = pickle.load(cookie_file)
    except Exception:
        # missing or unreadable cache: fall through to a fresh login
        auth_cookie = None

    if isinstance(auth_cookie, requests.cookies.RequestsCookieJar):
        auth_cookie.clear_expired_cookies()
    else:
        auth_cookie = None

    # log in if needed
    if auth_cookie is None or len(auth_cookie) == 0:
        auth_cookie = space_track_login()
        if auth_cookie is not None:
            # cache only a successful login; a failed one leaves nothing worth storing
            with open(cookie_path, "wb") as cookie_file:
                pickle.dump(auth_cookie, cookie_file)

    # stop if can't log in
    if auth_cookie is None:
        log("skip", error = 'Cannot log in')
        return None
    return auth_cookie

Before you proceed:
* Create the `space-track-credentials.json` file from template with your own credentials
* Make sure all the directories and file paths are created and correct
* Manually test the fetch cell to make sure things are working first
* If using the automatic process, set an appropriate `intervalTime`
* The automatic process will probably misbehave if a single run takes longer than the interval time; do not use it in that case
* If you don't know what to do or are unsure, ask Tim.

In [6]:
# Deliberate guard: "Run All Cells" stops at this cell, so the cells below
# only run when they are started on purpose.
raise Exception('This exception is to stop the cells below from running when you do "Run All Cells"')

Exception: This exception is to stop the cells below from running when you do "Run All Cells"

In [None]:
# I have commented this out for now since it's never really necessary to use given my internet speed.  Please feel
# free to use it for autorunning the tasks.  Look at the /playground/tim_autorun-notebook/skeleton_autorun_below.ipynb
# if you want the skeleton autorun code.


# %%html
# <script>
#     var intervalTime = 300000; // 5 minutes in ms
#     if (typeof autorun_toggle === 'undefined') {
#         var autorun_toggle = null;
#     }
#     function toggle_autorun() {
#         var btn = document.getElementById("autorun_button");
#         setTimeout(function(){ // add a delay so the selected cell is correctly set here
#             if (autorun_toggle == null) {
#                 var start_index = IPython.notebook.get_selected_index()+1;
#                 var end_index = IPython.notebook.get_cells().length;
#                 console.log("start autorun, start: " + start_index + " end: " + end_index);
#                 IPython.notebook.execute_cell_range(start_index, end_index);
#                 //$('div.input').hide(200);
#                 autorun_toggle = setInterval(function(){
#                     console.log("Run cells below");
#                     //IPython.notebook.execute_cells_below();
#                     IPython.notebook.execute_cell_range(start_index, end_index);
#                 }, intervalTime);
#             } else {
#                 window.clearInterval(autorun_toggle);
#                 console.log("clear autorun");
#                 autorun_toggle = null;
#                 //$('div.input').show(200);
#             }
#         }, 100);
#     }
# </script>
# <button id="autorun_button" onclick="toggle_autorun()">Toggle autorun cells below</button>

In [7]:
# Download gp_history batches until the local archive is up to date, the API
# budget is used up, or a request fails.
pbar = tqdm(position=0, leave=True)
start_time = timeit.default_timer()
log('run_cell',output_str=f'Last run local time: {datetime.now()}')
# print(f'Last run local time: {datetime.now()}')

last_fetched_gp_id = -1
try:
    while True:
        # check API limit (counts are taken once so the logged numbers are the ones tested)
        calls_last_minute = get_space_track_api_count("m")
        calls_last_hour = get_space_track_api_count("h")
        if calls_last_minute >= 25 or calls_last_hour >= 275:
            log("skip", error = f'Request limit exceeded: 1m({calls_last_minute}) 1h({calls_last_hour})')
            break

        # check if already up to date: the previous pass added no new archive
        current_gp_id = get_latest_gp_id()
        if last_fetched_gp_id == current_gp_id:
            log("up_to_date",output_str=(last_fetched_gp_id))
            break

        auth_cookie = give_me_a_cookie_please()
        if auth_cookie is None:
            # the failed login has already been logged by give_me_a_cookie_please()
            pbar.set_postfix_str("Cannot log in")
            break

        start_gp, url = generate_url()
        pbar.set_postfix_str(f"downloading gp id starting {start_gp}")
        # Record the attempt before sending it: get_space_track_api_count()
        # counts "fetch_init" entries, so without this the limit check above
        # never sees any request.
        log("fetch_init", url=url, input_str=start_gp)
        r = requests.get(url, allow_redirects=True, cookies=auth_cookie)

        if r.status_code != 200:
            # do not store an error page as if it were CSV data
            log("fetch_error", url=url, input_str=start_gp, error=r.status_code)
            pbar.set_postfix_str(f"Fetch failed with code: {r.status_code}")
            break

        if r.content == b'NO RESULTS RETURNED':
            log("no_more_results",output_str=(last_fetched_gp_id))
            pbar.set_postfix_str(f"No more results.  Last GP_ID: {current_gp_id}")
            break

        # save gzip'ed version of CSV to save space
        pbar.set_postfix_str(f"processing gp id starting {start_gp}")
        dlname = f'{temp_directory}{start_gp}.csv.gz'
        # close the archive explicitly so it is complete on disk before it is read back
        with gzip.open(dlname, 'wb') as gz_file:
            gz_file.write(r.content)

        log("fetch_complete", url=url, input_str=start_gp, output_str=dlname)

        # the parsed frame is only used to find the last GP_ID contained in the file
        df = pd.read_csv(dlname, usecols=columns, parse_dates=['EPOCH'], infer_datetime_format=True, index_col='GP_ID', dtype=dtypes, compression='gzip')

        if df.empty:
            # header-only download: there is no GP_ID to name the archive after
            log("fetch_error", url=url, input_str=start_gp, error="empty csv")
            pbar.set_postfix_str(f"Empty CSV for gp id starting {start_gp}")
            break

        end_gp = int(df.index.max())

        fname = f'{csv_store_path}{str(start_gp).zfill(9)}-{str(end_gp).zfill(9)}.csv.gz'
        shutil.move(dlname, fname)
        log("csv_move_complete", url=url, input_str=dlname, output_str=fname)
        # note that the CSV file may include a partial downloaded file as its last file_id

        pbar.set_postfix_str(f"completed {start_gp}-{end_gp}")

        pbar.update(1)

        last_fetched_gp_id = current_gp_id
finally:
    # always release the progress bar, even when a request or parse step raises
    pbar.close()

elapsed = timeit.default_timer() - start_time
print(f"Time used to execute cells: {elapsed} seconds")

1it [00:08,  8.19s/it, No more results.  Last GP_ID: 170814691]

Time used to execute cells: 8.195576899 seconds



