In [133]:
# SOCRATES—Satellite Orbital Conjunction Reports Assessing Threatening Encounters in Space
# SOCRATES uses Satellite Tool Kit’s Conjunction Analysis Tools (STK/CAT) and the NORAD SGP4 propagator implemented in STK

import re
import time
import urllib.request
from datetime import datetime
from datetime import timedelta
from os import listdir
from os.path import isfile, join

import pandas as pd
import requests
from bs4 import BeautifulSoup

def get_last_save_date(path):
    '''
    Get the name and timestamp of the most recent SOCRATES save file

    Only regular files directly inside `path` whose name has the form
    'socrates_<YYYYmmddHHMMSS>.csv' are considered.  Files whose 14 digits
    do not form a valid timestamp are ignored.

    Parameters:
    -----------
    path : str
        Relative file path of the directory to search

    Returns
    -------
    file : str
        The most recent filename, or '' if no valid save file exists

    date : datetime
        The timestamp embedded in that filename, or datetime.min if no
        valid save file exists
    '''

    # The dot is escaped so only a literal '.csv' extension is accepted
    pattern = re.compile(r'socrates_([0-9]{14})\.csv')

    saves = []
    for f in listdir(path):
        if not isfile(join(path, f)):
            continue
        match = pattern.fullmatch(f)
        if match is None:
            continue
        try:
            saved_at = datetime.strptime(match[1], '%Y%m%d%H%M%S')
        except ValueError:
            # 14 digits that are not a real timestamp (e.g. month 13);
            # skip this file instead of discarding every other candidate
            continue
        saves.append((saved_at, match[0]))

    if not saves:
        return '', datetime.min

    date, file = max(saves)
    return file, date

def scrape_socrates(num_of_records, min_hours, data_file_path, timeout=120):
    '''
    Scrape the SOCRATES website for upcoming close flybys

    The results are always returned as a DataFrame.  They are additionally
    written to 'socrates_<YYYYmmddHHMMSS>.csv' inside `data_file_path`, but
    only when the newest existing save file is more than `min_hours` old
    (or when no save file exists yet).

    Parameters:
    -----------
    num_of_records : int
        Number of records to request from SOCRATES

    min_hours : int
        Minimum number of hours between file saves

    data_file_path: str
        Relative file path of the directory holding the save files

    timeout : float, optional
        Seconds to wait for the web server before giving up

    Returns
    -------
    df : pandas.DataFrame
        One row per <form> element found in the results table, with the
        cell text stored under the column names defined in `cidx_map`

    Raises
    ------
    requests.RequestException
        If the request times out, fails, or returns an HTTP error status

    IndexError
        If the page contains fewer than four <table> elements
    '''

    # Scrape data
    print('Making web request...')
    url = ('https://celestrak.com/SOCRATES/search-results.php?IDENT=NAME&NAME_TEXT1=&NAME_TEXT2='
           '&CATNR_TEXT1=&CATNR_TEXT2=&ORDER=MAXPROB&MAX=' + str(num_of_records) + '&B1=Submit')
    # Without a timeout requests waits indefinitely on an unresponsive server
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page
    response.raise_for_status()
    print('Request complete.  Begin Parsing...')

    # Save the date this was scraped.  Kept as a naive UTC datetime so it can
    # be subtracted from the naive datetime returned by get_last_save_date.
    extract_date = datetime.utcnow()

    # Parse Data: the results are read from the fourth <table> on the page
    soup = BeautifulSoup(response.text, "html.parser")
    table = soup.find_all('table')[3]
    rows = []
    # Position of each <td> within a record -> output column name.
    # Positions not listed here (including cell 0) are dropped.
    cidx_map = {1: 'sat1_norad', 2: 'sat1_name', 3: 'sat1_days_epoch', 4: 'max_prob', 5: 'dil_thr_km', 6: 'min_rng_km',
                7: 'rel_velo_kms', 8: 'sat2_norad', 9: 'sat2_name', 10: 'sat2_days_epoch', 11: 'start_time',
                12: 'tca_time', 13: 'stop_time'}

    # Each <form> inside the table is treated as one record
    for record in table.find_all('form'):
        row = {}
        for idx, cell in enumerate(record.find_all('td')):
            if idx in cidx_map:
                row[cidx_map[idx]] = cell.text
        rows.append(row)
    print('Parsing complete.')

    # Convert the data into a Pandas Dataframe
    df = pd.DataFrame(rows)

    # Save the file if none newer than the min_hours exists
    recent_file, recent_date = get_last_save_date(data_file_path)
    time_dif = extract_date - recent_date
    if time_dif > timedelta(hours=min_hours):
        filename = 'socrates_' + extract_date.strftime('%Y%m%d%H%M%S') + '.csv'
        # join() keeps the path correct whether or not data_file_path ends
        # with a path separator
        df.to_csv(join(data_file_path, filename), index=False)
        print(f'Saving of file \'{filename}\' complete.  Please be sure to commit new file!')
    else:
        print(f'Not saving file since a file was created {time_dif} ago: {recent_file}')

    return df
    

# Run configuration
#-----------------
num_of_records = 1000                 # number of records to request from SOCRATES
min_hours = 6                         # minimum number of hours between file saves
data_file_path = '../data/socrates/'  # relative directory holding the saved CSV files

# Fetch the current report; a new CSV is written only if the last save is old enough
df = scrape_socrates(
    num_of_records=num_of_records,
    min_hours=min_hours,
    data_file_path=data_file_path,
)
df.head()

Making web request...
Request complete.  Begin Parsing...
Parsing complete.
Saving of file 'socrates_20201209030741.csv' complete.  Please be sure to commit new file!


Unnamed: 0,sat1_norad,sat1_name,sat1_days_epoch,max_prob,dil_thr_km,min_rng_km,rel_velo_kms,sat2_norad,sat2_name,sat2_days_epoch,start_time,tca_time,stop_time
0,44421,COSMOS 2535 [+],2.631,0.2147,0.001,0.004,0.0,44424,COSMOS 2536 [+],2.698,2020 Dec 08 12:00:00.000,2020 Dec 10 04:26:16.817,2020 Dec 15 12:00:00.000
1,14452,METEOR 2-10 [?],4.297,0.006346,0.037,0.052,14.803,41302,NOAA 16 DEB [-],3.78,2020 Dec 12 03:23:20.504,2020 Dec 12 03:23:20.841,2020 Dec 12 03:23:21.179
2,46740,STARLINK-1848 [+],6.765,0.003746,0.008,0.034,0.001,46755,STARLINK-1924 [+],6.682,2020 Dec 14 13:26:16.621,2020 Dec 14 22:21:24.721,2020 Dec 15 03:36:49.529
3,12409,COSMOS 1266 [?],2.75,0.002626,0.033,0.134,4.28,9703,DELTA 1 DEB [-],3.497,2020 Dec 10 20:50:05.822,2020 Dec 10 20:50:06.990,2020 Dec 10 20:50:08.158
4,14452,METEOR 2-10 [?],4.226,0.001983,0.066,0.093,14.804,41302,NOAA 16 DEB [-],3.71,2020 Dec 12 01:41:42.884,2020 Dec 12 01:41:43.222,2020 Dec 12 01:41:43.559
