In [1]:
import pandas as pd
from os import listdir, remove
from os.path import isfile, join
import re

import spacetrack.operators as op
from spacetrack import SpaceTrackClient
from datetime import datetime, timedelta

from tqdm import tqdm

# Read the Space-Track credentials from a local key file laid out as
# "<user>,<password>" and build the API client from them.
with open('./spacetrack_pwd.key') as key_file:
    # strip() copes with a missing (or extra) trailing newline instead of
    # blindly dropping the last character; maxsplit=1 keeps any commas that
    # are part of the password.
    spacetrack_usr, spacetrack_pwd = key_file.read().strip().split(',', 1)
st = SpaceTrackClient(identity=spacetrack_usr, password=spacetrack_pwd)

In [51]:
def get_all_socrates_data(path):
    '''
    Builds a dataframe out of all the socrates data files

    Only regular files named ``socrates_<14 digits>.csv`` (optionally with a
    ``.gz`` suffix) are read.  Files are loaded in filename order so the row
    order of the result does not depend on the directory listing order.

    Parameters:
    -----------
    path : str
        Relative file path of socrates files (with or without a trailing
        path separator)

    Returns
    -------
    df : Pandas Dataframe
        Combined set of all socrates data.  The date columns are parsed to
        datetimes, the ``sat*_days_epoch`` columns to timedeltas, and the
        ``sat1_last_epoch``, ``sat2_last_epoch`` and ``sat_pair`` columns are
        added.  The index is NOT reset, so labels repeat across files.

    Raises
    ------
    FileNotFoundError
        If no matching socrates file exists in ``path``
    '''
    name_pattern = re.compile(r'^socrates_([0-9]{14})\.csv(\.gz)?$')
    files = sorted(f for f in listdir(path) if isfile(join(path, f)) and name_pattern.search(f))
    if not files:
        raise FileNotFoundError(f'No socrates_<timestamp>.csv[.gz] files found in {path!r}')

    # Build single dataset - read every file first and concatenate once
    # (join() is used so that `path` does not need a trailing separator)
    df = pd.concat([pd.read_csv(join(path, f)) for f in files])

    # Fix dates and timedeltas
    df['extract_date'] = pd.to_datetime(df['extract_date'], format='%Y-%m-%d %H:%M:%S.%f')
    for col in ('start_time', 'tca_time', 'stop_time'):
        df[col] = pd.to_datetime(df[col], format='%Y %b %d %H:%M:%S.%f')
    for sat in ('sat1', 'sat2'):
        # <sat>_days_epoch is a number of days; subtracting it from the TCA
        # time gives the <sat>_last_epoch timestamp
        df[f'{sat}_days_epoch'] = pd.to_timedelta(df[f'{sat}_days_epoch'], 'D')
        df[f'{sat}_last_epoch'] = df['tca_time'] - df[f'{sat}_days_epoch']

    # Add "pair" column
    df['sat_pair'] = df['sat1_name'] + '-' + df['sat2_name']

    return df

# Module-level running counter shared by every call to set_group_number
group_num = 0
def set_group_number(x):
    '''
    Advances the running group counter when a new group starts and
    returns the current counter value (intended for use via pd.apply)

    The counter lives in the module-level ``group_num`` variable, so it
    keeps its value between calls and between separate apply() runs.

    Parameters:
    -----------
    x : Boolean
        Truthy when the current row starts a new group

    Returns
    -------
    group_num : int
        The counter value after handling this row
    '''
    global group_num
    group_num = group_num + 1 if x else group_num
    return group_num

def get_socrates_cleaned_data(path):
    '''
    Builds a dataframe out of all the socrates data files
    and remove duplicates and sorts

    Parameters:
    -----------
    path : str
        Relative file path of socrates files

    Returns
    -------
    df : Pandas Dataframe
        Combined set of all socrates data with an added integer ``group``
        column, sorted by ``group`` and then ``extract_date``.  Group
        numbers start at 1 on every call.
    '''
    df = get_all_socrates_data(path)

    # Clean the data
    # Remove duplicates - keep the first occurrence of a sat-pair and tca_time
    df = df.drop_duplicates(subset=['sat_pair', 'tca_time'], keep='first')

    # Set a group number (some entries have TCA times that change slightly and these will be grouped together)
    df = df.sort_values(['sat_pair','tca_time'])
    # A new group starts whenever the satellite pair changes or the TCA time
    # moves forward by more than a minute from the previous row.
    starts_new_pair = df['sat_pair'] != df['sat_pair'].shift(1)
    tca_jumped = df['tca_time'].diff() > pd.Timedelta('1 min')
    # cumsum() over the "starts a group" flags numbers the groups without
    # relying on a module-level counter, so repeated calls give the same ids.
    df['group'] = (starts_new_pair | tca_jumped).cumsum()

    # Resort
    df = df.sort_values(['group','extract_date'])

    return df

def get_socrates_with_tle_data(df, tle_data_path):
    '''
    Attaches the stored TLE data to the last socrates row of each group

    Parameters:
    -----------
    df : Pandas Dataframe
        The socrates dataframe (must contain a ``group`` column)

    tle_data_path : str
        Relative file path of the pickled TLE data

    Returns
    -------
    df : Pandas Dataframe
        One row per group (the last row of that group in ``df``), left-joined
        with the TLE data from file; rows without a TLE match are kept
    '''
    merge_keys = ['sat_pair', 'tca_time', 'sat1_norad', 'sat2_norad']

    # Last row of every group - this is the most recent socrates record when
    # df is ordered by extract_date within each group
    latest_per_group = df.groupby('group').tail(1)

    # NOTE(review): unpickling can execute arbitrary code - only point this
    # at a pickle file that this project produced itself
    stored_tle = pd.read_pickle(tle_data_path)

    return latest_per_group.merge(stored_tle, on=merge_keys, how='left')
    
def get_all_socrates_and_tle_data(socrates_files_path, tle_file_path):
    '''
    Returns Socrates and TLE data joined together

    Only TLE data already stored in ``tle_file_path`` is merged in; nothing
    is fetched from the API here, so TLE columns may contain nulls.

    Parameters:
    -----------
    socrates_files_path : str
        Relative path to socrates data

    tle_file_path : str
        Relative file path of TLE data

    Returns
    -------
    soc_df : Pandas Dataframe
        Socrates complete data

    tle_df : Pandas Dataframe
        Trimmed set of socrates data with TLE data (from file only)
    '''
    # Use the path supplied by the caller (it used to be overwritten by a
    # hard-coded location, which silently ignored the argument).
    soc_df = get_socrates_cleaned_data(socrates_files_path)
    tle_df = get_socrates_with_tle_data(soc_df, tle_file_path)

    return soc_df, tle_df

# Locations of the input data, relative to this notebook:
# - directory holding the socrates_<timestamp>.csv[.gz] extracts
socrates_files_path = '../../data/socrates/'
# - gzip-compressed pickle of TLE data; the same path is written to later in
#   the notebook when the updated TLE table is saved
tle_file_path = '../../data/socrates_tca_gp_history_tle.pkl.gz'

# soc_df: cleaned socrates records; tle_df: one row per group with the TLE
# columns merged in from the pickle file
soc_df, tle_df = get_all_socrates_and_tle_data(socrates_files_path, tle_file_path)

In [52]:
# Scratch CSV the downloaded TLE records are appended to
tmp_tle_file = './tle2.csv'

# Create a new df of the socrates entries with missing TLE data
# (one row per satellite whose TLE is null: its norad id and last epoch)
mtle_df1 = tle_df.loc[tle_df['sat1_tle'].isnull(), ['sat1_norad','sat1_last_epoch']].rename(columns={'sat1_norad':'norad','sat1_last_epoch':'last_epoch'})
mtle_df2 = tle_df.loc[tle_df['sat2_tle'].isnull(), ['sat2_norad','sat2_last_epoch']].rename(columns={'sat2_norad':'norad','sat2_last_epoch':'last_epoch'})
miss_tle_df = pd.concat([mtle_df1, mtle_df2]).sort_values('last_epoch')

# Split the missing TLE dataset into bins of at most `bin_size` rows.
# The rows are already ordered by last_epoch, so consecutive chunks cover
# consecutive date ranges.  Chunking by position (rather than pd.qcut) also
# works when nothing is missing (zero bins) and when many rows share the
# same epoch (qcut raises on duplicated bin edges).
bin_size = 100
num_bins = -(-len(miss_tle_df) // bin_size)  # integer ceil(len / bin_size)
miss_tle_df['bin'] = [position // bin_size for position in range(len(miss_tle_df))]
print (num_bins)

48


In [53]:
# For each bin - make a request to SpaceTrack for all norads within that bin with a min/max daterange
# This will save the data to a CSV file we will parse next (we save to ensure an interrupted progress
# does not result in a massive amount of lost data)
epoch_tolerance = pd.Timedelta('5 min')
for b in tqdm(range(num_bins)):
    tmp_df = miss_tle_df[miss_tle_df['bin'] == b]
    if tmp_df.empty:
        # Nothing to request for this bin
        continue
    min_epoch = tmp_df['last_epoch'].min().to_pydatetime()
    max_epoch = tmp_df['last_epoch'].max().to_pydatetime()
    # Pad the requested range by 5 minutes on both sides so records close to
    # the edges are still returned
    epoch_range = op.inclusive_range(min_epoch - timedelta(minutes=5), max_epoch + timedelta(minutes=5))
    all_norads = list(tmp_df['norad'].unique())
    all_data = st.gp_history(norad_cat_id = all_norads, epoch=epoch_range)

    # Parse every returned epoch once per bin instead of once per (row, record) pair
    candidates = [
        (rec['NORAD_CAT_ID'], pd.to_datetime(rec['EPOCH'], format='%Y-%m-%dT%H:%M:%S.%f'), rec)
        for rec in all_data
    ]

    # Write the data to a file (opened once per bin, in append mode)
    with open(tmp_tle_file, 'a') as f:
        for idx, row in tmp_df.iterrows():
            norad = str(row['norad'])
            # First returned record for this norad whose epoch lies within
            # the tolerance of the wanted epoch
            d = next(
                (rec for rec_norad, rec_epoch, rec in candidates
                 if rec_norad == norad and abs(rec_epoch - row['last_epoch']) < epoch_tolerance),
                None,
            )
            if d is None:
                # Do not abort the whole download because one record is absent
                print(f'No SpaceTrack record for norad={norad} near {row["last_epoch"]} - skipping')
                continue
            s = ','.join([norad, row['last_epoch'].strftime('%Y%m%d%H%M%S%f'), d['TLE_LINE1'], d['TLE_LINE2'], d['EPOCH']])
            f.write(s + '\n')

  exec(code_obj, self.user_global_ns, self.user_ns)
100%|██████████████████████████████████████████████████████████████████████████████████| 48/48 [03:09<00:00,  3.95s/it]


In [54]:

# Open the file created above which contains our new TLE data from SpaceTrack
# last_epoch is stored as a 20-digit %Y%m%d%H%M%S%f token, which does not fit
# in a 64-bit integer - read it explicitly as text so parsing the timestamp
# does not depend on how read_csv infers the column type.
new_tle_df = pd.read_csv(
    tmp_tle_file,
    names = ['norad','last_epoch', 'tle_line1', 'tle_line2', 'tle_epoch'],
    dtype = {'last_epoch': str},
)
new_tle_df['last_epoch'] = pd.to_datetime(new_tle_df['last_epoch'], format='%Y%m%d%H%M%S%f')
new_tle_df

Unnamed: 0,norad,last_epoch,tle_line1,tle_line2,tle_epoch
0,46956,2020-12-07 10:26:42.503000,1 46956U 20085AD 20342.43569727 +.00009211 +0...,2 46956 097.3684 116.6896 0014474 252.4864 107...,2020-12-07T10:27:24.244128
1,46956,2020-12-07 10:26:46.397000,1 46956U 20085AD 20342.43569727 +.00009211 +0...,2 46956 097.3684 116.6896 0014474 252.4864 107...,2020-12-07T10:27:24.244128
2,46956,2020-12-07 10:26:58.711000,1 46956U 20085AD 20342.43569727 +.00009211 +0...,2 46956 097.3684 116.6896 0014474 252.4864 107...,2020-12-07T10:27:24.244128
3,46956,2020-12-07 10:27:03.471000,1 46956U 20085AD 20342.43569727 +.00009211 +0...,2 46956 097.3684 116.6896 0014474 252.4864 107...,2020-12-07T10:27:24.244128
4,46956,2020-12-07 10:27:19.784000,1 46956U 20085AD 20342.43569727 +.00009211 +0...,2 46956 097.3684 116.6896 0014474 252.4864 107...,2020-12-07T10:27:24.244128
...,...,...,...,...,...
8742,46687,2020-12-21 06:00:40.339999,1 46687U 20073T 20356.25001157 -.00032326 0...,2 46687 53.0537 156.4222 0002423 24.7100 179...,2020-12-21T06:00:00.999648
8743,46698,2020-12-21 06:00:40.531000,1 46698U 20073AE 20356.25001157 .00039952 0...,2 46698 53.0541 156.4452 0002064 21.4820 170...,2020-12-21T06:00:00.999648
8744,46694,2020-12-21 06:00:41.914000,1 46694U 20073AA 20356.25001157 -.00011172 0...,2 46694 53.0543 156.4335 0002340 36.3499 163...,2020-12-21T06:00:00.999648
8745,43171,2020-12-21 06:07:21.751000,1 43171U 18011C 20356.25496003 .00000239 0...,2 43171 35.0001 328.2871 0003494 312.5159 47...,2020-12-21T06:07:08.546592


In [55]:
# For each TLE record, find the corresponding socrates data and update their TLE
# A TLE record matches a socrates row when the norad id is equal and the
# row's last epoch is within 5 minutes of the record's last epoch; it is
# checked against both the sat1 and the sat2 side of every row.
match_window = pd.Timedelta('5 min')
count = 0            # number of (TLE record, satellite side) matches applied
all_success = True   # becomes False when any TLE record matches nothing
for index, row in tqdm(new_tle_df.iterrows(), total=len(new_tle_df)):
    found = False
    tle_text = row['tle_line1'] + ',' + row['tle_line2']
    for sat in ('sat1', 'sat2'):
        target = (tle_df[f'{sat}_norad'] == row['norad']) & ((tle_df[f'{sat}_last_epoch'] - row['last_epoch']).abs() < match_window)
        if target.any():
            found = True
            # .loc with a boolean mask updates every matching row; .at only
            # addresses a single scalar label.
            tle_df.loc[target, f'{sat}_tle'] = tle_text
            tle_df.loc[target, f'{sat}_tle_epoch'] = row['tle_epoch']
            count += 1
    if not found:
        print(f'Cant find norad={row["norad"]} for date {row["last_epoch"]} - perhaps we have an updated socrates file?')
        all_success = False
print(f'Finished adding {count} records to tle_df')

3571it [00:48, 88.43it/s]

Cant find norad=26931 for date 2020-12-19 18:51:21.912000 - perhaps we have an updated socrates file?
Cant find norad=26931 for date 2020-12-19 18:51:36.576000 - perhaps we have an updated socrates file?
Cant find norad=26931 for date 2020-12-19 18:52:10.464000 - perhaps we have an updated socrates file?
Cant find norad=42955 for date 2020-12-19 19:32:15.099000 - perhaps we have an updated socrates file?
Cant find norad=42955 for date 2020-12-19 19:32:35.233000 - perhaps we have an updated socrates file?


3636it [00:48, 82.13it/s]

Cant find norad=41923 for date 2020-12-19 20:38:03.564000 - perhaps we have an updated socrates file?
Cant find norad=10514 for date 2020-12-19 20:39:29.268000 - perhaps we have an updated socrates file?
Cant find norad=10514 for date 2020-12-19 20:40:40.071000 - perhaps we have an updated socrates file?


3672it [00:49, 80.81it/s]

Cant find norad=19851 for date 2020-12-19 21:21:35.146000 - perhaps we have an updated socrates file?


3739it [00:50, 76.15it/s]

Cant find norad=12504 for date 2020-12-19 22:05:43.216000 - perhaps we have an updated socrates file?


3773it [00:50, 79.84it/s]

Cant find norad=44885 for date 2020-12-19 23:41:34.771000 - perhaps we have an updated socrates file?


3804it [00:51, 65.03it/s]

Cant find norad=13259 for date 2020-12-20 02:33:28.987000 - perhaps we have an updated socrates file?


3860it [00:52, 74.61it/s]

Cant find norad=43690 for date 2020-12-20 03:53:15.863000 - perhaps we have an updated socrates file?


3926it [00:52, 68.46it/s]

Cant find norad=38709 for date 2020-12-20 09:14:31.789000 - perhaps we have an updated socrates file?
Cant find norad=40028 for date 2020-12-20 09:43:59.065000 - perhaps we have an updated socrates file?


3950it [00:53, 65.01it/s]

Cant find norad=29522 for date 2020-12-20 10:36:45.680000 - perhaps we have an updated socrates file?


3973it [00:53, 66.96it/s]

Cant find norad=39087 for date 2020-12-20 13:44:35.727000 - perhaps we have an updated socrates file?
Cant find norad=39087 for date 2020-12-20 13:44:55.764000 - perhaps we have an updated socrates file?


8747it [01:41, 86.26it/s] 

Finished adding 13183 records to tle_df





In [59]:
# Save the TLE data to a pickle file
# Only the merge keys and the TLE columns are stored.
tle_pickle_columns = ['sat_pair','tca_time','sat1_norad','sat2_norad','sat1_tle','sat1_tle_epoch','sat2_tle','sat2_tle_epoch']
# compression is passed by keyword: positional arguments after the path are
# deprecated in recent pandas releases.
tle_df[tle_pickle_columns].to_pickle(tle_file_path, compression='gzip')
# Keep the scratch CSV around when some records could not be matched, so the
# messages can be checked before it is deleted by hand.
if all_success:
    remove(tmp_tle_file)
else:
    print(f'Please check messages and remove {tmp_tle_file} if everything was okay.')

Please check messages and remove ./tle2.csv if everything was okay.


In [57]:

# Reload everything from disk (now including the TLE pickle saved above)
soc_df, tle2_df = get_all_socrates_and_tle_data(socrates_files_path, tle_file_path)

# Rebuild the list of satellites that still have no TLE: one frame for the
# sat1 side, one for the sat2 side, both with columns norad / last_epoch
mtle_df12 = (
    tle2_df.loc[tle2_df['sat1_tle'].isnull(), ['sat1_norad', 'sat1_last_epoch']]
    .rename(columns={'sat1_norad': 'norad', 'sat1_last_epoch': 'last_epoch'})
)
mtle_df22 = (
    tle2_df.loc[tle2_df['sat2_tle'].isnull(), ['sat2_norad', 'sat2_last_epoch']]
    .rename(columns={'sat2_norad': 'norad', 'sat2_last_epoch': 'last_epoch'})
)
miss_tle_df2 = pd.concat([mtle_df12, mtle_df22])