In [1]:
import pandas as pd
from os import listdir
from os.path import isfile, join
import re

import spacetrack.operators as op
from spacetrack import SpaceTrackClient
from datetime import datetime, timedelta

from tqdm import tqdm

# Space-Track credentials are read from a local key file holding a single
# "username,password" line, so they never appear in the notebook itself.
with open('./spacetrack_pwd.key') as key_file:
    # rstrip() only removes a trailing line ending (the old [:-1] slice ate the
    # last password character when the file had no newline); maxsplit=1 keeps
    # passwords that themselves contain a comma intact.
    spacetrack_usr, spacetrack_pwd = key_file.read().rstrip('\r\n').split(',', 1)
st = SpaceTrackClient(identity=spacetrack_usr, password=spacetrack_pwd)

In [2]:
def get_all_socrates_data(path):
    '''
    Builds a dataframe out of all the socrates data files

    Only regular files directly inside ``path`` whose name matches
    ``socrates_<14 digits>.csv`` (optionally followed by ``.gz``) are read.
    Files are loaded in file-name order so the row order of the result does
    not depend on the order in which the operating system lists the directory.

    Parameters:
    -----------
    path : str
        Relative file path of socrates files (with or without a trailing
        path separator)

    Returns
    -------
    df : Pandas Dataframe
        Combined set of all socrates data, with the date columns parsed,
        ``sat1_last_epoch`` / ``sat2_last_epoch`` derived from the TCA time
        and a ``sat_pair`` column of the form ``<sat1_name>-<sat2_name>``

    Raises
    ------
    FileNotFoundError
        If ``path`` contains no socrates data files
    '''
    # Raw string: "\." in a normal string literal is an invalid escape sequence
    name_pattern = re.compile(r'^socrates_([0-9]{14})\.csv(\.gz)?$')
    files = sorted(
        f for f in listdir(path)
        if isfile(join(path, f)) and name_pattern.search(f)
    )
    if not files:
        raise FileNotFoundError(f'No socrates_<timestamp>.csv files found in {path!r}')

    # Build single dataset - read everything first and concatenate once
    # instead of re-copying the growing frame for every file
    df = pd.concat([pd.read_csv(join(path, f)) for f in files])

    # Fix dates and timedeltas
    socrates_time_format = '%Y %b %d %H:%M:%S.%f'
    df['extract_date'] = pd.to_datetime(df['extract_date'], format='%Y-%m-%d %H:%M:%S.%f')
    for column in ('start_time', 'tca_time', 'stop_time'):
        df[column] = pd.to_datetime(df[column], format=socrates_time_format)
    for sat in ('sat1', 'sat2'):
        # <sat>_days_epoch is interpreted as a number of days and subtracted
        # from the TCA time
        df[sat + '_days_epoch'] = pd.to_timedelta(df[sat + '_days_epoch'], unit='D')
        df[sat + '_last_epoch'] = df['tca_time'] - df[sat + '_days_epoch']

    # Add "pair" column (vectorised string concatenation instead of a row-wise apply)
    df['sat_pair'] = df['sat1_name'] + '-' + df['sat2_name']

    return df

In [3]:
# Running counter shared by every call to set_group_number
group_num = 0
def set_group_number(x):
    '''
    Advances the module-level group counter when asked to and reports it
    (intended to be mapped over a boolean series via pd.apply)

    Parameters:
    -----------
    x : Boolean
        Truthy when the current row starts a new group

    Returns
    -------
    group_num : int
        Value of the counter after handling this row
    '''
    global group_num
    group_num = group_num + 1 if x else group_num
    return group_num

def get_socrates_cleaned_data(path, tca_tolerance='1 min'):
    '''
    Builds a dataframe out of all the socrates data files,
    removes duplicates, assigns group numbers and sorts

    Parameters:
    -----------
    path : str
        Relative file path of socrates files

    tca_tolerance : str or Timedelta, optional
        Largest gap between consecutive TCA times of the same satellite pair
        that is still treated as the same group (default '1 min')

    Returns
    -------
    df : Pandas Dataframe
        Combined set of all socrates data with an added integer ``group``
        column (numbered from 1), sorted by ``group`` then ``extract_date``
    '''
    df = get_all_socrates_data(path)

    # Clean the data
    # Remove duplicates - keep the first occurrence of a sat-pair and tca_time
    df = df.drop_duplicates(subset=['sat_pair', 'tca_time'], keep='first')

    # Set a group number (some entries have TCA times that change slightly and these will be grouped together)
    df = df.sort_values(['sat_pair', 'tca_time'])
    pair_changed = df['sat_pair'] != df['sat_pair'].shift(1)
    tca_jumped = df['tca_time'].diff() > pd.Timedelta(tca_tolerance)
    # A running sum of the "new group starts here" flags numbers the groups
    # 1, 2, 3, ... without relying on a module-level counter, so calling this
    # function again yields the same group numbers.
    df['group'] = (pair_changed | tca_jumped).cumsum()

    # Resort
    df = df.sort_values(['group', 'extract_date'])

    return df

In [4]:
def get_socrates_with_tle_data(df, tle_data_path):
    '''
    Attaches the stored TLE data to one socrates record per group

    Parameters:
    -----------
    df : Pandas Dataframe
        The socrates dataframe (must contain a 'group' column)

    tle_data_path : str
        Relative file path of the pickled TLE dataframe

    Returns
    -------
    df : Pandas Dataframe
        The last row of every group, left-joined with the TLE data (from
        file) on satellite pair, TCA time and both NORAD ids
    '''
    merge_keys = ['sat_pair', 'tca_time', 'sat1_norad', 'sat2_norad']

    # The last row of a group is whichever comes last in df's current order;
    # it is the most recent socrates record only if the caller sorted df by
    # extract_date within each group.
    latest_rows = df.groupby('group').tail(1)

    # NOTE(review): unpickling can execute arbitrary code - only point this at
    # files produced by a trusted source.
    stored_tles = pd.read_pickle(tle_data_path)

    # Left join: groups without stored TLE data keep their row with empty TLE columns
    return latest_rows.merge(stored_tles, on=merge_keys, how='left')

In [5]:

# Input locations, relative to this notebook
socrates_files_path = '../../data/socrates/'
tle_file_path = '../../data/socrates_tca_gp_history_tle.pkl.gz'

# sdf: cleaned and grouped socrates records
sdf = get_socrates_cleaned_data(socrates_files_path)
# gdf: last record of each group, left-joined with the TLE data read from tle_file_path
gdf = get_socrates_with_tle_data(sdf, tle_file_path)

In [6]:
# Read the TLE export; every line of the file is treated as data and the
# column names are supplied here.
tle_df = pd.read_csv(
    './tle.csv',
    header=None,
    names=[
        'sat_pair', 'tca_time',
        'sat1_tle1', 'sat1_tle2', 'sat1_tle_epoch',
        'sat2_tle1', 'sat2_tle2', 'sat2_tle_epoch',
    ],
)
# tca_time is parsed from a compact YYYYmmddHHMMSS stamp
tle_df['tca_time'] = pd.to_datetime(tle_df['tca_time'], format='%Y%m%d%H%M%S')
# The sat1_tle_epoch / sat2_tle_epoch columns are kept exactly as read from
# the CSV (no datetime conversion is applied).
tle_df

Unnamed: 0,sat_pair,tca_time,sat1_tle1,sat1_tle2,sat1_tle_epoch,sat2_tle1,sat2_tle2,sat2_tle_epoch
0,3CAT-5B [+]-COSMOS 252 DEB [-],2020-12-22 06:27:59,1 46293U 20061X 20352.95181124 .00000738 0...,2 46293 97.5085 64.0856 0002349 333.1329 26...,2020-12-17T22:50:36.491136,1 3555U 68097H 20353.34640880 .00000087 0...,2 3555 62.3143 315.2097 0929972 107.7902 262...,2020-12-18T08:18:49.720320
1,3CAT-5B [+]-FENGYUN 1C DEB [-],2020-12-16 04:03:10,1 46293U 20061X 20350.69929168 .00000643 0...,2 46293 97.5090 61.8769 0002540 343.7527 16...,2020-12-15T16:46:58.801152,1 47022U 99025FAY 20349.93191510 .00008194 0...,2 47022 97.7044 231.8299 0132310 191.1137 180...,2020-12-14T22:21:57.464640
2,3CAT-5B [+]-FENGYUN 1C DEB [-],2020-12-18 02:05:27,1 46293U 20061X 20352.95181124 .00000738 0...,2 46293 97.5085 64.0856 0002349 333.1329 26...,2020-12-17T22:50:36.491136,1 32435U 99025DAX 20351.92615303 .00003034 0...,2 32435 99.4779 238.1419 0103944 95.4137 265...,2020-12-16T22:13:39.621792
3,3CAT-5B [+]-SL-12 DEB [-],2020-12-14 14:17:32,1 46293U 20061X 20348.77802097 .00000559 0...,2 46293 97.5092 59.9931 0002751 350.6899 9...,2020-12-13T18:40:21.011808,1 29441U 89039CQ 20348.88858275 .00056132 0...,2 29441 62.6947 200.8757 3933409 83.5368 318...,2020-12-13T21:19:33.549600
4,3CAT-5B [+]-TAURUS R/B [-],2020-12-12 03:22:51,1 46293U 20061X 20346.79049707 .00000629 0...,2 46293 97.5092 58.0441 0002943 357.3223 2...,2020-12-11T18:58:18.946848,1 26103U 00014B 20346.79598362 .00000635 0...,2 26103 97.2993 292.1752 0028832 174.1268 186...,2020-12-11T19:06:12.984768
...,...,...,...,...,...,...,...,...
1198,CELESTIS-02 & TAURUS R/B [+]-COSMOS 1867 COOLA...,2020-12-16 13:47:05,1 25160U 98007D 20350.67111718 -.00000098 0...,2 25160 107.9677 242.2346 0063877 328.0758 92...,2020-12-15T16:06:24.524352,1 39643U 87060L 20350.45669267 .00000005 0...,2 39643 65.0111 245.7431 0017127 281.0407 78...,2020-12-15T10:57:38.246688
1199,CELESTIS-02 & TAURUS R/B [+]-COSMOS 2251 DEB [-],2020-12-14 15:48:30,1 25160U 98007D 20349.20479733 -.00000095 0...,2 25160 107.9676 239.2850 0063808 330.5411 148...,2020-12-14T04:54:54.489312,1 39588U 93036BUT 20340.95960942 .00002097 0...,2 39588 74.1339 86.2948 0074005 141.5888 335...,2020-12-05T23:01:50.253888
1200,CELESTIS-02 & TAURUS R/B [+]-COSMOS 2251 DEB [-],2020-12-19 00:36:05,1 25160U 98007D 20353.69926235 -.00000071 0...,2 25160 107.9677 248.3258 0064014 322.9846 109...,2020-12-18T16:46:56.267040,1 34678U 93036XY 20353.65183208 -.00000004 0...,2 34678 74.0384 73.4147 0026061 206.3140 273...,2020-12-18T15:38:38.291712
1201,CELESTIS-02 & TAURUS R/B [+]-DMSP 5D-2 F13 DEB...,2020-12-10 01:51:06,1 25160U 98007D 20344.48059663 -.00000064 +0...,2 25160 107.9675 229.7820 0063573 338.4740 092...,2020-12-09T11:32:03.548832,1 40605U 95015FC 20344.03512373 .00001349 0...,2 40605 98.7353 6.0801 0034664 84.5592 275...,2020-12-09T00:50:34.690272


In [7]:
# Number of gdf rows that already carry a satellite-1 TLE, then the number of rows in tle_df
print(gdf['sat1_tle'].notnull().sum())
print(tle_df.shape[0])

26
1203


In [8]:
# Copy the TLE data of every tle_df row onto the gdf row(s) for the same
# satellite pair whose TCA time lies within one second of it.
if 'tca_time2' not in gdf.columns:
    # Create the column up front as datetime so an unmatched first row cannot
    # leave it with a non-datetime dtype
    gdf['tca_time2'] = pd.NaT

for index, row in tle_df.iterrows():
    # Boolean mask instead of an Index of labels: `.at` only addresses a single
    # scalar cell, whereas the match may cover zero, one or several rows, so
    # the assignment is done with `.loc`.
    target = (gdf['sat_pair'] == row['sat_pair']) & ((gdf['tca_time'] - row['tca_time']).abs() < pd.Timedelta('1 sec'))
    gdf.loc[target, 'tca_time2'] = row['tca_time']
    # Both TLE lines are stored as one comma-separated string
    gdf.loc[target, 'sat1_tle'] = row['sat1_tle1'] + ',' + row['sat1_tle2']
    gdf.loc[target, 'sat2_tle'] = row['sat2_tle1'] + ',' + row['sat2_tle2']
    gdf.loc[target, 'sat1_tle_epoch'] = row['sat1_tle_epoch']
    gdf.loc[target, 'sat2_tle_epoch'] = row['sat2_tle_epoch']

In [9]:
# Number of gdf rows that carry a satellite-1 TLE at this point
print(gdf['sat1_tle'].notnull().sum())

1229


In [10]:
# Show the socrates TCA time next to the matched tle_df TCA time for every row that has one
gdf.loc[gdf['tca_time2'].notnull(), ['tca_time', 'tca_time2']]

Unnamed: 0,tca_time,tca_time2
20,2020-12-22 06:27:59.948,2020-12-22 06:27:59
21,2020-12-16 04:03:10.404,2020-12-16 04:03:10
22,2020-12-18 02:05:27.601,2020-12-18 02:05:27
23,2020-12-14 14:17:32.708,2020-12-14 14:17:32
24,2020-12-12 03:22:51.707,2020-12-12 03:22:51
...,...,...
1218,2020-12-16 13:47:05.896,2020-12-16 13:47:05
1219,2020-12-14 15:48:30.361,2020-12-14 15:48:30
1220,2020-12-19 00:36:05.966,2020-12-19 00:36:05
1221,2020-12-10 01:51:06.548,2020-12-10 01:51:06


In [11]:
# Write the key columns plus the TLE columns to tle_file_path as a gzip pickle.
# `compression` is passed by keyword: newer pandas releases deprecate passing
# anything but the path positionally to to_pickle.
gdf[['sat_pair','tca_time','sat1_norad','sat2_norad','sat1_tle','sat1_tle_epoch','sat2_tle','sat2_tle_epoch']].to_pickle(tle_file_path, compression='gzip')