In [1]:
import pandas as pd
import datetime

In [2]:
df1 = pd.read_csv("./satellite_breakup(from PDF).csv")
df1[["LAUNCH DATE", "BREAKUP DATE"]] = df1[["LAUNCH DATE", "BREAKUP DATE"]].apply(pd.to_datetime)
df1["launch_designator"] = df1["INTERNATIONAL DESIGNATOR"].str.slice(stop=8)
df1 = df1[['SATELLITE NAME', 'INTERNATIONAL DESIGNATOR', 'US SATELLITE NUMBER', 'LAUNCH DATE', 'BREAKUP DATE', 'DEBRIS CATALOGED', 'DEBRIS ON-ORBIT', 'launch_designator']]

df2 = pd.read_csv("../../data/satcat_debut0119.csv.gz", compression="gzip")
df2[["DEBUT", "DECAY", "LAUNCH"]] = df2[["DEBUT", "DECAY", "LAUNCH"]].apply(pd.to_datetime)
df2["launch_designator"] = df2.INTLDES.str.slice(stop=8)
df2 = df2[['INTLDES', 'NORAD_CAT_ID', 'OBJECT_TYPE', 'SATNAME', 'DEBUT', 'LAUNCH', 'DECAY', 'launch_designator']]

In [3]:
def fix_date(x):
    if x.year > 2056:
        year = x.year - 100
    else:
        year = x.year
    return datetime.date(year,x.month,x.day)

# dates < 1969 are parsed as 20xx, fix it
df1['BREAKUP DATE'] = df1['BREAKUP DATE'].apply(fix_date)
df1['LAUNCH DATE'] = df1['LAUNCH DATE'].apply(fix_date)

US SATELLITE NUMBER == NORAD_CAT_ID

Check if PDF data includes anything that doesn't exist in SATCAT

NOPE! That's great

In [4]:
df1[~df1["US SATELLITE NUMBER"].isin(df2.NORAD_CAT_ID.values)]
# 0 rows = everything matches up

Unnamed: 0,SATELLITE NAME,INTERNATIONAL DESIGNATOR,US SATELLITE NUMBER,LAUNCH DATE,BREAKUP DATE,DEBRIS CATALOGED,DEBRIS ON-ORBIT,launch_designator


As seen below, there are 13 incidents where the same launch had 2 documented breakup dates. Unfortunately we cannot assign the proper breakup date for debris generated from these events. However, there aren't too many of these debris, we may be able to get away with assigning 1 of the breakup dates to all debris with the same starting launch designator.

In [5]:
t = df1.launch_designator.value_counts()
t[t>=2]

1990-110    2
1990-045    2
1987-079    2
1999-057    2
1986-069    2
1988-085    2
1991-025    2
1984-011    2
2002-037    2
2008-067    2
1992-047    2
1989-001    2
2010-007    2
Name: launch_designator, dtype: int64

In [6]:
df1 = df1.drop_duplicates(subset="launch_designator")

In [7]:
combined = df2.merge(df1[["launch_designator","BREAKUP DATE"]], how="left", left_on="launch_designator", right_on="launch_designator")
# combined[(combined.OBJECT_TYPE=="DEBRIS") & combined["BREAKUP DATE"].notnull()]
# combined[(combined.OBJECT_TYPE=="DEBRIS") & combined["BREAKUP DATE"].isnull()]
len(combined)

47315

All relevant data is now combined int he `combined` dataframe.  We just need to select the correct date to use with the following rules:
1. For non-DEBRIS, we can use the LAUNCH
1. For debris listed in the PDF file, we should use `BREAKUP DATE`
1. If the `DEBUT` is earlier than `DECAY`, we can probably trust it
1. If it `DEBUT` later than 2005, we can also trust it since these are reported to space track as soon as they are spotted
1. if a debris `DECAYED` within 1 year of `LAUNCH`, let's just assumed that it was created on launch, ok? :)

In [8]:
# For non-DEBRIS, we can use the LAUNCH
combined.loc[(combined.OBJECT_TYPE!="DEBRIS"),"exist_date"] = combined.LAUNCH
len(combined[combined["exist_date"].isnull()])

30922

In [9]:
# For debris listed in the PDF file, we should use BREAKUP DATE
combined.loc[((combined.OBJECT_TYPE=="DEBRIS") & combined["BREAKUP DATE"].notnull()),"exist_date"] = combined["BREAKUP DATE"]
len(combined[combined["exist_date"].isnull()])

10294

In [10]:
# If the `DEBUT` is earlier than `DECAY`, we can probably trust it
combined.loc[((combined.OBJECT_TYPE=="DEBRIS") & combined["exist_date"].isnull() & (combined.DECAY > combined.DEBUT)),"exist_date"] = combined.DEBUT
len(combined[combined["exist_date"].isnull()])

9118

In [11]:
# If it `DEBUT` later than 2005, we can also trust it since these are reported to space track as soon as they are spotted
combined.loc[((combined.OBJECT_TYPE=="DEBRIS") & combined["exist_date"].isnull() & (combined.DEBUT > "2005")),"exist_date"] = combined.DEBUT
len(combined[combined["exist_date"].isnull()])

7236

In [12]:
# if a debris DECAYED within 1 year of launch, let's just assumed that it was created on launch, ok? :)
combined.loc[(combined["exist_date"].isnull() & (combined.DECAY - combined.LAUNCH < datetime.timedelta(days=365))),"exist_date"] = combined.LAUNCH
len(combined[combined["exist_date"].isnull()])

2831

In [13]:
# set the rest to launch date since we don't have anything else to go by
combined.loc[(combined["exist_date"].isnull()),"exist_date"] = combined.LAUNCH
len(combined[combined["exist_date"].isnull()])

0

In [14]:
combined[combined["exist_date"].isnull()].SATNAME.value_counts()

Series([], Name: SATNAME, dtype: int64)

In [15]:
combined[combined["exist_date"].isnull()]

Unnamed: 0,INTLDES,NORAD_CAT_ID,OBJECT_TYPE,SATNAME,DEBUT,LAUNCH,DECAY,launch_designator,BREAKUP DATE,exist_date


In [16]:
combined[combined.SATNAME=="SL-8 DEB"].sort_values(by="DECAY")

Unnamed: 0,INTLDES,NORAD_CAT_ID,OBJECT_TYPE,SATNAME,DEBUT,LAUNCH,DECAY,launch_designator,BREAKUP DATE,exist_date
1198,1964-046G,860,DEBRIS,SL-8 DEB,2002-01-13 00:00:00,1964-08-18,1964-09-16,1964-046,,1964-08-18 00:00:00
1196,1964-046E,857,DEBRIS,SL-8 DEB,2002-01-13 00:00:00,1964-08-18,1964-09-21,1964-046,,1964-08-18 00:00:00
1197,1964-046F,859,DEBRIS,SL-8 DEB,2002-01-13 00:00:00,1964-08-18,1964-09-25,1964-046,,1964-08-18 00:00:00
1694,1965-020AU,1418,DEBRIS,SL-8 DEB,2002-01-13 00:00:00,1965-03-15,1965-08-10,1965-020,1965-03-15,1965-03-15
1677,1965-020AB,1356,DEBRIS,SL-8 DEB,2002-01-13 00:00:00,1965-03-15,1965-08-14,1965-020,1965-03-15,1965-03-15
...,...,...,...,...,...,...,...,...,...,...
25585,1992-053D,25027,DEBRIS,SL-8 DEB,2004-10-07 11:32:23,1992-08-12,NaT,1992-053,,1992-08-12 00:00:00
27540,1993-036C,23007,DEBRIS,SL-8 DEB,2004-10-07 11:32:23,1993-06-16,NaT,1993-036,2009-02-10,2009-02-10
27565,1993-036D,25028,DEBRIS,SL-8 DEB,2004-10-07 11:32:23,1993-06-16,NaT,1993-036,2009-02-10,2009-02-10
37850,2002-054D,27768,DEBRIS,SL-8 DEB,2004-10-07 12:05:46,2002-11-28,NaT,2002-054,,2002-11-28 00:00:00


In [17]:
combined[combined.SATNAME=="SALYUT 7 DEB"].sort_values(by="DECAY")

Unnamed: 0,INTLDES,NORAD_CAT_ID,OBJECT_TYPE,SATNAME,DEBUT,LAUNCH,DECAY,launch_designator,BREAKUP DATE,exist_date
16425,1982-033D,13204,DEBRIS,SALYUT 7 DEB,2004-10-07 11:32:23,1982-04-19,1982-06-20,1982-033,,1982-04-19 00:00:00
16450,1982-033E,13270,DEBRIS,SALYUT 7 DEB,2004-10-07 11:32:23,1982-04-19,1982-06-24,1982-033,,1982-04-19 00:00:00
16475,1982-033F,13274,DEBRIS,SALYUT 7 DEB,2004-10-07 11:32:23,1982-04-19,1982-06-27,1982-033,,1982-04-19 00:00:00
16500,1982-033G,13275,DEBRIS,SALYUT 7 DEB,2004-10-07 11:32:23,1982-04-19,1982-06-27,1982-033,,1982-04-19 00:00:00
16525,1982-033H,13287,DEBRIS,SALYUT 7 DEB,2004-10-07 11:32:23,1982-04-19,1982-07-02,1982-033,,1982-04-19 00:00:00
...,...,...,...,...,...,...,...,...,...,...
16513,1982-033GN,16253,DEBRIS,SALYUT 7 DEB,2004-10-07 11:32:23,1982-04-19,1986-12-03,1982-033,,1982-04-19 00:00:00
16492,1982-033FS,16166,DEBRIS,SALYUT 7 DEB,2004-10-07 11:32:23,1982-04-19,1987-01-15,1982-033,,1982-04-19 00:00:00
16527,1982-033HB,16813,DEBRIS,SALYUT 7 DEB,2004-10-07 11:32:23,1982-04-19,1987-01-27,1982-033,,1982-04-19 00:00:00
16528,1982-033HC,16814,DEBRIS,SALYUT 7 DEB,2004-10-07 11:32:23,1982-04-19,1987-02-20,1982-033,,1982-04-19 00:00:00


In [18]:
display(df1[df1["INTERNATIONAL DESIGNATOR"]=="1961-015C"])
print("Documented in satcat:",len(df2[df2.INTLDES.str.startswith("1961-015C") & (df2.OBJECT_TYPE == "DEBRIS")]))
# can see from this example that the PDF file DEBRIS CATALOGED and DEBRIS ON-ORBIT is more than SATCAT

Unnamed: 0,SATELLITE NAME,INTERNATIONAL DESIGNATOR,US SATELLITE NUMBER,LAUNCH DATE,BREAKUP DATE,DEBRIS CATALOGED,DEBRIS ON-ORBIT,launch_designator
0,TRANSIT 4A R/B,1961-015C,118,1961-06-29,1961-06-29,296,172.0,1961-015


Documented in satcat: 24


In [19]:
combined.to_pickle("../../data/satcat_incl_breakup_dates.pkl.gz","gzip")

In [22]:
df1.sort_values(by="DEBRIS CATALOGED", ascending=False).head(20)

Unnamed: 0,SATELLITE NAME,INTERNATIONAL DESIGNATOR,US SATELLITE NUMBER,LAUNCH DATE,BREAKUP DATE,DEBRIS CATALOGED,DEBRIS ON-ORBIT,launch_designator
195,FENGYUN 1C,1999-025A,25730,1999-05-10,2007-01-11,3442,2832.0,1999-025
171,COSMOS 2251,1993-036A,22675,1993-06-16,2009-02-10,1668,1076.0,1993-036
176,STEP II R/B,1994-029B,23106,1994-05-19,1996-06-03,754,82.0,1994-029
188,IRIDIUM 33,1997-051C,24946,1997-09-14,2009-02-10,628,333.0,1997-051
209,COSMOS 2421,2006-026A,29247,2006-06-25,2008-03-14,509,0.0,2006-026
109,SPOT 1 R/B,1986-019C,16615,1986-02-22,1986-11-13,498,31.0,1986-019
76,COSMOS 1275,1981-053A,12504,1981-06-04,1981-07-24,479,421.0,1981-053
6,OV2-1/LCS 2 R/B,1965-082DM,1822,1965-10-15,1965-10-15,473,32.0,1965-082
200,NOAA 16,2000-055A,26536,2000-09-21,2015-11-25,458,458.0,2000-055
27,NIMBUS 4 R/B,1970-025C,4367,1970-04-08,1970-10-17,441,299.0,1970-025
