In [52]:
# === Mandatory Header ===
import sys
LEVEL = '../'
sys.path.append(LEVEL)
# =========================

# imports 
from util.load import load_satcat_by_date

## SatCat Breakdown

The Satellite Catalog (SatCat) provides a comprehensive history of every unclassified object that has ever existed in Earth's orbit. This includes objects that have re-entered the atmosphere (splashdowns or burn-ups) or have been retired.

Each entry in the SatCat contains 24 columns of information per object:

<table>
<tbody>
    <tr><th>Field</th><th>Description</th><th>Notes</th></tr>
    <tr><td>INTLDES</td><td>Can be used to uniquely identify an object using launch year, number, and piece</td><td>Also known as COSPAR designation or NSSDC ID</td></tr>
    <tr><td>NORAD_CAT_ID</td><td>Sequential number assigned by the US Space Force as objects are cataloged</td><td>This is the most common way of identifying a satellite</td></tr>
    <tr><td>OBJECT_TYPE</td><td>Identifies the kind of object it is</td><td>Choices: 'ROCKET BODY', 'PAYLOAD', 'DEBRIS', 'UNKNOWN'</td></tr>
    <tr><td>SATNAME</td><td>Name associated with the satellite</td><td></td></tr>
    <tr><td>COUNTRY</td><td>The nation or group responsible for the object</td><td></td></tr>
    <tr><td>LAUNCH</td><td>Date of launch</td><td>Format: YYYY-MM-DD</td></tr>
    <tr><td>SITE</td><td>Location/site of launch</td><td></td></tr>
    <tr><td>DECAY</td><td>Date that the object decayed; NaN if the object has not yet decayed</td><td></td></tr>
    <tr><td>PERIOD</td><td>The number of minutes an object takes to complete one full orbit</td><td>Generally wise to use the most recent state for this information</td></tr>
    <tr><td>INCLINATION</td><td>The angle between the equator and the orbital plane</td><td>Generally wise to use the most recent state for this information</td></tr>
    <tr><td>APOGEE</td><td>Point in the orbit where an Earth satellite is farthest from the Earth (kilometers)</td><td>Generally wise to use the most recent state for this information</td></tr>
    <tr><td>PERIGEE</td><td>Point in the orbit where an Earth satellite is closest to the Earth (kilometers)</td><td>Generally wise to use the most recent state for this information</td></tr>
    <tr><td>COMMENT</td><td>Very rare; sometimes objects have a string with notes</td><td></td></tr>
    <tr><td>COMMENTCODE</td><td>Usually NaN; otherwise a number associated with COMMENT</td><td></td></tr>
    <tr><td>RCSVALUE</td><td>(HIDDEN) Always zero</td><td>Higher data access needed to get exact values</td></tr>
    <tr><td>RCS_SIZE</td><td>Vague description of object size</td><td>Values: 'LARGE', 'MEDIUM', 'SMALL', or NaN</td></tr>
    <tr><td>FILE</td><td>Unique identifying number of the source file for an object's data; higher numbers are more recent uploads</td><td></td></tr>
    <tr><td>LAUNCH_YEAR</td><td>Year of launch (YYYY)</td><td></td></tr>
    <tr><td>LAUNCH_NUM</td><td>Launch number</td><td></td></tr>
    <tr><td>LAUNCH_PIECE</td><td>Three-letter code representing the sequential identifier of a piece in a launch</td><td></td></tr>
    <tr><td>CURRENT</td><td>Documentation is unclear, but the value is always 'Y' for general access</td><td></td></tr>
    <tr><td>OBJECT_NAME</td><td>Name of the object</td><td>identical to SATNAME</td></tr>
    <tr><td>OBJECT_ID</td><td>Unique identifier for the object</td><td>identical to INTLDES</td></tr>
    <tr><td>OBJECT_NUMBER</td><td>Unique identifier for the object</td><td>identical to NORAD_CAT_ID</td></tr>
</tbody>
</table>


---
### Load Data 
---

In [53]:
df_satcat = load_satcat_by_date('24May2025')

---
### High Level Look
---

In [54]:
df_satcat.head(3)

Unnamed: 0,INTLDES,NORAD_CAT_ID,OBJECT_TYPE,SATNAME,COUNTRY,LAUNCH,SITE,DECAY,PERIOD,INCLINATION,...,RCSVALUE,RCS_SIZE,FILE,LAUNCH_YEAR,LAUNCH_NUM,LAUNCH_PIECE,CURRENT,OBJECT_NAME,OBJECT_ID,OBJECT_NUMBER
0,1957-001A,1,ROCKET BODY,SL-1 R/B,CIS,1957-10-04,TTMTR,1957-12-01,96.19,65.1,...,0,LARGE,1,1957,1,A,Y,SL-1 R/B,1957-001A,1
1,1958-001A,4,PAYLOAD,EXPLORER 1,US,1958-02-01,AFETR,1970-03-31,88.48,33.15,...,0,,1,1958,1,A,Y,EXPLORER 1,1958-001A,4
2,1958-003A,6,PAYLOAD,EXPLORER 3,US,1958-03-26,AFETR,1958-06-28,103.6,33.5,...,0,,1,1958,3,A,Y,EXPLORER 3,1958-003A,6


In [55]:
print('total objects in catalog: ',df_satcat.shape[0])

total objects in catalog:  64011


In [56]:
df_satcat.describe() 

Unnamed: 0,NORAD_CAT_ID,PERIOD,INCLINATION,APOGEE,PERIGEE,COMMENTCODE,RCSVALUE,FILE,LAUNCH_YEAR,LAUNCH_NUM,OBJECT_NUMBER
count,64011.0,63037.0,63037.0,63037.0,63037.0,3517.0,64011.0,64011.0,64011.0,64011.0,64011.0
mean,32015.650919,178.180192,68.902649,3550.205752,1743.955106,3.291157,0.0,7142.294449,1996.827217,59.0182,32015.650919
std,18492.26547,690.084959,25.061502,13831.789472,6734.84348,1.409969,0.0,3242.574651,19.517696,42.904259,18492.26547
min,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1957.0,0.0,1.0
25%,16003.5,89.26,53.0,267.0,196.0,3.0,0.0,7337.0,1981.0,25.0,16003.5
50%,32006.0,94.11,70.19,484.0,409.0,3.0,0.0,8690.0,1995.0,51.0,32006.0
75%,48008.5,101.14,92.0,878.0,672.0,5.0,0.0,9134.0,2019.0,84.0,48008.5
max,64095.0,95687.68,152.52,641287.0,299435.0,5.0,0.0,9138.0,2025.0,254.0,64095.0


---
### Categorical variables
---

#### Summary of Findings

1. `INTLDES`/`OBJECT_ID`, `NORAD_CAT_ID`/`OBJECT_NUMBER`, and `SATNAME`/`OBJECT_NAME` are redundant pairs

2. `LAUNCH_PIECE` contains nan values

3. `SATNAME`/`OBJECT_NAME` contain duplicate strings.  

In [57]:
features_cat = ['INTLDES', 'NORAD_CAT_ID', 'OBJECT_TYPE', 'SATNAME', 'COUNTRY',
       'LAUNCH', 'SITE', 'DECAY','COMMENT', 'COMMENTCODE', 'RCSVALUE', 'RCS_SIZE', 'FILE', 'LAUNCH_YEAR',
       'LAUNCH_NUM', 'LAUNCH_PIECE', 'CURRENT', 'OBJECT_NAME', 'OBJECT_ID',
       'OBJECT_NUMBER']

#### i. Object Identifiers

We have several fields that can be usued to identify objects (`INTLDES`, `NORAD_CAT_ID`, `SATNAME`, `LAUNCH_YEAR`, `LAUNCH_NUM, LAUNCH_PIECE`, `OBJECT_NAME`, `OBJECT_ID`). In this subsection we will answer basic questions about these fields as well as learn more about the data. 

**Q: Are `INTLDES` and `OBJECT_ID` always identical/redundant? Is either ever null? Is each row unique?** 

**A: Yes, they appear to be identical; No, neither are ever null; No duplicates** 

In [58]:
print('number of rows that differ:', len(df_satcat[~(df_satcat['INTLDES']==df_satcat['OBJECT_ID'])]))
print('number null:', len(df_satcat[df_satcat['INTLDES'].isna()]))
print('number of duplicates', (df_satcat['INTLDES'].duplicated()).sum())

number of rows that differ: 0
number null: 0
number of duplicates 0


**Q: Are `NORAD_CAT_ID` and `OBJECT_NUMBER` always identical/redundant? Is either ever null? Is each row unique?** 

**A: Yes, they appear to be identical; No, neither are ever null; No Duplicates.** 

In [59]:
print('number of rows that differ:', len(df_satcat[~(df_satcat['NORAD_CAT_ID']==df_satcat['OBJECT_NUMBER'])]))
print('number null:', len(df_satcat[df_satcat['NORAD_CAT_ID'].isna()]))
print('number of duplicates', (df_satcat['NORAD_CAT_ID'].duplicated()).sum())

number of rows that differ: 0
number null: 0
number of duplicates 0


**Q: Are `SATNAME` and `OBJECT_NAME` always identical/redundant? Is either ever null? Is each row unique?** 

**A: Yes, they appear to be identical; No, neither are ever null;** 
**Yes, there are duplicates so you can't use this field as an ID; debris and rocketbodies for a payload will not be reliably unique.**

In [60]:
print('number of rows that differ:', len(df_satcat[~(df_satcat['SATNAME']==df_satcat['OBJECT_NAME'])]))
print('number null:', len(df_satcat[df_satcat['SATNAME'].isna()]))
print('number of duplicates', (df_satcat['SATNAME'].duplicated()).sum())

number of rows that differ: 0
number null: 0
number of duplicates 41064


In [61]:
# sneak peak at duplicate names
df_satcat['SATNAME'].value_counts()[lambda x: x > 1]

SATNAME
FENGYUN 1C DEB        3475
COSMOS 1408 DEB       1806
DELTA 1 DEB           1800
COSMOS 2251 DEB       1714
CZ-6A DEB             1461
                      ... 
THOR BURNER 2A R/B       2
COSMOS 443 DEB           2
OPS 3559                 2
DISCOVERER 32 DEB        2
COSMOS 490 DEB           2
Name: count, Length: 1396, dtype: int64

In [62]:
# sneak peak at duplicate names (no DEB/R/B)
df_satcat['SATNAME'].value_counts()[lambda x: (x > 1) & ~x.index.str.contains('DEB|R/B', regex=True)]

SATNAME
SL-6 PLAT           289
SL-12 PLAT          232
WESTFORD NEEDLES    145
OBJECT B             86
OBJECT C             81
                   ... 
OPS 3722              2
PACE                  2
FTV 1154              2
PAYLOAD B             2
OPS 3559              2
Name: count, Length: 94, dtype: int64

**Q: Can use use `LAUNCH_YEAR`, `LAUNCH_NUM`, and `LAUNCH_PIECE` to reconstruct `INTLDES`/`OBJECT_ID`**

**A: Yes but you have (1) make sure your launch number is padded with zeros (e.g.: 001, 011, 111) and (2) replace NaN launch pieces with 'NA'**

In [63]:
df_satcat['LAUNCH_NUM_padded'] = df_satcat['LAUNCH_NUM'].astype(str).str.pad(width=3, side='left', fillchar='0')
df_satcat['LAUNCH_PIECE_no_nan'] = df_satcat['LAUNCH_PIECE'].fillna('NA')
df_satcat['INTLDES_reconstructed'] = (df_satcat['LAUNCH_YEAR'].astype(str) + '-' +df_satcat['LAUNCH_NUM_padded'].astype(str) +df_satcat['LAUNCH_PIECE_no_nan'].astype(str))

print('number of rows that differ:', len(df_satcat[~(df_satcat['INTLDES']==df_satcat['INTLDES_reconstructed'])]))

number of rows that differ: 0


**Q: Are there any trends in the objects with an undefined `LAUNCH_PIECE` field?**

**A: **

In [64]:
null_lp = df_satcat[df_satcat['LAUNCH_PIECE'].isna()]

print('objects')

objects


In [65]:
null_lp

Unnamed: 0,INTLDES,NORAD_CAT_ID,OBJECT_TYPE,SATNAME,COUNTRY,LAUNCH,SITE,DECAY,PERIOD,INCLINATION,...,LAUNCH_YEAR,LAUNCH_NUM,LAUNCH_PIECE,CURRENT,OBJECT_NAME,OBJECT_ID,OBJECT_NUMBER,LAUNCH_NUM_padded,LAUNCH_PIECE_no_nan,INTLDES_reconstructed
11111,1965-082NA,3400,DEBRIS,TITAN 3C TRANSTAGE DEB,US,1965-10-15,AFETR,1979-03-10,94.12,32.01,...,1965,82,,Y,TITAN 3C TRANSTAGE DEB,1965-082NA,3400,82,,1965-082NA
12930,1981-053NA,39940,DEBRIS,COSMOS 1275 DEB,CIS,1981-06-04,PKMTR,,105.31,82.99,...,1981,53,,Y,COSMOS 1275 DEB,1981-053NA,39940,53,,1981-053NA
15737,2006-026NA,33142,DEBRIS,COSMOS 2421 DEB,CIS,2006-06-25,TTMTR,2008-08-06,88.33,65.08,...,2006,26,,Y,COSMOS 2421 DEB,2006-026NA,33142,26,,2006-026NA
16369,1986-019NA,17959,DEBRIS,ARIANE 1 DEB,FR,1986-02-22,FRGUI,1989-08-24,90.41,98.52,...,1986,19,,Y,ARIANE 1 DEB,1986-019NA,17959,19,,1986-019NA
19058,1986-017NA,25686,DEBRIS,MIR DEB,CIS,1986-02-19,TTMTR,1999-07-10,87.99,51.64,...,1986,17,,Y,MIR DEB,1986-017NA,25686,17,,1986-017NA
19770,1994-029NA,24322,DEBRIS,PEGASUS DEB,US,1994-05-19,AFWTR,1997-05-27,91.38,82.08,...,1994,29,,Y,PEGASUS DEB,1994-029NA,24322,29,,1994-029NA
22379,2011-037NA,46634,DEBRIS,FREGAT DEB,CIS,2011-07-18,TTMTR,2021-06-22,104.65,51.33,...,2011,37,,Y,FREGAT DEB,2011-037NA,46634,37,,2011-037NA
22536,1998-067NA,42911,PAYLOAD,TANYUSHA 1,CIS,1998-11-20,TTMTR,2019-07-30,91.68,51.64,...,1998,67,,Y,TANYUSHA 1,1998-067NA,42911,67,,1998-067NA
22679,1970-025NA,6161,DEBRIS,THORAD AGENA D DEB,US,1970-04-08,AFWTR,1988-10-30,92.25,101.9,...,1970,25,,Y,THORAD AGENA D DEB,1970-025NA,6161,25,,1970-025NA
25420,1982-092NA,49891,DEBRIS,COSMOS 1408 DEB,CIS,1982-09-16,PKMTR,2022-02-13,90.01,82.55,...,1982,92,,Y,COSMOS 1408 DEB,1982-092NA,49891,92,,1982-092NA


---
### Numerical Variables
---