# Get SPACE TRACK data

In [75]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
from datetime import timedelta
import numpy as np
import glob
import json

In [3]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None

## Jonathan McDowell's catalogue

In [669]:
src = pd.read_csv(
    "https://planet4589.org/space/gcat/tsv/derived/currentcat.tsv",
    sep="\t",
    low_memory=False,
    skiprows=0,
)

In [670]:
src.columns = src.columns.str.lower()

In [671]:
src_df = src.drop([0])

In [672]:
debris_categories = ["D", "C", "R", "P"]

In [689]:
df = src_df[
    (src_df["expandedstatus"].str.contains("In Earth orbit|Attached to"))
    & (src_df["active"].isin(debris_categories))
].copy()

In [690]:
df.active.value_counts()

D    12731
P     3446
C     3128
R     1976
Name: active, dtype: int64

#### Get the debris types 

In [694]:
types = {
    "P": "Payload",
    "C": "Litter",
    "R": "Rocket stage",
    "D": "Debris",
}

In [695]:
df["active_desc"] = df["active"].map(types)

In [696]:
df.head()

Unnamed: 0,#jcat,deepcat,satcat,piece,active,type,name,ldate,parent,sdate,expandedstatus,ddate,odate,perigee,pf,apogee,af,inc,if,active_desc
5,S00005,-,5,1958 BET 2,P,P,Vanguard I,1958 Mar 17,S00016,1958 Mar 17 1224,In Earth orbit,-,2022 Feb 13,652,,3830,,34.25,,Payload
11,S00011,-,11,1959 ALP 1,P,P,Vanguard II,1959 Feb 17,S00012,1959 Feb 17 1605,In Earth orbit,-,2022 Feb 14,553,,2935,,32.87,,Payload
12,S00012,-,12,1959 ALP 2,R,R3,GRC 33-KS-2800,1959 Feb 17,R02749,1959 Feb 17 1604,In Earth orbit,-,2022 Feb 14,553,,3324,,32.90,,Rocket stage
16,S00016,-,16,1958 BET 1,R,R3,GRC 33-KS-2800,1958 Mar 17,R02064,1958 Mar 17 1223,In Earth orbit,-,2022 Feb 14,650,,4225,,34.27,,Rocket stage
20,S00020,-,20,1959 ETA,P,P A,Vanguard III,1959 Sep 18,A00034,1959 Sep 18 0529,Attached to A00034 X-248-A2,-,-,-,-,-,-,-,-,Payload


#### Debris type descriptions

In [697]:
df["active_desc"].value_counts()

Debris          12731
Payload          3446
Litter           3128
Rocket stage     1976
Name: active_desc, dtype: int64

#### Split the messy separation date column

In [698]:
df[["year", "month", "day", "other1", "other2"]] = (
    df["sdate"].str.replace("?", "", regex=False).str.split(" ", expand=True)
)

#### Fill in missing days

In [699]:
df.day = df.day.fillna("1")

In [700]:
df["day"] = 1

#### Clean up months

In [701]:
df["month"] = (
    df["month"]
    .str.replace("Jan", "1", regex=False)
    .str.replace("Feb", "2", regex=False)
    .str.replace("Mar", "3", regex=False)
    .str.replace("Apr", "4", regex=False)
    .str.replace("May", "5", regex=False)
    .str.replace("Jun", "6", regex=False)
    .str.replace("Jul", "7", regex=False)
    .str.replace("Aug", "8", regex=False)
    .str.replace("Sep", "9", regex=False)
    .str.replace("Oct", "10", regex=False)
    .str.replace("Nov", "11", regex=False)
    .str.replace("Dec", "12", regex=False)
)

In [702]:
df["separation_date"] = pd.to_datetime(df[["year", "month", "day"]])

In [703]:
df["separation_year"] = df["year"]

In [704]:
df.head()

Unnamed: 0,#jcat,deepcat,satcat,piece,active,type,name,ldate,parent,sdate,expandedstatus,ddate,odate,perigee,pf,apogee,af,inc,if,active_desc,year,month,day,other1,other2,separation_date,separation_year
5,S00005,-,5,1958 BET 2,P,P,Vanguard I,1958 Mar 17,S00016,1958 Mar 17 1224,In Earth orbit,-,2022 Feb 13,652,,3830,,34.25,,Payload,1958,3,1,1224,,1958-03-01,1958
11,S00011,-,11,1959 ALP 1,P,P,Vanguard II,1959 Feb 17,S00012,1959 Feb 17 1605,In Earth orbit,-,2022 Feb 14,553,,2935,,32.87,,Payload,1959,2,1,1605,,1959-02-01,1959
12,S00012,-,12,1959 ALP 2,R,R3,GRC 33-KS-2800,1959 Feb 17,R02749,1959 Feb 17 1604,In Earth orbit,-,2022 Feb 14,553,,3324,,32.90,,Rocket stage,1959,2,1,1604,,1959-02-01,1959
16,S00016,-,16,1958 BET 1,R,R3,GRC 33-KS-2800,1958 Mar 17,R02064,1958 Mar 17 1223,In Earth orbit,-,2022 Feb 14,650,,4225,,34.27,,Rocket stage,1958,3,1,1223,,1958-03-01,1958
20,S00020,-,20,1959 ETA,P,P A,Vanguard III,1959 Sep 18,A00034,1959 Sep 18 0529,Attached to A00034 X-248-A2,-,-,-,-,-,-,-,-,Payload,1959,9,1,529,,1959-09-01,1959


In [705]:
df_slim = df[
    [
        "piece",
        "name",
        "active_desc",
        "separation_date",
        "separation_year",
    ]
]

In [706]:
df_slim.separation_year.value_counts()

In [726]:
cum_sum_year = (
    df_slim.groupby(["separation_year"])
    .agg({"piece": "size"})["piece"]
    .cumsum()
    .reset_index()
)

In [728]:
alt.Chart(cum_sum_year).mark_line().encode(x="separation_year", y="piece").properties(
    width=650, height=350
)

---

## Spack-track.org data

In [707]:
space_track_df = pd.read_csv(
    "data/processed/space_track_active.csv", dtype={"norad_cat_id": str}
)

In [708]:
space_track_slim = space_track_df[["intldes", "object_type", "country", "launch_year"]]

In [709]:
space_track_slim.head()

Unnamed: 0,intldes,object_type,country,launch_year
0,1965-092D,ROCKET BODY,CIS,1965
1,1966-078A,PAYLOAD,CIS,1966
2,1966-084B,ROCKET BODY,US,1966
3,1966-095B,ROCKET BODY,US,1966
4,1967-035B,ROCKET BODY,US,1967


---

## Merge them

In [710]:
merge = pd.merge(space_track_slim, df_slim, right_on="piece", left_on="intldes")

In [711]:
merge.head()

Unnamed: 0,intldes,object_type,country,launch_year,piece,name,active_desc,separation_date,separation_year
0,1966-095B,ROCKET BODY,US,1966,1966-095B,Centaur D AC-9,Rocket stage,1966-10-01,1966
1,1967-035B,ROCKET BODY,US,1967,1967-035B,Centaur D AC-12,Rocket stage,1967-04-01,1967
2,1968-001B,ROCKET BODY,US,1968,1968-001B,Centaur AC-15,Rocket stage,1968-01-01,1968
3,1969-036A,PAYLOAD,US,1969,1969-036A,OPS 3148,Payload,1969-04-01,1969
4,1971-021A,PAYLOAD,US,1971,1971-021A,OPS 4788,Payload,1971-03-01,1971


---

## Aggregate

#### How many of each type of debris, by year

In [713]:
active_grouped_count = (
    merge.groupby(["separation_year", "active_desc"])
    .agg({"intldes": "size"})
    .reset_index()
).rename(columns={"intldes": "count"})

In [714]:
active_grouped_count.tail()

Unnamed: 0,separation_year,active_desc,count
231,2021,Payload,15
232,2021,Rocket stage,54
233,2022,Litter,8
234,2022,Payload,1
235,2022,Rocket stage,4


In [715]:
alt.Chart(active_grouped_count).mark_bar().encode(
    x="separation_year", y="count", color="active_desc"
).properties(width=650, height=350)

In [600]:
merged_cumsum = (
    merge.groupby(["type_coarse_desc", "separation_year"])["intldes"]
    .size()
    .groupby(level="type_coarse_desc")
    .cumsum()
    .reset_index(name="cumsum")
)

In [603]:
merged_cumsum.head()

Unnamed: 0,type_coarse_desc,separation_year,cumsum
0,Component,1963,46
1,Component,1964,61
2,Component,1965,92
3,Component,1966,123
4,Component,1967,142


In [604]:
alt.Chart(merged_cumsum).mark_bar().encode(
    x="separation_year", y="cumsum", color="type_coarse_desc"
).properties(width=650, height=350)

In [271]:
active_grouped_cumsum.to_csv(
    "data/processed/active_debris_categories_cumsum.csv", index=False
)