# Get space debris data (Jonathan McDowell's GCAT catalogue)

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
from datetime import timedelta
import numpy as np
import glob
import json

In [3]:
# Raise pandas' display limits so wide frames, long frames and long cell text render untruncated.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

## Jonathan McDowell's catalogue

In [46]:
# Tab-separated "currentcat" table, fetched live from planet4589.org on every run,
# so the counts shown in later outputs can drift as the catalogue is updated.
GCAT_CURRENTCAT_URL = "https://planet4589.org/space/gcat/tsv/derived/currentcat.tsv"
src = pd.read_csv(GCAT_CURRENTCAT_URL, sep="\t", low_memory=False)

In [47]:
# Normalise the header names to lower case so later cells can use lower-case column keys.
src.columns = [column.lower() for column in src.columns]

In [48]:
# Work on a copy without the row labelled 0.
# NOTE(review): presumably that row is a non-data line in the TSV — confirm against the file.
src_df = src.drop(index=0)

In [51]:
# How many catalogue rows carry each "active" code.
src_df["active"].value_counts()

D    24777
C    15047
P     9814
R     6954
A     4957
Z        8
Name: active, dtype: int64

In [49]:
# "active" codes this notebook keeps when filtering the catalogue.
debris_categories = [
    "D",  # debris
    "C",  # litter
    "R",  # rocket stage
    "P",  # payload
]

In [8]:
# Keep objects whose status mentions "In Earth orbit" or "Attached to" and whose
# "active" code is one of the debris categories.
# `na=False` makes a missing status an explicit non-match instead of relying on
# how pandas coerces missing values when the two masks are combined; `regex=True`
# states that the "|" in the pattern is an alternation, not a literal character.
in_orbit_mask = src_df["expandedstatus"].str.contains(
    "In Earth orbit|Attached to", regex=True, na=False
)
debris_mask = src_df["active"].isin(debris_categories)
# .copy() so the column assignments in later cells do not write into a slice of src_df.
df = src_df[in_orbit_mask & debris_mask].copy()

In [9]:
# Rows per "active" code after filtering.
df["active"].value_counts()

D    12731
P     3446
C     3128
R     1976
Name: active, dtype: int64

#### Map the category codes to debris type labels

In [10]:
# Readable label for each one-letter "active" code kept by the filter.
types = dict(
    P="Payload",
    C="Litter",
    R="Rocket stage",
    D="Debris",
)

In [11]:
# Translate each one-letter code into its label; codes absent from `types` become missing.
active_codes = df["active"]
df["active_desc"] = active_codes.map(types)

In [12]:
# Preview the first five filtered rows, now including the label column.
df.head(n=5)

Unnamed: 0,#jcat,deepcat,satcat,piece,active,type,name,ldate,parent,sdate,expandedstatus,ddate,odate,perigee,pf,apogee,af,inc,if,active_desc
5,S00005,-,5,1958 BET 2,P,P,Vanguard I,1958 Mar 17,S00016,1958 Mar 17 1224,In Earth orbit,-,2022 Feb 13,652,,3830,,34.25,,Payload
11,S00011,-,11,1959 ALP 1,P,P,Vanguard II,1959 Feb 17,S00012,1959 Feb 17 1605,In Earth orbit,-,2022 Feb 14,553,,2935,,32.87,,Payload
12,S00012,-,12,1959 ALP 2,R,R3,GRC 33-KS-2800,1959 Feb 17,R02749,1959 Feb 17 1604,In Earth orbit,-,2022 Feb 14,553,,3324,,32.90,,Rocket stage
16,S00016,-,16,1958 BET 1,R,R3,GRC 33-KS-2800,1958 Mar 17,R02064,1958 Mar 17 1223,In Earth orbit,-,2022 Feb 14,650,,4225,,34.27,,Rocket stage
20,S00020,-,20,1959 ETA,P,P A,Vanguard III,1959 Sep 18,A00034,1959 Sep 18 0529,Attached to A00034 X-248-A2,-,-,-,-,-,-,-,-,Payload


#### Debris type descriptions

In [13]:
# Rows per readable debris label.
df.loc[:, "active_desc"].value_counts()

Debris          12731
Payload          3446
Litter           3128
Rocket stage     1976
Name: active_desc, dtype: int64

#### Split the messy separation date column

In [14]:
# Break the free-text separation date into space-separated tokens after removing
# the "?" markers. The token count depends on the live catalogue, and assigning to
# five columns fails unless exactly five tokens come back, so the split is capped
# and padded to always yield five columns.
sdate_parts = (
    df["sdate"]
    .str.replace("?", "", regex=False)
    # n=4 allows at most five tokens; anything beyond stays in the last token.
    .str.split(" ", n=4, expand=True)
    # Add empty columns when the longest value has fewer than five tokens.
    .reindex(columns=range(5))
)
df[["year", "month", "day", "other1", "other2"]] = sdate_parts

#### Fill in missing days, then pin every date to the first of the month

In [15]:
# Rows whose date had no day token get "1" as a placeholder.
df["day"] = df["day"].fillna("1")

In [16]:
# Pin every row to day 1. This replaces the day for all rows, including the
# placeholders filled in by the previous cell, so any date assembled from
# year/month/day afterwards is month-resolution only.
# NOTE(review): looks like a deliberate workaround for unparseable day tokens — confirm.
df["day"] = 1

#### Convert month abbreviations to month numbers

In [17]:
# Swap each three-letter month abbreviation for its month number (still text),
# applied as literal substring replacements in calendar order.
month_numbers = {
    "Jan": "1",
    "Feb": "2",
    "Mar": "3",
    "Apr": "4",
    "May": "5",
    "Jun": "6",
    "Jul": "7",
    "Aug": "8",
    "Sep": "9",
    "Oct": "10",
    "Nov": "11",
    "Dec": "12",
}
month_tokens = df["month"]
for abbreviation, number in month_numbers.items():
    month_tokens = month_tokens.str.replace(abbreviation, number, regex=False)
df["month"] = month_tokens

In [18]:
# Assemble a timestamp from the year / month / day component columns.
date_parts = df[["year", "month", "day"]]
df["separation_date"] = pd.to_datetime(date_parts)

In [19]:
# Keep the year token under a descriptive name; it is the text produced by the
# split of "sdate", not a number derived from separation_date.
df["separation_year"] = df.loc[:, "year"]

In [20]:
# Preview the first five rows with the new date columns.
df.head(n=5)

Unnamed: 0,#jcat,deepcat,satcat,piece,active,type,name,ldate,parent,sdate,expandedstatus,ddate,odate,perigee,pf,apogee,af,inc,if,active_desc,year,month,day,other1,other2,separation_date,separation_year
5,S00005,-,5,1958 BET 2,P,P,Vanguard I,1958 Mar 17,S00016,1958 Mar 17 1224,In Earth orbit,-,2022 Feb 13,652,,3830,,34.25,,Payload,1958,3,1,1224,,1958-03-01,1958
11,S00011,-,11,1959 ALP 1,P,P,Vanguard II,1959 Feb 17,S00012,1959 Feb 17 1605,In Earth orbit,-,2022 Feb 14,553,,2935,,32.87,,Payload,1959,2,1,1605,,1959-02-01,1959
12,S00012,-,12,1959 ALP 2,R,R3,GRC 33-KS-2800,1959 Feb 17,R02749,1959 Feb 17 1604,In Earth orbit,-,2022 Feb 14,553,,3324,,32.90,,Rocket stage,1959,2,1,1604,,1959-02-01,1959
16,S00016,-,16,1958 BET 1,R,R3,GRC 33-KS-2800,1958 Mar 17,R02064,1958 Mar 17 1223,In Earth orbit,-,2022 Feb 14,650,,4225,,34.27,,Rocket stage,1958,3,1,1223,,1958-03-01,1958
20,S00020,-,20,1959 ETA,P,P A,Vanguard III,1959 Sep 18,A00034,1959 Sep 18 0529,Attached to A00034 X-248-A2,-,-,-,-,-,-,-,-,Payload,1959,9,1,529,,1959-09-01,1959


In [21]:
# Narrow the frame to the columns the aggregation cells use; copy so later
# column assignments on df_slim do not touch df.
slim_columns = [
    "piece",
    "name",
    "active_desc",
    "separation_date",
    "separation_year",
]
df_slim = df.loc[:, slim_columns].copy()

In [22]:
# Objects per separation year, most frequent first.
df_slim["separation_year"].value_counts()

2007    3088
2021    1690
2009    1685
2015     832
1981     783
2020     715
1991     545
1977     500
2019     457
2018     432
1985     429
1970     414
1992     407
2000     333
2014     328
1975     319
2006     288
1973     287
1990     285
2010     283
2017     273
1993     271
2001     258
1984     253
1999     252
1965     244
1988     237
1987     236
1961     227
2004     227
1976     227
1996     226
1998     226
1989     211
2016     210
2013     209
1983     203
1986     199
1994     198
2002     181
2008     172
1978     172
1968     156
1980     151
1982     149
2011     147
1971     146
2012     143
2005     138
1997     138
2003     131
1969     127
1979     124
1995     115
1963     100
1967      99
1972      97
1974      93
1966      88
1964      60
1962      32
2022      15
1960      12
1959       5
1958       3
Name: separation_year, dtype: int64

In [23]:
# Rows per separation year, then a running total across years.
# The result keeps the column name "piece", which here holds the cumulative count.
pieces_per_year = df_slim.groupby("separation_year").size().rename("piece")
cum_sum_year = pieces_per_year.cumsum().reset_index()

In [24]:
# Line chart of the running total of catalogued pieces by separation year.
cumulative_line = alt.Chart(cum_sum_year).mark_line()
cumulative_line = cumulative_line.encode(x="separation_year", y="piece")
cumulative_line.properties(width=650, height=350)

---

## Aggregate

#### How many of each type of debris, by year

In [25]:
# Create a month period column for correct (chronological) ordering.
# "M" is the documented monthly period alias; the lowercase "m" spelling is a
# non-canonical variant that newer pandas releases may warn about or reject.
df_slim["month"] = df_slim["separation_date"].dt.to_period("M")

In [26]:
# Lower-case the labels; the melt cell further down selects columns by these lower-case names.
lowercase_labels = df_slim["active_desc"].str.lower()
df_slim["active_desc"] = lowercase_labels

In [27]:
# Number of rows for every (separation year, debris label) pair, in long form;
# the count is stored in a column named "piece".
year_count_category = (
    df_slim.groupby(["separation_year", "active_desc"])
    .size()
    .reset_index(name="piece")
)

In [28]:
# Wide table: one row per year, one column per debris label. Year/label pairs
# with no rows come out missing and are replaced with 0.
counts_wide = year_count_category.pivot_table(
    index="separation_year", columns="active_desc", values="piece"
)
year_count_category_pivot = counts_wide.reset_index().fillna(0)

In [29]:
# Back to long form, now with an explicit row (possibly 0) for every year/type pair.
debris_type_columns = ["debris", "litter", "payload", "rocket stage"]
year_count_category_melt = year_count_category_pivot.melt(
    id_vars="separation_year",
    value_vars=debris_type_columns,
    var_name="type",
    value_name="count",
)

In [30]:
# Total per (year, type), then a running total over the years within each type.
count_by_year_and_type = year_count_category_melt.groupby(
    ["separation_year", "type"]
).sum()
categories_years = (
    count_by_year_and_type.groupby(level="type").cumsum().reset_index()
)

In [45]:
# Area chart of the cumulative count per year, coloured by debris type.
stacked_area = alt.Chart(categories_years).mark_area()
stacked_area = stacked_area.encode(x="separation_year", y="count", color="type")
stacked_area.properties(width=350, height=400)

In [41]:
# Wide form of the cumulative counts: one row per year, one column per debris type.
cumulative_wide = categories_years.pivot_table(
    index="separation_year", columns="type", values="count"
)
categories_years_pivot = cumulative_wide.reset_index()

In [42]:
# Display the cumulative per-year table that the next cell writes to CSV.
categories_years_pivot

type,separation_year,debris,litter,payload,rocket stage
0,1958,0.0,1.0,1.0,1.0
1,1959,0.0,1.0,4.0,3.0
2,1960,0.0,6.0,8.0,6.0
3,1961,190.0,25.0,20.0,12.0
4,1962,190.0,36.0,35.0,18.0
5,1963,203.0,94.0,57.0,25.0
6,1964,203.0,115.0,83.0,38.0
7,1965,337.0,153.0,134.0,59.0
8,1966,340.0,186.0,167.0,78.0
9,1967,342.0,216.0,212.0,100.0


In [43]:
# Write the cumulative per-year, per-type table to disk without the row index.
stacked_csv_path = "data/processed/stacked_categories_debris_years.csv"
categories_years_pivot.to_csv(stacked_csv_path, index=False)