In [None]:
# Imports
import sqlite3
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [None]:
# Load in data set
satellite_df = pd.read_csv('data/UCS-Satellite-DB.csv')
satellite_df.head()

Unnamed: 0,Current Official Name of Satellite,Country of Operator/Owner,Users,Purpose,Detailed Purpose,Class of Orbit,Date of Launch,Launch Site,Unnamed: 8,Unnamed: 9,...,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38
0,1HOPSAT-TD,USA,Commercial,Earth Observation,Infrared Imaging,LEO,12/11/2019,Satish Dhawan Space Centre,,,...,,,,,,,,,,
1,AAC AIS-Sat1 (Kelpie 1),United Kingdom,Commercial,Earth Observation,Automatic Identification System (AIS),LEO,1/3/2023,Cape Canaveral,,,...,,,,,,,,,,
2,Aalto-1,Finland,Civil,Technology Development,,LEO,6/23/2017,Satish Dhawan Space Centre,,,...,,,,,,,,,,
3,AAt-4,Denmark,Civil,Earth Observation,Automatic Identification System (AIS),LEO,4/25/2016,Guiana Space Center,,,...,,,,,,,,,,
4,ABS-2,Multinational,Commercial,Communications,,GEO,2/6/2014,Guiana Space Center,,,...,,,,,,,,,,


In [24]:
satellite_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7560 entries, 0 to 7559
Data columns (total 39 columns):
 #   Column                              Non-Null Count  Dtype 
---  ------                              --------------  ----- 
 0   Current Official Name of Satellite  7560 non-null   object
 1   Country of Operator/Owner           7560 non-null   object
 2   Users                               7560 non-null   object
 3   Purpose                             7560 non-null   object
 4   Detailed Purpose                    1254 non-null   object
 5   Class of Orbit                      7560 non-null   object
 6   Date of Launch                      7559 non-null   object
 7   Launch Site                         7560 non-null   object
 8   Unnamed: 8                          484 non-null    object
 9   Unnamed: 9                          484 non-null    object
 10  Unnamed: 10                         484 non-null    object
 11  Unnamed: 11                         484 non-null    obje

In [None]:
# Remove Null columns or columns that have information I don't need
cleaned_satellite_df = satellite_df[['Current Official Name of Satellite', 'Country of Operator/Owner', 'Users', 'Purpose', 'Class of Orbit', 'Date of Launch', 'Launch Site']]
cleaned_satellite_df

Unnamed: 0,Current Official Name of Satellite,Country of Operator/Owner,Users,Purpose,Class of Orbit,Date of Launch,Launch Site
0,1HOPSAT-TD,USA,Commercial,Earth Observation,LEO,12/11/2019,Satish Dhawan Space Centre
1,AAC AIS-Sat1 (Kelpie 1),United Kingdom,Commercial,Earth Observation,LEO,1/3/2023,Cape Canaveral
2,Aalto-1,Finland,Civil,Technology Development,LEO,6/23/2017,Satish Dhawan Space Centre
3,AAt-4,Denmark,Civil,Earth Observation,LEO,4/25/2016,Guiana Space Center
4,ABS-2,Multinational,Commercial,Communications,GEO,2/6/2014,Guiana Space Center
...,...,...,...,...,...,...,...
7555,Ziyuan 1-02C,China,Government,Earth Observation,LEO,12/22/2011,Taiyuan Launch Center
7556,Ziyuan 1-2D,China,Government,Earth Observation,LEO,9/14/2019,Taiyuan Launch Center
7557,Ziyuan 3,China,Government,Earth Observation,LEO,1/9/2012,Taiyuan Launch Center
7558,Ziyuan 3-2,China,Government,Earth Observation,LEO,5/29/2016,Taiyuan Launch Center


In [None]:
# Figure out all 'Purpose' categories and get an idea of how many there are
cleaned_satellite_df.groupby('Purpose').size()

Purpose
Communications                                    5514
Communications/Maritime Tracking                     5
Communications/Navigation                            1
Communications/Technology Development                4
Earth Observation                                 1235
Earth Observation                                    3
Earth Observation/Communications                     2
Earth Observation/Communications/Space Science       1
Earth Observation/Earth Science                      1
Earth Observation/Navigation                         9
Earth Observation/Space Science                      1
Earth Observation/Technology Development             7
Earth Science                                       28
Earth Science/Earth Observation                      1
Earth/Space Observation                              4
Educational                                          1
Meteorological                                       6
Mission Extension Technology                         2
Na

In [None]:
# Figuring out a way to narrow down my scope, decided to just do satellites that have been launched since 2020
cleaned_satellite_df.groupby('Date of Launch').size()

Date of Launch
1/1/2000      1
1/10/2007     2
1/10/2015     1
1/10/2019     1
1/10/2023    30
             ..
9/8/2016      1
9/9/2003      1
9/9/2005      1
9/9/2012      1
9/9/2021      1
Length: 1224, dtype: int64

In [None]:
# Change Date of Launch column to date time object so I can put year in separate column later
cleaned_satellite_df['Date of Launch'] = pd.to_datetime(cleaned_satellite_df['Date of Launch'], format='mixed', errors='coerce')
cleaned_satellite_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_satellite_df['Date of Launch'] = pd.to_datetime(cleaned_satellite_df['Date of Launch'], format='mixed', errors='coerce');


Unnamed: 0,Current Official Name of Satellite,Country of Operator/Owner,Users,Purpose,Class of Orbit,Date of Launch,Launch Site
0,1HOPSAT-TD,USA,Commercial,Earth Observation,LEO,2019-12-11,Satish Dhawan Space Centre
1,AAC AIS-Sat1 (Kelpie 1),United Kingdom,Commercial,Earth Observation,LEO,2023-01-03,Cape Canaveral
2,Aalto-1,Finland,Civil,Technology Development,LEO,2017-06-23,Satish Dhawan Space Centre
3,AAt-4,Denmark,Civil,Earth Observation,LEO,2016-04-25,Guiana Space Center
4,ABS-2,Multinational,Commercial,Communications,GEO,2014-02-06,Guiana Space Center
...,...,...,...,...,...,...,...
7555,Ziyuan 1-02C,China,Government,Earth Observation,LEO,2011-12-22,Taiyuan Launch Center
7556,Ziyuan 1-2D,China,Government,Earth Observation,LEO,2019-09-14,Taiyuan Launch Center
7557,Ziyuan 3,China,Government,Earth Observation,LEO,2012-01-09,Taiyuan Launch Center
7558,Ziyuan 3-2,China,Government,Earth Observation,LEO,2016-05-29,Taiyuan Launch Center


In [30]:
# Make a new column with just the year
cleaned_satellite_df['Launch Year'] = cleaned_satellite_df['Date of Launch'].dt.year
cleaned_satellite_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_satellite_df['Launch Year'] = cleaned_satellite_df['Date of Launch'].dt.year


Unnamed: 0,Current Official Name of Satellite,Country of Operator/Owner,Users,Purpose,Class of Orbit,Date of Launch,Launch Site,Launch Year
0,1HOPSAT-TD,USA,Commercial,Earth Observation,LEO,2019-12-11,Satish Dhawan Space Centre,2019.0
1,AAC AIS-Sat1 (Kelpie 1),United Kingdom,Commercial,Earth Observation,LEO,2023-01-03,Cape Canaveral,2023.0
2,Aalto-1,Finland,Civil,Technology Development,LEO,2017-06-23,Satish Dhawan Space Centre,2017.0
3,AAt-4,Denmark,Civil,Earth Observation,LEO,2016-04-25,Guiana Space Center,2016.0
4,ABS-2,Multinational,Commercial,Communications,GEO,2014-02-06,Guiana Space Center,2014.0
...,...,...,...,...,...,...,...,...
7555,Ziyuan 1-02C,China,Government,Earth Observation,LEO,2011-12-22,Taiyuan Launch Center,2011.0
7556,Ziyuan 1-2D,China,Government,Earth Observation,LEO,2019-09-14,Taiyuan Launch Center,2019.0
7557,Ziyuan 3,China,Government,Earth Observation,LEO,2012-01-09,Taiyuan Launch Center,2012.0
7558,Ziyuan 3-2,China,Government,Earth Observation,LEO,2016-05-29,Taiyuan Launch Center,2016.0


In [None]:
# Narrow it down to launches within the last 5 years
filtered_df = cleaned_satellite_df[cleaned_satellite_df['Launch Year'] >= 2020]
filtered_df

Unnamed: 0,Current Official Name of Satellite,Country of Operator/Owner,Users,Purpose,Class of Orbit,Date of Launch,Launch Site,Launch Year
1,AAC AIS-Sat1 (Kelpie 1),United Kingdom,Commercial,Earth Observation,LEO,2023-01-03,Cape Canaveral,2023.0
9,Adelis-Sampson 1,Israel,Government,Technology Development,LEO,2021-03-22,Baikonur Cosmodrome,2021.0
10,Adelis-Sampson 2,Israel,Government,Technology Development,LEO,2021-03-22,Baikonur Cosmodrome,2021.0
11,Adelis-Sampson 3,Israel,Government,Technology Development,LEO,2021-03-22,Baikonur Cosmodrome,2021.0
12,ADLER-2,Austria,Civil,Earth Science,LEO,2023-04-15,Vandenberg AFB,2023.0
...,...,...,...,...,...,...,...,...
7533,Zhongxing 26,China,Commercial,Communications,GEO,2023-02-23,Xichang Satellite Launch Center,2023.0
7537,Zhongxing 2E,China,Government/Commercial,Communications,GEO,2021-08-05,Xichang Satellite Launch Center,2021.0
7540,Zhongxing 6D,China,Government/Commercial,Communications,GEO,2022-04-15,Xichang Satellite Launch Center,2022.0
7542,Zhongxing 9B,China,Government,Communications,GEO,2021-09-09,Xichang Satellite Launch Center,2021.0


In [None]:
# Check for null values in my final data set
filtered_df.isnull().sum()

Current Official Name of Satellite    0
Country of Operator/Owner             0
Users                                 0
Purpose                               0
Class of Orbit                        0
Date of Launch                        0
Launch Site                           0
Launch Year                           0
dtype: int64