In [1]:
# Imports
import sqlite3
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [2]:
# Load in data set
satellite_df = pd.read_csv('data/UCS-Satellite-DB.csv')
satellite_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7560 entries, 0 to 7559
Data columns (total 39 columns):
 #   Column                              Non-Null Count  Dtype 
---  ------                              --------------  ----- 
 0   Current Official Name of Satellite  7560 non-null   object
 1   Country of Operator/Owner           7560 non-null   object
 2   Users                               7560 non-null   object
 3   Purpose                             7560 non-null   object
 4   Detailed Purpose                    1254 non-null   object
 5   Class of Orbit                      7560 non-null   object
 6   Date of Launch                      7559 non-null   object
 7   Launch Site                         7560 non-null   object
 8   Unnamed: 8                          484 non-null    object
 9   Unnamed: 9                          484 non-null    object
 10  Unnamed: 10                         484 non-null    object
 11  Unnamed: 11                         484 non-null    obje

In [3]:
# Only keep certain columns
satellite_df = satellite_df[['Current Official Name of Satellite', 'Country of Operator/Owner', 'Users', 'Purpose', 'Class of Orbit', 'Date of Launch', 'Launch Site']]
satellite_df

Unnamed: 0,Current Official Name of Satellite,Country of Operator/Owner,Users,Purpose,Class of Orbit,Date of Launch,Launch Site
0,1HOPSAT-TD,USA,Commercial,Earth Observation,LEO,12/11/2019,Satish Dhawan Space Centre
1,AAC AIS-Sat1 (Kelpie 1),United Kingdom,Commercial,Earth Observation,LEO,1/3/2023,Cape Canaveral
2,Aalto-1,Finland,Civil,Technology Development,LEO,6/23/2017,Satish Dhawan Space Centre
3,AAt-4,Denmark,Civil,Earth Observation,LEO,4/25/2016,Guiana Space Center
4,ABS-2,Multinational,Commercial,Communications,GEO,2/6/2014,Guiana Space Center
...,...,...,...,...,...,...,...
7555,Ziyuan 1-02C,China,Government,Earth Observation,LEO,12/22/2011,Taiyuan Launch Center
7556,Ziyuan 1-2D,China,Government,Earth Observation,LEO,9/14/2019,Taiyuan Launch Center
7557,Ziyuan 3,China,Government,Earth Observation,LEO,1/9/2012,Taiyuan Launch Center
7558,Ziyuan 3-2,China,Government,Earth Observation,LEO,5/29/2016,Taiyuan Launch Center


In [4]:
# Rename Columns to be more concise & SQL friendly
satellite_df = satellite_df.rename(columns={
    'Current Official Name of Satellite': 'Satellite_Name',
    'Country of Operator/Owner': 'Operator_Country',
    'Purpose': 'Mission',
    'Class of Orbit': 'Orbit',
    'Launch Site': 'Launch_Site'})
satellite_df

Unnamed: 0,Satellite_Name,Operator_Country,Users,Mission,Orbit,Date of Launch,Launch_Site
0,1HOPSAT-TD,USA,Commercial,Earth Observation,LEO,12/11/2019,Satish Dhawan Space Centre
1,AAC AIS-Sat1 (Kelpie 1),United Kingdom,Commercial,Earth Observation,LEO,1/3/2023,Cape Canaveral
2,Aalto-1,Finland,Civil,Technology Development,LEO,6/23/2017,Satish Dhawan Space Centre
3,AAt-4,Denmark,Civil,Earth Observation,LEO,4/25/2016,Guiana Space Center
4,ABS-2,Multinational,Commercial,Communications,GEO,2/6/2014,Guiana Space Center
...,...,...,...,...,...,...,...
7555,Ziyuan 1-02C,China,Government,Earth Observation,LEO,12/22/2011,Taiyuan Launch Center
7556,Ziyuan 1-2D,China,Government,Earth Observation,LEO,9/14/2019,Taiyuan Launch Center
7557,Ziyuan 3,China,Government,Earth Observation,LEO,1/9/2012,Taiyuan Launch Center
7558,Ziyuan 3-2,China,Government,Earth Observation,LEO,5/29/2016,Taiyuan Launch Center


In [12]:
# Convert Launch Date to DateTime
satellite_df['Launch_Date'] = pd.to_datetime(satellite_df['Date of Launch'], yearfirst=True)

# Create a new column to store the year
satellite_df['Launch_Year'] = satellite_df['Launch_Date'].dt.year.convert_dtypes()

# Only keep missions that were launched between 2015 to Present
sats2015_df = satellite_df[satellite_df['Launch_Year'] >= 2015]
sats2015_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6708 entries, 0 to 7559
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Satellite_Name    6708 non-null   object        
 1   Operator_Country  6708 non-null   object        
 2   Users             6708 non-null   object        
 3   Mission           6708 non-null   object        
 4   Orbit             6708 non-null   object        
 5   Date of Launch    6708 non-null   object        
 6   Launch_Site       6708 non-null   object        
 7   Launch_Date       6708 non-null   datetime64[ns]
 8   Launch_Year       6708 non-null   Int64         
dtypes: Int64(1), datetime64[ns](1), object(7)
memory usage: 530.6+ KB


In [13]:
# Check for null values
sats2015_df.isnull().sum()

Satellite_Name      0
Operator_Country    0
Users               0
Mission             0
Orbit               0
Date of Launch      0
Launch_Site         0
Launch_Date         0
Launch_Year         0
dtype: int64

In [14]:
# Moving on to Missions, get an idea of Mission Categories
sats2015_df.groupby('Mission').size()

Mission
Communications                              5083
Earth Observation                           1013
Earth Observation                              3
Earth Observation/Earth Science                1
Earth Observation/Navigation                   9
Earth Observation/Space Science                1
Earth Observation/Technology Development       3
Earth Science                                 25
Earth Science/Earth Observation                1
Earth/Space Observation                        4
Educational                                    1
Meteorological                                 6
Mission Extension Technology                   2
Navigation/Global Positioning                 81
Navigation/Regional Positioning                9
Platform                                       1
Satellite Positioning                          1
Space Observation                              5
Space Science                                 58
Space Science/Technology Demonstration         1
Space Scienc

In [None]:
# Create a new column for Primary Mission and create a function that will pull primary mission from Mission List
def primary_mission(mission):
    primary_msns = [
    'Communications',
    'Earth Observation',
    'Navigation',
    'Meteorological',
    'Space Observation',
    'Space Science',
    'Technology Development',
    'Unknown'
    ] 
    
    if pd.isnull(mission):
        return 'Other'
    
    missions = [m.strip().title() for m in mission.split('/')]

    for m in missions:
        if m in primary_msns:
            return m
    return 'Other'

sats2015_df['Primary_Mission'] = sats2015_df['Mission'].apply(primary_mission)
sats2015_df

In [21]:
sats2015_df.groupby('Primary_Mission').size()

Primary_Mission
Communications            5083
Earth Observation         1031
Meteorological               6
Navigation                  90
Other                      112
Space Observation            9
Space Science               60
Technology Development     307
Unknown                     10
dtype: int64

In [23]:
# Check consistencies in Orbits, make them all upper case
sats2015_df['Orbit'] = sats2015_df['Orbit'].str.strip().str.upper()
sats2015_df.groupby('Orbit').size()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sats2015_df['Orbit'] = sats2015_df['Orbit'].str.strip().str.upper()


Orbit
ELLIPTICAL      24
GEO            250
LEO           6354
MEO             80
dtype: int64