In [30]:
# Data handling tools
import numpy as np
import pandas as pd

# Visualization tools
import seaborn as sns
import matplotlib.pyplot as plt

# Pre-machine learning tools
from sklearn.preprocessing import StandardScaler, LabelEncoder  # Perform data standardization and auto values encoder
from sklearn.model_selection import train_test_split  # Split data

# Machine learning algorithms and tools
from sklearn.neighbors import KNeighborsClassifier  # KNN Classifier
from sklearn.tree import DecisionTreeClassifier  # Decision Tree for classification
from sklearn.model_selection import GridSearchCV  # Exhaustive search over specified parameter values for an estimator

# Post-machine learning analysis
from sklearn.metrics import classification_report, plot_confusion_matrix, accuracy_score  # Report precision, recall, f1-score, and more

# Other
from warnings import filterwarnings  # Warning filters
filterwarnings(action='ignore') # Prevent convergence warning and math errors


In [55]:
# Read the satellite catalogue CSV from the Kaggle input directory into `df`
# and preview its first five rows as the cell output.
df = pd.read_csv(
    "../input/all-the-satellites-in-the-orbit/AllSatellites.csv"
)
df.head()


In [32]:
# Order the records by the 'Launched in' column, smallest value first
# (ascending is the sort_values default), then display the sorted frame.
df = df.sort_values('Launched in')
df

In [54]:
# Normalise satellite names: strip line feeds, carriage returns, hyphens,
# commas, slashes and spaces. One character-class pass removes the same set
# of characters as removing them one after another.
df["Name"] = df["Name"].str.replace(r"[\n\r\-,/ ]", "", regex=True)
df

In [33]:
# Collapse the agency labels in 'Organisation' to short country/bloc names.
#
# Each entry is applied as a literal *substring* replacement, in the order
# listed, so any value that contains a listed label has that part rewritten.
# regex=False keeps the labels literal on every pandas version (the default
# of `regex` changed to False in pandas 2.0).
#
# NOTE(review): output spelling is inconsistent ('CANADA' is upper-case, the
# rest are title-case); kept as-is so downstream cells see the same values.
ORGANISATION_TO_COUNTRY = {
    'JAXA - Japan': 'Japan',
    'J-spacesystems - Japan': 'Japan',
    'NPO - Mashinostroyeniya': 'Mashinostroyeniya',
    'NASA - USA': 'USA',
    'ASAL - Algeria': 'Algeria',
    'NRSCC - China': 'China',
    'TUBITAK UZAY/STRI- Turkey': 'Turkey',
    'DLR - Germany': 'Germany',
    'BlackSky - USA': 'USA',
    'ISRO - India': 'India',
    'INPE - Brazil': 'Brazil',
    'CNSA - China': 'China',
    'US Naval Research Lab': 'USA',
    'ROSCOSMOS - Russia': 'Russia',
    'CAST - China': 'China',
    'KARI - Korea': 'Korea',
    # NOTE(review): mapped to 'ESA' in the original notebook - confirm this is intended.
    'ImageSAT International': 'ESA',
    'CNES - France': 'France',
    'NOAA': 'USA',
    'EUMETSAT': 'ESA',
    'JMA - Japan': 'Japan',
    'CSA - Canadian Space Agency': 'CANADA',
    'NSPO - Taiwan': 'Taiwan',
    'NSMC - China': 'China',
    'Maxar/DigitalGlobe - USA': 'USA',
    'US Department of Energy': 'USA',
    'CONAE - Argentina': 'Argentina',
    'ROSHYDROMET - Russia': 'Russia',
    'SNSB - Sweden': 'Sweden',
    'CSIRO - Australia': 'Australia',
    'US Department of Defense': 'USA',
    'UK Space Agency': 'UK',
    'NASRDA - Nigeria': 'Nigeria',
    'LAPAN - Indonesia': 'Indonesia',
    'MDA - Canada': 'CANADA',
    'Planet - USA': 'USA',
    'IAI - Israel': 'Israel',
    # NOTE(review): mapped to 'USA' in the original notebook - confirm this is intended.
    'Airbus Defence and Space': 'USA',
    'ATSB - Malaysia': 'Malaysia',
    'ICEYE - Finland': 'Finland',
    'SSTL - UK': 'UK',
    'GISTDA - Thailand': 'Thailand',
    'NSAU - Ukraine': 'Ukraine',
    'ASI - Italy': 'Italy',
    'GEOSAT - Spain': 'Spain',
    'CMA - China': 'China',
    'UrtheCast - Canada': 'CANADA',
    # NOTE(review): mapped to 'Russia' in the original notebook - confirm this is intended.
    'earth-i': 'Russia',
}

# Distinct labels before cleaning, kept so the effect of the mapping is visible.
n_organisations_raw = df['Organisation'].nunique()

# Apply the replacements on a working Series and assign back once.
organisation = df['Organisation']
for agency_label, country in ORGANISATION_TO_COUNTRY.items():
    organisation = organisation.str.replace(agency_label, country, regex=False)
df['Organisation'] = organisation

print(
    f"Distinct 'Organisation' values: {n_organisations_raw} before cleaning, "
    f"{df['Organisation'].nunique()} after"
)

In [34]:
# Show the column labels of the dataset as the cell output.
df.columns

In [35]:
# Duplicate check: count the rows that repeat an earlier row once, then report
# both the absolute number and its share of the dataset.
duplicate_rows = df.duplicated().sum()
print(f'Duplicates in the dataset: {duplicate_rows}')
print(f'Percentage of duplicates: {duplicate_rows/len(df)*100}%')  # 0.0% means there are no duplicated rows


> Observation

The dataset contains no duplicate rows.

In [36]:
# Cardinality: number of distinct values in each column (cell output).
df.nunique() # Shows which columns have the most and the fewest distinct values

In [37]:
# Data validation and preprocessing: print column dtypes and non-null counts.
df.info()

# Build the boolean missing-value mask once; it feeds both the plot and the counts.
# (seaborn is already imported as `sns` in the notebook's import cell.)
null_mask = df.isnull()

# Heatmap of the mask: one row per record, one column per field, so missing
# entries show up as contrasting cells.
sns.heatmap(null_mask, yticklabels=False, cbar=False, cmap='viridis')

# Number of missing values per column (cell output).
null_mask.sum()

In [38]:
# Count of records per 'Organisation' value, drawn on an explicit 50x15 inch axes.
fig, ax = plt.subplots(figsize=(50, 15))
sns.countplot(x='Organisation', data=df, palette='plasma', ax=ax)

In [39]:
# Count of records per 'Out of service since' value, drawn on an explicit 30x15 inch axes.
fig, ax = plt.subplots(figsize=(30, 15))
sns.countplot(x='Out of service since', data=df, palette='plasma', ax=ax)

In [40]:
# Count of records per 'Launched in' value, drawn on an explicit 40x15 inch axes.
fig, ax = plt.subplots(figsize=(40, 15))
sns.countplot(x='Launched in', data=df, palette='plasma', ax=ax)

In [41]:
# Count of records per 'Orbit Height (km)' value, drawn on an explicit 60x15 inch axes.
fig, ax = plt.subplots(figsize=(60, 15))
sns.countplot(x='Orbit Height (km)', data=df, palette='plasma', ax=ax)

In [42]:
# Count of records per 'Repeat Cycle (days)' value, drawn on an explicit 25x20 inch axes.
fig, ax = plt.subplots(figsize=(25, 20))
sns.countplot(x='Repeat Cycle (days)', data=df, palette='plasma', ax=ax)

In [43]:
# Class balance of the target column 'Orbit Type', drawn on an explicit 15x15 inch axes.
fig, ax = plt.subplots(figsize=(15, 15))
sns.countplot(x='Orbit Type', data=df, palette='plasma', ax=ax)

In [44]:
import plotly.express as px
import plotly.graph_objects as go

In [45]:
# Same 'Orbit Type' count plot as the earlier target-variable cell, on a smaller 10x10 inch axes.
# NOTE(review): only the figure size differs from that cell - consider keeping just one of them.
fig, ax = plt.subplots(figsize=(10, 10))
sns.countplot(x='Orbit Type', data=df, palette='plasma', ax=ax)


In [46]:
# Cumulative histogram of a single column.
# The original cell passed the whole DataFrame as `x`, but go.Histogram takes
# one 1-D sequence of sample values.
# NOTE(review): 'Launched in' is assumed to be the intended column (the frame
# is sorted by it earlier in the notebook) - confirm or swap in the right one.
histogram_column = 'Launched in'
fig = go.Figure(
    data=[go.Histogram(x=df[histogram_column], cumulative_enabled=True)]
)
fig.update_layout(
    title=f'Cumulative count by {histogram_column}',
    xaxis_title=histogram_column,
    yaxis_title='Cumulative count',
)
fig.show()

In [47]:
import plotly 
import plotly.express as ex


In [82]:
# Notched box plot of 'Orbit Height (km)' grouped by 'Orbit Type'.
ex.box(
    df,
    x='Orbit Type',
    y='Orbit Height (km)',
    notched=True,
    template='seaborn',
    width=1000,
    height=600,
)


In [83]:
# Notched box plot of 'Repeat Cycle (days)' grouped by 'Orbit Type'.
ex.box(
    df,
    x='Orbit Type',
    y='Repeat Cycle (days)',
    notched=True,
    template='seaborn',
    width=1000,
    height=600,
)


In [84]:
# Notched box plot of 'Launched in' grouped by 'Orbit Type'.
ex.box(
    df,
    x='Orbit Type',
    y='Launched in',
    notched=True,
    template='seaborn',
    width=1000,
    height=600,
)


In [81]:
# Notched box plot of 'Out of service since' grouped by 'Orbit Type'
# (this one is 500 px tall, the other box plots use 600 px).
ex.box(
    df,
    x='Orbit Type',
    y='Out of service since',
    notched=True,
    template='seaborn',
    width=1000,
    height=500,
)

* Satellites in circular and near-polar orbits are no longer in service.