In [None]:

import numpy as np 
import pandas as pd 
import seaborn as sns

#importing plotting libraries

import plotly.graph_objs as go
import plotly.express as px
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot, plot
import matplotlib.pyplot as plt
# Initialise plotly's offline notebook mode. connected=False embeds plotly.js in
# the notebook itself instead of loading it from a CDN at render time.
init_notebook_mode(connected=False)

#importing modeling libraries
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory recursively and print the full path of
# every file found beneath it.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the space-missions CSV; the path is relative to the notebook's working directory.
df = pd.read_csv("../input/all-space-missions-from-1957/Space_Corrected.csv")
# Preview the first five rows.
df.head()

In [None]:
# Print each column's name, non-null count and dtype, plus memory usage.
df.info()

In [None]:
# Show pandas' default descriptive statistics for the frame.
df.describe()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Count the missing (NaN / None) values in each column.
df.isna().sum()

## Data pre-processing

In [None]:
# Parse the launch timestamp and derive the launch year.
# utc=True normalises every row to one tz-aware datetime dtype, so the
# vectorised .dt accessor is available even when some rows carry a timezone
# suffix and others do not.
# NOTE(review): this treats timestamps without a timezone as UTC — confirm
# that matches the data in 'Datum'.
df['DateTime'] = pd.to_datetime(df['Datum'], utc=True)
df['Year'] = df['DateTime'].dt.year

# Extract the country of launch: the last ", "-separated token of 'Location'.
# The .str accessor propagates missing values instead of raising on them.
df["Country"] = df["Location"].str.split(", ").str[-1]

df.head(10)

In [None]:
# Number of rows per company, most frequent first.
df['Company Name'].value_counts()

## Data visualisation

In [None]:
# Year vs number of launches, drawn as horizontal bars (one bar per year).
fig, ax = plt.subplots(figsize=(8, 18))
sns.countplot(y=df['Year'], ax=ax)
ax.set_title("Year vs. # Launches", fontsize=18)
# Passing the column as y= puts the years on the vertical axis and the counts
# on the horizontal one, so the axis labels have to follow that orientation.
ax.set_xlabel("# Launches", fontsize=16)
ax.set_ylabel("Year", fontsize=16)
ax.tick_params(labelsize=12)
fig.tight_layout()
plt.show()

In [None]:
# Build one flat list with a launch-vehicle family label for every
# '|'-separated part of every Detail string.
# Families are tested in the order listed and the first substring match wins;
# a part that matches none of them is labelled 'Other'.
# NOTE(review): every part of the split is classified, not only one of them —
# confirm that all parts of 'Detail' are meant to be counted as launch vehicles.
vehicle_families = (
    'Cosmos',
    'Vostok',
    'Tsyklon',
    'Ariane',
    'Atlas',
    'Soyuz',
    'Delta',
    'Titan',
    'Molniya',
    'Zenit',
    'Falcon',
    'Long March',
    'PSLV',
    'GSLV',
    'Thor',
)
details = [
    next((family for family in vehicle_families if family in part), 'Other')
    for detail in df.Detail.values
    for part in (piece.strip() for piece in detail.split('|'))
]

In [None]:
# Tally how often each vehicle family occurs (most frequent first) and show
# the tallies as a bar chart.
counts = dict(pd.Series(details).value_counts(sort=True))

bar_trace = go.Bar(x=list(counts), y=list(counts.values()))
layout_options = dict(
    template='ggplot2',
    margin=dict(l=80, r=80, t=50, b=10),
    title={'text': '<b>Number of Missions in each type of Launch Vehicle</b>', 'x': 0.5},
    font_family='Fira Code',
    title_font_color='#ff3434',
    yaxis_title='<b>Number of Missions</b>',
    xaxis_title='<b>Launch Vehicle</b>',
)

fig = go.Figure(bar_trace)
fig.update_layout(**layout_options)
fig.show()

In [None]:
# Render the number of rows per country as a two-column plotly table.
country_counts = dict(df['Country'].value_counts())

table_header = dict(
    values=['<b>Country Name</b>', '<b>Number of Space Missions</b>'],
    line_color='black',
    fill_color='blue',
    align='left',
    font=dict(color='black', size=14),
)
table_cells = dict(
    # First column: country names; second column: their counts.
    values=[list(country_counts), list(country_counts.values())],
    line_color='black',
    fill_color='white',
    align='left',
    font=dict(color='black', size=13),
)

fig = go.Figure(data=[go.Table(header=table_header, cells=table_cells)])
fig.update_layout(
    width=500,
    height=450,
    margin=dict(l=80, r=80, t=25, b=10),
    title={'text': '<b>Number of Space Missions Per Launch Location</b>', 'x': 0.95},
    font_family='Fira Code',
    title_font_color='#ff0d00',
)
fig.show()

In [None]:
# Fit a label encoder on the mission-status column.
# LabelEncoder comes from the notebook's import cell; it is not re-imported here.
encoder = LabelEncoder()
encoder.fit(df['Status Mission'])
# Colour per integer label.
# NOTE(review): presumably the keys are the integer codes produced by `encoder`,
# which would imply four distinct mission statuses — confirm.
colors = {0 : 'red', 1 : 'Orange', 2 : 'Yellow', 3 : 'Green'}

In [None]:
# Sunburst of the rows grouped first by mission status (inner ring) and then
# by country (outer ring).
hierarchy = ['Status Mission', 'Country']
fig = px.sunburst(df, path=hierarchy)
fig.update_layout(
    margin={'l': 80, 'r': 80, 't': 25, 'b': 10},
    title=dict(text='<b>Countries and Mission Status</b>', x=0.5),
    font_family='Fira Code',
    title_font_color='#8000ff',
)
fig.show()

In [None]:
# Number of rows per mission status.
# The column is passed by keyword: seaborn 0.12+ treats the first positional
# argument as `data`, not as `x`, so a bare positional Series is not plotted
# as the category axis.
ax = sns.countplot(x='Status Mission', data=df)
ax.set_title('Launches by Mission Status')
plt.show()

In [None]:
# Mission status breakdown for five selected companies, with a count label
# above every bar.
plt.figure(figsize=(10,10))
sns.set(style="darkgrid")
ax = sns.countplot(x = 'Company Name',data=df,order=['RVSN USSR','NASA','ISRO','SpaceX','CASC'],hue= "Status Mission",)
for bar in ax.patches:
    height = bar.get_height()
    # A company/status combination with no rows can yield a bar whose height
    # is NaN or zero; there is nothing meaningful to label in that case.
    if not np.isfinite(height) or height <= 0:
        continue
    ax.annotate(
        f"{int(height)}",  # counts are whole numbers; avoid labels like "123.0"
        (bar.get_x() + bar.get_width() / 2, height),
        xytext=(0, 3),  # small gap in points, independent of the y-axis scale
        textcoords='offset points',
        ha='center',
        va='bottom',
    )
plt.show()

In [None]:
# Horizontal count plot: number of rows per launch country.
canvas = plt.figure(figsize=(10, 10))
sns.set(style="darkgrid")
# The axes are created only after the style is set, so they pick up darkgrid.
sns.countplot(data=df, y='Country', ax=canvas.gca())

In [None]:
# Pie chart of the row counts of five selected countries. The percentages are
# relative to these five countries only, not to the whole data set.
# The figure size has to be given to plt.figure(); assigning to `plt.figsize`
# would only create an unused attribute on the pyplot module.
plt.figure(figsize=(10,10))
country = df.groupby('Country')
labels = ['USA','Russia','China','India','France']
# One group-size lookup serves all five countries; a country that is absent
# from the data raises KeyError.
rows_per_country = country.size()
per = [int(rows_per_country[name]) for name in labels]
# explode offsets the fourth wedge (India) to highlight it.
plt.pie(per,labels= labels,explode=[0,0,0,0.1,0], autopct='%1.1f%%')
plt.title('Share of Launches Among Selected Countries', fontsize=18)
plt.show()

In [None]:
# Pie chart of the 'Status Rocket' value counts.
plt.figure(figsize=(10,10))
st = df['Status Rocket'].value_counts()
# NOTE(review): the two colours and the two explode offsets assume the column
# holds exactly two distinct values — confirm.
plt.pie(st,shadow=False,autopct='%1.1f%%',colors=('tab:red', 'tab:blue'),explode=(0,0.05),startangle=40)
# Legend entries are taken from the counted values themselves, so each label
# always matches its wedge regardless of which status is more frequent.
plt.legend(st.index.tolist())
plt.title('Status Rocket', fontsize=18)
plt.show()