In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found under the Kaggle input directory,
# then print the paths one per line in the order os.walk discovers them.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Data Import and Preparation
* Most of the columns are stored with the `object` dtype.
* Columns `Unnamed: 0` and `Unnamed: 0.1` contain numerical values which are identical to the index. These two columns will be dropped as they are not needed for the data analysis.
* Column ` Rocket` (note the leading space in its name), which indicates the cost of the mission, has a lot of missing values. It will be dropped since there is not enough high-quality data in this column.
* A new column, `Country`, will be added to the dataset. It will contain the country where each rocket launch was carried out.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Read the missions CSV, asking pandas to parse the 'Datum' column as dates,
# then preview the first rows.
df = pd.read_csv(
    '/kaggle/input/all-space-missions-from-1957/Space_Corrected.csv',
    parse_dates=['Datum'],
    low_memory=False,
)
df.head()

In [None]:
# Print a summary of the loaded frame (columns, non-null counts, dtypes)
df.info()

In [None]:
# Remove the following columns: Unnamed: 0, Unnamed: 0.1, Rocket
# Re-assigning the result (instead of `inplace=True`) together with
# `errors='ignore'` keeps this cell idempotent: running it again after the
# columns are already gone no longer raises a KeyError.
df = df.drop(columns=['Unnamed: 0', 'Unnamed: 0.1', ' Rocket'], errors='ignore')


In [None]:
# Work on an explicit copy so that the columns added to `space_df` below can
# never leak back into `df` (wrapping with pd.DataFrame(df) does not guarantee
# an independent object).
space_df = df.copy()

# Add Country column from Location column.
# The country is the last comma-separated field of the location string.
# Splitting on commas (rather than on whitespace) keeps multi-word names such as
# "New Zealand" intact; missing locations stay missing instead of raising.
space_df['Country'] = (
    space_df['Location']
    .str.rsplit(',', n=1)
    .str[-1]
    .str.strip()
)


# Data analysis - Total Rocket Launches
The figures below visualize the total rocket missions carried out since 1957, taking into account how many of these launches were successful. They also look into the number of rockets that are currently active and retired. The plots are irrespective of country and company.
* On average, there are around 67 rocket missions per year. Space exploration was at its peak between 1965 and 1978, when at least 80 rockets were launched into space each year. A similar trend has been seen during the last five years (2016 - present).
* The majority of the rocket missions are successful. Below are the statistics:
    - Success: 3879
    - Failure: 339
    - Partial Failure: 102
    - Prelaunch Failure: 4
* At the moment, there are 790 active rockets. This comprises around 18.3% of the total rockets launched since 1957.

In [None]:
# Yearly rocket missions
# Normalise the launch timestamps to UTC and keep only the calendar year.
datetimes = pd.to_datetime(space_df['Datum'], utc=True)
space_df['LaunchYear'] = datetimes.dt.year

# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and the
# old names were dropped in later releases, so use whichever spelling this
# installation provides instead of hard-coding one.
darkgrid_style = 'seaborn-v0_8-darkgrid'
if darkgrid_style not in plt.style.available:
    darkgrid_style = 'seaborn-darkgrid'
plt.style.use(darkgrid_style)

plt.figure(figsize=(20, 8))
# Note: despite its name, `fig` holds the matplotlib Axes returned by seaborn.
fig = sns.countplot(x="LaunchYear", data=space_df)
fig.set(title='Yearly Rocket Missions since 1957',
        xlabel='Launch Year',
        ylabel='Number of Rocket Missions');
plt.xticks(rotation='vertical');
# Dashed horizontal reference line at the mean number of missions per year.
fig.axhline(space_df['LaunchYear'].value_counts().mean(), ls='--');

In [None]:
# Rocket launches status: number of missions per outcome category
plt.figure(figsize=(10, 6))
# `fig` holds the matplotlib Axes returned by seaborn.
fig = sns.countplot(data=space_df, x="Status Mission")
fig.set_title('Total Successful/Failed Launch Missions')
fig.set_xlabel('Mission Status')
fig.set_ylabel('Counts');

In [None]:
# Status of the rockets: number of rows per rocket status value
plt.figure(figsize=(10, 6))
# `fig` holds the matplotlib Axes returned by seaborn.
fig = sns.countplot(data=space_df, x="Status Rocket")
fig.set_title('Active and Retired Rockets')
fig.set_xlabel('Status of Rockets')
fig.set_ylabel('Counts');

# Data Analysis - by Location, Country, Company
This section covers the other details of the dataset with respect to the rocket missions. It takes into account where the rockets were launched, which company manufactured the rockets, and the current status of rockets per country.
* Most rocket missions are carried out by Russia and the United States, with each country totaling almost 1400 rocket launches since 1957. Kazakhstan, France, China, and Japan have also contributed considerably to space exploration.
* Outside of the United States, the two most commonly used space launch facilities are the Baikonur Cosmodrome (Kazakhstan) and the Plesetsk Cosmodrome (Russia). In the United States, most space rockets were launched from Florida (Kennedy Space Center and Cape Canaveral AFS) and California (Vandenberg AFB).
* Russia (1303) and the United States (1186) have the highest number of successful rocket launches. They are followed by Kazakhstan, France, China and Japan.
* Currently, China has the highest number of active space rockets. It is followed by the United States and France.
* The majority of the rockets that were used for space exploration were manufactured by RVSN USSR. Other notable rocket manufacturers are SpaceX, NASA, Boeing, and General Dynamics.

In [None]:
# Rocket missions by country (Top 10)
# value_counts() returns a Series (index = country, values = number of missions),
# so the bar chart needs no x/y column selectors.
fig = space_df['Country'].value_counts().head(10).plot(kind='bar', figsize=(14,4), color='salmon');
fig.set(title='Rocket Missions by Country (Top 10)',
        xlabel='Country',
        ylabel='Number of Missions');

In [None]:
# Rocket Launches by Location (Top 20)

# value_counts() returns a Series (index = location, values = number of missions),
# so the horizontal bar chart needs no x/y column selectors.
fig = space_df['Location'].value_counts().head(20).plot(kind='barh', figsize=(14,8), color='salmon');
fig.set(title='Top 20 Space Launch Facilities',
        xlabel='Number of Missions',
        ylabel='Launch Location');
# barh draws the first row at the bottom; flip the axis so the busiest site is on top.
fig.invert_yaxis()

In [None]:
# Rocket Missions: outcome counts per country, for the 7 countries with the
# most successful launches.
x = pd.crosstab(space_df['Country'], space_df['Status Mission'])
x = x.sort_values(by='Success',ascending=False)
fig = x.head(7).plot(kind='bar', figsize=(10, 8), color=['salmon','lightblue','orange','lightgreen'])
# Vertical bars: countries run along the x axis, counts along the y axis.
fig.set(title='Mission Status by Country',
        xlabel='Country',
        ylabel='Rocket Counts');

In [None]:
# Active and Retired Rockets by Country (7 countries with the most active rockets)
x = pd.crosstab(space_df['Country'], space_df['Status Rocket'])
x = x.sort_values(by='StatusActive',ascending=False)

# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and the
# old names were dropped in later releases, so use whichever spelling this
# installation provides instead of hard-coding one.
darkgrid_style = 'seaborn-v0_8-darkgrid'
if darkgrid_style not in plt.style.available:
    darkgrid_style = 'seaborn-darkgrid'
plt.style.use(darkgrid_style)

fig = x.head(7).plot(kind='barh', figsize=(10, 5), color=['lightblue','salmon'])
fig.set(title='Rocket Status by Country',
        xlabel='Rocket Counts',
        ylabel='Country');
# barh draws the first row at the bottom; flip the axis so the top country is on top.
fig.invert_yaxis()

In [None]:
# Rocket launches by companies (Top 20)
# value_counts() returns a Series (index = company name, values = number of rows),
# so the horizontal bar chart needs no x/y column selectors.
fig = space_df['Company Name'].value_counts().head(20).plot(kind='barh', figsize=(14,8), color='salmon');
fig.set(title='Space Rockets by Company/Manufacturer',
        xlabel='Number of Rockets',
        ylabel='Company Name/Manufacturer');
# barh draws the first row at the bottom; flip the axis so the top company is on top.
fig.invert_yaxis()