In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import scipy.stats as stats

In [2]:
pd.options.display.float_format = '{:,.2f}'.format

# Create locators for ticks on the time axis

from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

In [7]:
df_missions = pd.read_csv('mission_launches.csv')

# Preliminary Data Exploration

**Challenge**: Check out these two DataFrames ☝️.
* What is the shape of df_missions?
* How many rows and columns?
* What are the column names?
* Which years are included in the dataset?
* Are there any NaN values or duplicates?
* What were the average number of missions that took place per decade?

In [None]:
df_missions.shape
df_missions.columns
df_missions.head()

### Check Remove Nan Values

In [None]:
df_missions.isna().values.any() #True
df_missions.duplicated().values.any()
df_missions.info()


In [None]:
nan_rows = df_missions[df_missions.Price.isna()]
df_missions_clean = df_missions.dropna()
print(df_missions.shape)
print(df_missions_clean.shape)

### Check and remove Duplicates


In [None]:
df_missions_clean.duplicated().values.any() #False
# df_missions_clean.drop_duplicates() #This is how to drop duplicates

In [56]:
clean_df = df_missions_clean.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'])
clean_df

Unnamed: 0,Organisation,Location,Date,Detail,Rocket_Status,Price,Mission_Status
1605,NASA,"LC-39B, Kennedy Space Center, Florida, USA","Mon Oct 18, 1993 14:53 UTC",Space Shuttle Columbia | STS-58,StatusRetired,450.00,Success
1076,NASA,"LC-39B, Kennedy Space Center, Florida, USA","Mon Oct 07, 2002 19:45 UTC",Space Shuttle Atlantis | STS-112,StatusRetired,450.00,Success
762,NASA,"LC-39A, Kennedy Space Center, Florida, USA","Wed Jul 15, 2009 22:03 UTC",Space Shuttle Endeavour | STS-127,StatusRetired,450.00,Success
1590,NASA,"LC-39A, Kennedy Space Center, Florida, USA","Thu Feb 03, 1994 12:10 UTC",Space Shuttle Discovery | STS-60,StatusRetired,450.00,Success
737,NASA,"LC-39A, Kennedy Space Center, Florida, USA","Mon Feb 08, 2010 09:14 UTC",Space Shuttle Endeavour | STS-130,StatusRetired,450.00,Success
...,...,...,...,...,...,...,...
3560,NASA,"LC-39B, Kennedy Space Center, Florida, USA","Sun May 18, 1969 16:49 UTC",Saturn V | Apollo 10,StatusRetired,,Success
3584,NASA,"LC-39A, Kennedy Space Center, Florida, USA","Mon Mar 03, 1969 16:00 UTC",Saturn V | Apollo 9,StatusRetired,,Success
3603,NASA,"LC-39A, Kennedy Space Center, Florida, USA","Sat Dec 21, 1968 12:51 UTC",Saturn V | Apollo 8,StatusRetired,,Success
3683,NASA,"LC-39A, Kennedy Space Center, Florida, USA","Thu Apr 04, 1968 12:00 UTC",Saturn V | Apollo 6,StatusRetired,,Partial Failure


In [59]:
clean_df.describe()

Unnamed: 0,Price
count,949.0
mean,129.8
std,143.22
min,5.3
25%,40.0
50%,62.0
75%,164.0
max,450.0


# Finding Most Expensive Missions


In [58]:
clean_df.sort_values('Price', ascending=False)

Unnamed: 0,Organisation,Location,Date,Detail,Rocket_Status,Price,Mission_Status
1605,NASA,"LC-39B, Kennedy Space Center, Florida, USA","Mon Oct 18, 1993 14:53 UTC",Space Shuttle Columbia | STS-58,StatusRetired,450.00,Success
2220,NASA,"LC-39A, Kennedy Space Center, Florida, USA","Fri Feb 03, 1984 13:00 UTC",Space Shuttle Challenger | STS-41-B,StatusRetired,450.00,Success
2094,NASA,"LC-39A, Kennedy Space Center, Florida, USA","Wed Nov 06, 1985 17:00 UTC",Space Shuttle Challenger | STS-61-A,StatusRetired,450.00,Success
2091,NASA,"LC-39A, Kennedy Space Center, Florida, USA","Wed Nov 27, 1985 00:29 UTC",Space Shuttle Atlantis | STS-61-B,StatusRetired,450.00,Success
2079,NASA,"LC-39A, Kennedy Space Center, Florida, USA","Tue Jan 28, 1986 16:38 UTC",Space Shuttle Challenger | STS-51-L,StatusRetired,450.00,Failure
...,...,...,...,...,...,...,...
3560,NASA,"LC-39B, Kennedy Space Center, Florida, USA","Sun May 18, 1969 16:49 UTC",Saturn V | Apollo 10,StatusRetired,,Success
3584,NASA,"LC-39A, Kennedy Space Center, Florida, USA","Mon Mar 03, 1969 16:00 UTC",Saturn V | Apollo 9,StatusRetired,,Success
3603,NASA,"LC-39A, Kennedy Space Center, Florida, USA","Sat Dec 21, 1968 12:51 UTC",Saturn V | Apollo 8,StatusRetired,,Success
3683,NASA,"LC-39A, Kennedy Space Center, Florida, USA","Thu Apr 04, 1968 12:00 UTC",Saturn V | Apollo 6,StatusRetired,,Partial Failure


In [47]:
# prompt: Using dataframe df_missions_clean: bar graph for most expensive missions using plotly

import plotly.express as px
fig = px.bar(df_missions_clean.sort_values(by='Price', ascending=False).head(10), x='Price', y='Detail', orientation='h')
fig.show()
