In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

import plotly.graph_objects as go
import plotly.express as px

import pycountry

![1](https://media.giphy.com/media/mi6DsSSNKDbUY/giphy.gif)

## <p style="color:Maroon; font-weight: bold; font-variant: small-caps;">Content</p>


* [Importing Data](#1)
* [First Look at the Data](#2)
* [Pre-Processing](#3)
    * [Dropping Columns](#4)
    * [Changing Column Name](#5)
    * [Fixing Type of Some Columns](#6)
    * [Separating Country Names From Location](#7)
* [EDA with Plotly](#8)
    * [Countries with the Most Space Corps](#9)
    * [Countries on Map](#10)
    * [Creating a new DF which just includes USA-Russia and China](#11)
    * [Countries and Corps with the Most Space Missions](#12)
    * [Success Rates of the Missions](#13)
    * [How Many Space Missions Are Active?](#14)
    * [The spending budget by Countries and Space Corps](#15)
    * [The spending avg budget for Each Mission by Countries and Space Corps](#16)
    * [Distribution of Location by Mission Numbers](#17)
    * [Does the Month Matter To Start Mission?](#18)
    * [The Mission Numbers by Year](#19)



<a id="1"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">Importing Data</p>

In [None]:
# Read the raw CSV once and keep that frame untouched; the rest of the
# notebook works on an independent copy bound to `df`.
csv_path = "/kaggle/input/all-space-missions-from-1957/Space_Corrected.csv"
space_missions = pd.read_csv(csv_path)
df = space_missions.copy()

<a id="2"></a>
## <p style="color:Maroon; font-weight: bold; font-variant: small-caps;">First Look at the Data</p>

In [None]:
# Peek at the first rows to see the column names and the raw value formats.
df.head()

In [None]:
# Column dtypes and non-null counts, to spot columns that need cleaning.
df.info()

**There are four problems which we should fix:**<br><br>
1) We will drop the columns named "Unnamed: 0" and "Unnamed: 0.1".<br>
2) We should rename the Rocket column, because its name starts with a space character.<br>
3) We have to change the types of the Datum and Rocket columns.<br>
4) We should be careful when using the Rocket column because it has a lot of null values; otherwise, we can get unexpected values.

In [None]:
# Two-column reference table: each dataset column name next to a short
# description of what the column holds. The two lists are parallel, so the
# i-th description belongs to the i-th column name.
column_names = [
    'Company Name', 'Location', 'Datum', 'Detail',
    'Status Rocket', 'Rocket', 'Status Mission',
]
column_descriptions = [
    "Name of the Company",
    "Place where the Mission Started",
    "Date of the Mission Started",
    "Information About Rocket",
    "Information About Status of Rocket. It is Active or Retired",
    "Cost of the Mission",
    # Fixed typo in the displayed text ("wasa" -> "was a").
    "Information About Status of Mission. It was a Success or Not",
]
values = [column_names, column_descriptions]

fig = go.Figure(data=[go.Table(
    columnwidth=[80, 400],
    header=dict(
        values=[['<b>COLUMN NAME</b>'],
                ['<b>DESCRIPTION</b>']],
        line_color='darkslategray',
        fill_color='royalblue',
        align=['left', 'center'],
        font=dict(color='white', size=12),
        height=40,
    ),
    cells=dict(
        values=values,
        line_color='darkslategray',
        fill=dict(color=['paleturquoise', 'white']),
        align=['left', 'center'],
        font_size=12,
        height=30,
    ),
)])
fig.show()

<a id="3"></a>
## <p style="color:Maroon; font-weight: bold; font-variant: small-caps;">Pre-Processing</p>

<a id="4"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">Dropping Columns</p>

In [None]:
# Remove the two "Unnamed" columns; nothing later in the notebook reads them.
df = df.drop(columns=["Unnamed: 0", "Unnamed: 0.1"])

<a id="5"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">Changing Column Name</p>

In [None]:
# The raw header is " Rocket" (leading space); give it a clean name.
df = df.rename(columns={" Rocket": "Rocket"})

<a id="6"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">Fixing Type of Some Columns</p>

In [None]:
# Rocket arrives as text with "," thousands separators. Strip the commas as a
# literal (not a regex), convert to float64 and scale by one million.
# float64 is used instead of float32 because values of this magnitude exceed
# float32's exact-integer range, so later sums would accumulate rounding error.
# NOTE(review): the 1e6 factor suggests the raw figures are quoted in
# millions - confirm against the dataset description.
rocket_cost = df['Rocket'].str.replace(',', '', regex=False).astype(np.float64)
df['Rocket'] = rocket_cost * 1_000_000

# Parse Datum once and reuse the result for both derived columns.
# utc=True always yields a single tz-aware datetime dtype, so the vectorized
# .dt accessor is available instead of a per-row lambda.
# NOTE(review): assumes the timestamps are naive or already UTC; values with
# another offset would be shifted to UTC before year/month are taken - confirm.
launch_dates = pd.to_datetime(df['Datum'], utc=True)
df['Year'] = launch_dates.dt.year
df['Month'] = launch_dates.dt.month

<a id="7"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">Separating Country Names From Location</p>

In [None]:
# The country is taken to be whatever follows the last ", " in Location.
# Vectorized .str methods replace the per-row lambda; unlike the lambda they
# propagate a missing Location as NaN instead of raising AttributeError.
# rsplit with n=1 splits on the literal separator, so the last piece is the
# same one that split(", ")[-1] returned.
df["country"] = df["Location"].str.strip().str.rsplit(", ", n=1).str[-1]

<a id="8"></a>
## <p style="color:Maroon; font-weight: bold; font-variant: small-caps;">EDA with Plotly</p>

<a id="9"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">Countries with the Most Space Corps</p>

In [None]:
# Number of distinct companies per country, largest first.
# Series.iteritems() was removed in pandas 2.0, so the hand-written loop that
# used it is replaced by groupby().nunique(), which gives the same counts in
# one vectorized step.
companies = (
    df.groupby("country")["Company Name"]
    .nunique(dropna=False)  # count NaN as a value, as len(unique()) did
    .reset_index(name="Company Number")
    .rename(columns={"country": "Country"})
    .sort_values("Company Number", ascending=False)
)

In [None]:
# Render the first seven rows of `companies` as a two-column table.
top_companies = companies.head(7)

header_spec = dict(
    values=list(companies.columns),
    fill_color='paleturquoise',
    align='left',
)
cells_spec = dict(
    values=[top_companies["Country"], top_companies["Company Number"]],
    fill_color='lavender',
    align='left',
)

fig = go.Figure(data=[go.Table(header=header_spec, cells=cells_spec)])
fig.update_layout(title="Countries List which Have More Than One Corp.")
fig.show()

As we see in the table, the top 3 are USA, Russia and China.
* Do not be confused by Kazakhstan: it was part of the Soviet Union, so its number is higher than we would expect. Most of them are no longer valid nowadays.


<a id="10"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">Countries on Map</p>

In [None]:
# Resolve an ISO alpha-3 code for the first seven countries only; on
# assignment the remaining rows are index-aligned and receive NaN.
top_country_names = companies["Country"].head(7)
companies["IsoAlpha3"] = top_country_names.map(
    lambda name: pycountry.countries.search_fuzzy(name)[0].alpha_3
)

# One marker per country, sized by its number of companies.
fig = px.scatter_geo(companies, locations="IsoAlpha3", size="Company Number")
fig.show()

<a id="11"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">Creating a new DF which just includes USA-Russia and China</p>

In [None]:
# From here on the analysis only covers these three countries.
selected_countries = ["USA", "Russia", "China"]
df = df[df["country"].isin(selected_countries)]
df.head()

<a id="12"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">Countries and Corps with the Most Space Missions</p>

In [None]:
# Missions per (country, company): count the Location entries in each group
# and expose the result as a one-column frame named "Mission Number".
test = (
    df.groupby(["country", "Company Name"])["Location"]
    .count()
    .to_frame(name="Mission Number")
)

In [None]:
# Flatten the (country, company) index into columns for plotting. The result
# goes to a new name rather than back into `test`, so re-running this cell
# does not try to reset an index that has already been reset.
mission_counts = test.reset_index()

# Horizontal stacked bars: one bar per country, one coloured segment per company.
fig = px.bar(mission_counts, x="Mission Number", y="country",
             color='Company Name', text="Company Name")
fig.update_layout(
    title='Mission Numbers by Countries and Corp Names',
    yaxis=dict(
        # `titlefont` is a deprecated axis attribute that current plotly
        # rejects; the font is set through title.font instead.
        title=dict(text='Countries', font=dict(size=16)),
        tickfont=dict(size=14),
    ),
)
fig.show()

<a id="13"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">Success Rates of the Missions</p>

In [None]:
# Missions per (country, company, mission status). The count stays in a
# column called "Location" because that is the column being counted.
status_counts = (
    df.groupby(["country", "Company Name", "Status Mission"])["Location"]
    .count()
    .reset_index()
)

# Hierarchy: country -> company -> mission status, sized by mission count.
fig = px.sunburst(
    status_counts,
    path=["country", 'Company Name', 'Status Mission'],
    values='Location',
    color="Company Name",
)
fig.show()

**You can click on a Country or Corp name to see it more clearly.**

<a id="14"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">How Many Space Missions Are Active?</p>

In [None]:
# Missions per (country, company, rocket status), keeping only the rows whose
# rocket status is "StatusActive".
active_counts = (
    df.groupby(["country", "Company Name", "Status Rocket"])["Location"]
    .count()
    .reset_index(name="Numbers")
    .loc[lambda counts: counts["Status Rocket"] == "StatusActive"]
)

fig = px.bar(
    active_counts,
    x="country",
    y="Numbers",
    color="Company Name",
    title="Active Space Missions Number",
)
fig.show()

<a id="15"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">The spending budget by Countries and Space Corps</p>

In [None]:
# Budget figures need a known cost, so keep only rows where Rocket is present.
# Restricting dropna to that column avoids discarding missions merely because
# some unrelated column happens to be null.
df_2 = df.dropna(subset=["Rocket"])

In [None]:
# Total Rocket cost per (country, company), flattened for plotting.
test = (
    df_2.groupby(["country", "Company Name"])["Rocket"]
    .sum()
    .reset_index()
)

fig = px.bar(test, x='country', y='Rocket', color='Company Name')
fig.show()

<a id="16"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">The spending avg budget for Each Mission by Countries and Space Corps</p>

In [None]:
# Average cost per mission for each (country, company).
# Total cost and mission count come from a single aggregation over df_2, so
# this cell no longer depends on `test` left over from the previous cell, nor
# on two separately built frames sharing the same row order.
mission_costs = (
    df_2.groupby(["country", "Company Name"])["Rocket"]
    .agg(["sum", "count"])
    .rename(columns={"sum": "Rocket", "count": "Mission Number"})
    .reset_index()
)
mission_costs["Amount for Each Mission"] = (
    mission_costs["Rocket"] / mission_costs["Mission Number"]
)

fig = px.bar(mission_costs, x='country', y='Amount for Each Mission', color='Company Name')
fig.show()

<a id="17"></a>

### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">Distribution of Location by Mission Numbers</p>

In [None]:
# Missions per launch location, busiest site first. Location is itself a
# grouping key, so the group size equals the count of its Location values.
location_counts = (
    df.groupby(["country", "Location"])
    .size()
    .reset_index(name="Mission Number")
    .sort_values("Mission Number", ascending=False)
)

fig = px.bar(location_counts, x='Mission Number', y='Location', color='country')
fig.show()

<a id="18"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">Does the Month Matter To Start Mission?</p>

In [None]:
# Missions per (country, month). The count column is named "Mission Number"
# so the y axis is labelled with what it shows, not with the name of the
# column that was counted ("Location").
monthly_counts = (
    df.groupby(["country", "Month"])["Location"]
    .count()
    .reset_index(name="Mission Number")
)

fig = px.bar(monthly_counts, x='Month', y='Mission Number', color='country')
fig.show()

**Seems not.**

<a id="19"></a>
### <p style="color:DarkOliveGreen; font-weight: bold; font-variant: small-caps;">The Mission Numbers by Year</p>

In [None]:
# Missions per (country, year). The count column is named "Mission Number"
# so the y axis is labelled with what it shows, not with the name of the
# column that was counted ("Location").
yearly_counts = (
    df.groupby(["country", "Year"])["Location"]
    .count()
    .reset_index(name="Mission Number")
)

fig = px.bar(yearly_counts, x='Year', y='Mission Number', color='country')
fig.show()

### <p style="color:Black; font-weight: bold; font-variant: small-caps;">Thank you for taking the time to review.</p>