In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import re
import tensorflow as tf
import collections as cl
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
print("SETUP COMPLETE!")

In [None]:
# Load the CSV into the notebook-wide `data` frame; the path is relative to
# the notebook's working directory.
data = pd.read_csv('../input/all-space-missions-from-1957/Space_Corrected.csv')
print('DATA IMPORTING SUCCESFULLY!')

In [None]:
print('PRINTING THE DATA')
# Bare last expression: the notebook renders the frame as this cell's output.
data

In [None]:
print('PREPROCESSING, REMOVING DUPLICATED OR REDUNDANT DATA!')
# Drops the first two columns by position plus 'Location' and 'Detail' by label.
# The drop is in place, so this cell is not re-runnable: on a second run
# 'Location' and 'Detail' are already gone and drop() raises KeyError.
data.drop([data.columns[0], data.columns[1], 'Location', 'Detail'], axis=1, inplace=True)

In [None]:
print('PRINTING THE DATA AFTER PREPROCESSING')
# Display the frame as it stands after the column drop.
data

In [None]:
# Show the remaining column labels; the next cell overwrites them positionally.
data.columns

In [None]:
print('REMOVING THE SPACE FROM ROCKET COLUMN')
# Replaces all five column labels positionally, so this relies on the column
# order left by the earlier drop.
# NOTE(review): per the message above, presumably only the rocket column's
# label actually changes — confirm against the `data.columns` output.
data.columns = ['Company Name', 'Datum', 'Status Rocket', 'Rocket', 'Status Mission']

In [None]:
print('CHECKING NULL DATA')
# Per-column count of missing values.
data.isnull().sum()

In [None]:
print ('CHECKING ROCKET VALUES')
# Distinct raw values of the 'Rocket' column, inspected before conversion.
data['Rocket'].unique()

In [None]:
print('CONVERTING AND FORMATTING THE DATA')
# Preview only: the converted Series is displayed but not stored, so `data`
# is unchanged by this cell. Values are stringified, commas are removed, and
# the result is cast to float32.
data['Rocket'].astype(str).apply(lambda data_format: data_format.replace(',', '')).astype(np.float32)

In [None]:
print('ASSIGNING CONVERTED VALUES')
# Stringify the column, strip the comma separators with the vectorised string
# accessor, then parse the result as 32-bit floats.
data['Rocket'] = (
    data['Rocket']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(np.float32)
)

In [None]:
print('FILLING MISSING VALUES')
# Impute missing 'Rocket' values with the column mean.
# NOTE(review): the mean is taken over every row, before the later train/test
# split — confirm that is acceptable for the evaluation.
data['Rocket'] = data['Rocket'].fillna(data['Rocket'].mean())

In [None]:
print('CHEKING MISSING VALUES')
# Re-count missing values per column after the imputation step.
data.isnull().sum()

In [None]:
print('EXTRACTING THE YEAR')
def get_year(date):
    """Return the year of a launch date string as ``np.int16``.

    The year is the first whitespace-separated token after the last comma,
    e.g. ``'Fri Aug 07, 2020 05:12 UTC'`` -> ``2020``. Any amount of
    whitespace (including none) between the comma and the year is accepted.

    Parameters
    ----------
    date : str
        Date text containing the year after its last comma.

    Returns
    -------
    numpy.int16
        The parsed year.

    Raises
    ------
    ValueError
        If nothing follows the last comma, or the token is not an integer.
    """
    # Everything after the last comma (or the whole string when there is no
    # comma), split on whitespace so the spacing around the year is irrelevant.
    tokens = date.rpartition(',')[2].split()
    if not tokens:
        raise ValueError(f'no year found in date string: {date!r}')
    return np.int16(tokens[0])

In [None]:
print('CHECKING THE OUTPUT FOR A NUMBER WITH SPACE')
# Sanity check: feed np.int16 a numeric string with a leading space and show
# the result.
np.int16(' 2020')

In [None]:
print('EXTRACTING THE MONTH')
def get_month(date):
    """Return the month token of a launch date string.

    Takes the text before the first digit, discards everything up to its
    first whitespace character, and strips the remainder, e.g.
    ``'Fri Aug 07, 2020 05:12 UTC'`` -> ``'Aug'``.

    Raises ``AttributeError`` when the text before the first digit contains
    no whitespace (the second search returns ``None``).
    """
    leading_text = re.search(r'^[^0-9]*', date)
    after_first_word = re.search(r'\s.*$', leading_text.group(0))
    return after_first_word.group(0).strip()

In [None]:
print('APPLYING get_year AND get_month FUNCTIONS')
# Derive 'Year' and 'Month' from each 'Datum' string, then drop the source
# column. The drop is in place, so re-running this cell fails because 'Datum'
# no longer exists.
data['Year'] = data['Datum'].apply(get_year)
data['Month'] = data['Datum'].apply(get_month)
data.drop('Datum', axis=1, inplace=True)

In [None]:
print('CHECKING RESULTS')
# Display the frame with the new 'Year' and 'Month' columns.
data

In [None]:
print('CHECKING STATUS MISSION')
# Distinct mission outcomes before they are collapsed to two classes.
data['Status Mission'].unique()

In [None]:
print('CONVERTING PRELAUNCH FAILURE & PARTIAL FAILURE TO FAILURE')
# Keep 'Success' untouched; every other status is replaced by 'Failure'.
data['Status Mission'] = data['Status Mission'].where(
    data['Status Mission'] == 'Success', 'Failure'
)

In [None]:
print('CHECKING STATUS MISSION')
# Confirm only the two collapsed outcome labels remain.
data['Status Mission'].unique()

In [None]:
print('COVERTING STATUS MISSION TO BOOL')
# Replace the two string labels with integer codes. The later plot cell's
# message documents the resulting mapping as 0 = failure, 1 = success.
# `encoder` stays available as a notebook-level name after this cell.
encoder = LabelEncoder()
data['Status Mission'] = encoder.fit_transform(data['Status Mission'])

In [None]:
print('CHECKING STATUS ROCKET')
# Distinct rocket status labels, inspected before ordinal encoding.
data['Status Rocket'].unique()

In [None]:
print('ORDERING STATUS ROCKET')
# A label's position in this list is the integer code ordering() assigns to it:
# 'StatusRetired' -> 0, 'StatusActive' -> 1.
rocket_ordering = ['StatusRetired', 'StatusActive']

In [None]:
print('ORDERING MONTH')
# Calendar order; ordering() maps each abbreviation to its list position,
# so 'Jan' -> 0 through 'Dec' -> 11.
month_ordering = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']


In [None]:
print('ORDERING FUNCTION')
def ordering(data, column, order):
    """Encode ``data[column]`` as each value's position within ``order``.

    Returns a new Series of positions; ``data`` itself is not modified.
    A value that is absent from ``order`` makes ``order.index`` raise.
    """
    def position_of(value):
        return order.index(value)

    return data[column].apply(position_of)

In [None]:
print('APPLYING ORDERING')
# Overwrite both columns with integer positions from their ordering lists.
# Not re-runnable: once the columns hold integers, ordering() can no longer
# find the values in the string lists.
data['Month'] = ordering(data, 'Month', month_ordering)
data['Status Rocket'] = ordering(data, 'Status Rocket', rocket_ordering)

In [None]:
# Display the frame after ordinal encoding of 'Month' and 'Status Rocket'.
data

In [None]:
# count() reports the number of non-null entries in the target column.
print('TOTAL:', data['Status Mission'].count())

In [None]:
print('0 = FAILURE, 1 = SUCCESS')
# Horizontal bar chart of how many rows fall into each encoded outcome class.
data['Status Mission'].value_counts().plot(kind='barh')

In [None]:
print('CREATING A MATRIX BASED TO BE USED THROUGH COLUMNS')
def onehot(data, column):
    """Return a copy of ``data`` with ``column`` replaced by indicator columns.

    One indicator column per distinct value of ``data[column]`` is appended
    after the existing columns, then ``column`` itself is removed. The frame
    passed in by the caller is left unmodified.
    """
    indicator_cols = pd.get_dummies(data[column])
    widened = pd.concat([data, indicator_cols], axis=1)
    return widened.drop(columns=column)

In [None]:
print('USING THE FUNCTION TO PRINT A NEW TABLE THROUGH THE MATRIX')
# Replace 'Company Name' with one indicator column per company, then display
# the widened frame. Not re-runnable: the column is gone after the first run.
data = onehot(data, 'Company Name')
data

In [None]:
print('SCALLING')
# Separate the target (encoded mission status) from the feature columns;
# no scaling happens in this cell despite the message.
y = data['Status Mission']
x = data.drop('Status Mission', axis=1)

In [None]:
print('SET THE VALUE BETWEEN 0 ~ 1')
# Min-max scale every feature column and rebuild a DataFrame with the same
# column names (fit_transform returns a plain array).
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test rows influence the scaling — consider fitting on training rows only.
scaler = MinMaxScaler()
x = pd.DataFrame(scaler.fit_transform(x), columns=x.columns)

In [None]:
print('PRINTING THE TRAINING MODEL')
# Display the scaled feature matrix (this is the model input, not a model).
x

In [None]:
print('IDENTIFYING THE METRICS')
# Share of rows labelled 1 in the target. Printed explicitly: a bare
# expression is only rendered when it is the last statement of a cell, so the
# original value was silently discarded.
print('POSITIVE CLASS SHARE:', y.sum() / len(y))
# 70/30 split with a fixed seed so that Restart & Run All reproduces the same
# partition (and therefore comparable training curves and test scores).
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.7, random_state=42)

In [None]:
print('CALCULATING DECIMALS TO APPROACH THEM TO THEIR CLOSEST INTEGER')
# Size the input layer from the training matrix instead of hard-coding 60, so
# the model still builds if the number of one-hot company columns changes.
inputs = tf.keras.Input(shape=(x_train.shape[1],))
# Use a dedicated name for the intermediate tensors: reusing `x` here would
# overwrite the feature DataFrame and break re-running the earlier cells.
hidden = tf.keras.layers.Dense(16, activation='relu')(inputs)
hidden = tf.keras.layers.Dense(16, activation='relu')(hidden)
# Single sigmoid unit: one output in (0, 1) for the binary target.
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(hidden)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
print('COMPILING AND TRAINING THE MODEL')
# Training hyperparameters, kept as notebook-level names.
batch_size = 32
epochs = 50

# Binary-classification setup: Adam optimiser, cross-entropy loss, AUC metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name='auc')])

# Train on the training partition, reserving 20% of it for validation; the
# per-epoch losses and metrics are kept in `history`.
history = model.fit(x_train, y_train, validation_split=0.2, batch_size=batch_size, epochs=epochs)

In [None]:
print('PLOTTING THE DATA')
plt.figure(figsize=(14, 10))

train_loss = history.history['loss']
val_loss = history.history['val_loss']
# Derive the x-axis from the recorded history so it always matches the number
# of epochs actually run; epochs are numbered from 1.
epochs_range = range(1, len(train_loss) + 1)

plt.plot(epochs_range, train_loss, label="Training Loss")
plt.plot(epochs_range, val_loss, label="Validation Loss")

plt.xlabel("Epoch")
plt.ylabel("Loss")
# The location must be passed as `loc=`: a lone positional string is treated
# as the sequence of labels, which relabels the two curves 'u' and 'p'.
plt.legend(loc='upper right')

plt.show()

In [None]:
print('LOST DATA')
# Zero-based position of the lowest validation loss. The plot's epoch axis
# starts at 1, so the matching epoch number is this value plus one.
np.argmin(val_loss)

In [None]:
print('CHECKING ACCURACY')
# Evaluate on the held-out test partition. The model was compiled with an AUC
# metric only, so this reports loss and AUC, not accuracy.
model.evaluate(x_test, y_test)