<h1 style='background:white; color:green; padding:15px 0 0 0 '> <center> SPACESHIP TITANIC PREDICTION </center> </h1>

> **Importing Required Libraries**

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import iplot
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

In [None]:
# Lift pandas' row/column display limits so tables are never truncated in cell output.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
# Shared plotly layout template passed to the figures in this notebook.
# The font family was misspelled ('Frankling Gothic'), so it could never be resolved.
_template = dict(layout=go.Layout(font=dict(family='Franklin Gothic', size=12), width=1000))
print(tf.__version__)

In [None]:
# Read the training CSV and preview its first rows with a gradient and a row-hover highlight.
raw_data = pd.read_csv('../input/spaceship-titanic/train.csv')
(
    raw_data.head()
    .style
    .background_gradient()
    .set_table_styles([
        {'selector': 'tr:hover', 'props': [('background-color', 'green')]},
    ])
)

<p style='font-size:16px'> The DataFrame preview shows all of the columns, from which we can identify the target column (<code>Transported</code>) and the columns we do not need.
We therefore drop the 'PassengerId' and 'Name' columns, as they are not going to contribute to our prediction.</p>

In [None]:
# Work on a copy without the two columns the notebook treats as non-predictive,
# and keep a handle on the target column.
train_dataset = raw_data.drop(columns=['PassengerId', 'Name'])
y_train = train_dataset.loc[:, 'Transported']

<h1 style='color:#00FF79; font-size:30px;'> <center> ---  Descriptive statistics  --- </center> </h1>

In [None]:
# Summary statistics, one row per column, rounded to 2 decimals and styled for display.
(
    train_dataset.describe().T.round(2)
    .style
    .set_table_styles([
        {'selector': 'tr:hover', 'props': [('background-color', 'green')]},
    ])
    .set_properties(**{'font-size': '17pt'})
)

In [None]:
# Number of missing values in each column.
train_dataset.isna().sum()

# Visualization of Features Distribution

<h3 style='color:red;'> The distribution of each feature with respect to the target feature is visualized with Plotly. </h3>

In [None]:
# Box plot of Age, one box per Transported value, with every data point drawn.
fig = px.box(
    train_dataset,
    y='Age',
    color='Transported',
    points='all',
).update_layout(template=_template, title='Age Distribution')
fig.show()

In [None]:
# Notched box plot of RoomService, one box per Transported value.
fig = (
    px.box(
        train_dataset,
        y='RoomService',
        color='Transported',
        points='all',
        notched=True,
        color_discrete_map={False: 'rgb(255, 0, 153)', True: 'rgb(52, 0, 255)'},
    )
    .update_traces(quartilemethod='exclusive')
    .update_layout(template=_template, title='RoomService Distribution')
)
fig.show()

In [None]:
# Notched box plot of FoodCourt, one box per Transported value.
fig = (
    px.box(
        train_dataset,
        y='FoodCourt',
        color='Transported',
        points='all',
        notched=True,
        color_discrete_map={False: 'rgb(255, 0, 153)', True: 'rgb(227, 255, 0)'},
    )
    .update_traces(quartilemethod='exclusive')
    .update_layout(template=_template, title='Foodcourt Distribution')
)
fig.show()

In [None]:
# Notched box plot of ShoppingMall, one box per Transported value.
fig = px.box(train_dataset, y='ShoppingMall', color='Transported', points='all', notched=True,
             color_discrete_map={False:'rgb(255, 133, 3)', True:'rgb(52, 0, 255)'})
fig.update_traces(quartilemethod='exclusive')
# The title previously read 'RoomService Distribution' (copy-paste slip); it now matches the plotted column.
fig.update_layout(template=_template, title='ShoppingMall Distribution')
fig.show()

In [None]:
# Bar chart of the VIP column coloured by Transported, with bar outlines removed.
fig = (
    px.bar(train_dataset, y='VIP', color='Transported')
    .update_traces(marker_line_width=0)
    .update_layout(template=_template, title='VIP Distribution')
)
fig.show()

In [None]:
# Bar chart of the CryoSleep column coloured by Transported, with bar outlines removed.
fig = (
    px.bar(train_dataset, y='CryoSleep', color='Transported')
    .update_traces(marker_line_width=0)
    .update_layout(template=_template, title='CryoSleep Distribution')
)
fig.show()

In [None]:
# Bar chart of the HomePlanet column coloured by Transported, with bar outlines removed.
fig = (
    px.bar(train_dataset, y='HomePlanet', color='Transported')
    .update_traces(marker_line_width=0)
    .update_layout(template=_template, title='HomePlanet Distribution')
)
fig.show()

> <h3 style='color:#FF0086'> To help predict the target variable, the Cabin feature is split into two separate features. </h3>

In [None]:
def D_cabin(Cabin):
    """Return the first '/'-separated part of a cabin string.

    Parameters
    ----------
    Cabin : str or missing value
        Cabin identifier whose parts are separated by '/'.

    Returns
    -------
    str or float
        The part before the first '/', or ``np.nan`` when ``Cabin`` is not a
        string (e.g. a missing value).
    """
    # Explicit type check instead of a bare `except:` so unrelated errors are not hidden.
    if not isinstance(Cabin, str):
        return np.nan  # `np.NaN` was removed in NumPy 2.0; `np.nan` works on all versions
    return Cabin.split('/')[0]


def S_cabin(Cabin):
    """Return the third '/'-separated part of a cabin string.

    Parameters
    ----------
    Cabin : str or missing value
        Cabin identifier whose parts are separated by '/'.

    Returns
    -------
    str or float
        The third part, or ``np.nan`` when ``Cabin`` is not a string or has
        fewer than three parts.
    """
    if not isinstance(Cabin, str):
        return np.nan
    parts = Cabin.split('/')
    # A malformed cabin with fewer than three parts is treated as missing, as before.
    return parts[2] if len(parts) > 2 else np.nan
 

> **Preprocessing the features**

In [None]:
# Cast the two flag columns to float64 so they are selected as numerical columns later.
flag_columns = ['VIP', 'CryoSleep']
train_dataset[flag_columns] = train_dataset[flag_columns].astype('float64')
# Derive the two cabin features, then discard the raw Cabin string.
train_dataset['Dec_cabin'] = train_dataset['Cabin'].apply(D_cabin)
train_dataset['Sid_cabin'] = train_dataset['Cabin'].apply(S_cabin)
train_dataset.drop(columns=['Cabin'], inplace=True)

In [None]:
# Move the target column to the end so features and target can be split by position.
columns = [name for name in train_dataset.columns if name != 'Transported'] + ['Transported']
train_dataset = train_dataset[columns]
(
    train_dataset.head()
    .style
    .set_table_styles([
        {'selector': 'tr:hover', 'props': [('background-color', 'green')]},
    ])
    .set_properties(**{'font-size': '14pt'})
)

In [None]:
# Features are every column but the last; the target is the last column.
x_train, y_train = train_dataset.iloc[:, :-1], train_dataset.iloc[:, -1]
# Column groups routed to the categorical and numerical pipelines, chosen by dtype.
categorical_columns = train_dataset.select_dtypes(include='object').columns.tolist()
numerical_columns = train_dataset.select_dtypes(include='float64').columns.tolist()

<div style='background:white; color:black; font-size:18px'>
    <p> Data preprocessing is required before the data can be passed to our neural network. </p>
    <p> Here, a Pipeline is used for imputation and scaling of the numerical data. Similarly, the categorical data is imputed, encoded, and scaled.</p>
</div>

In [None]:
# Categorical columns: fill gaps with the mode, ordinal-encode (unseen categories -> -1), then scale.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)),
    ('scaler', StandardScaler()),
])

# Numerical columns: fill gaps with the median, then standardize.
numerical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('Scaler', StandardScaler()),
])

# Apply each pipeline to its column group; categorical output columns come first.
preprocessing = ColumnTransformer([
    ('cat', categorical_transformer, categorical_columns),
    ('num', numerical_transformer, numerical_columns),
])

In [None]:
# Fit the imputers, encoder and scalers on the training features and transform them in one pass.
x = preprocessing.fit_transform(x_train)

# TensorFlow Model
* The Model is created with 3 Dense Layers along with Regularization Parameter for avoiding overfitting.
* The DNN model is compiled with adam optimizer and BinaryCrossentropy as Loss Function

In [None]:
# Small fully connected binary classifier: two ReLU hidden layers (the second with
# L2 weight regularization) followed by a single sigmoid output unit.
the_model = tf.keras.Sequential([
    # The input width is taken from the preprocessed feature matrix instead of a
    # hard-coded 12, so the model still builds if the feature set changes.
    tf.keras.layers.Dense(units=16, activation='relu', input_shape=(x.shape[1],)),
    tf.keras.layers.Dense(units=8, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dense(units=1, activation='sigmoid')
])

the_model.compile(optimizer='adam',
                 loss=tf.keras.losses.BinaryCrossentropy(),
                 metrics=['accuracy'])

the_model.summary()

In [None]:
# Train for 60 epochs, holding out 20% of the data for validation.
history = the_model.fit(
    x=x,
    y=y_train,
    epochs=60,
    validation_split=0.2,
)

# The Model Performance on Training and Validation Set

In [None]:
# Per-epoch training/validation metrics as a DataFrame, drawn as one line per metric.
hist = pd.DataFrame(history.history)
fig = px.line(
    hist,
    color_discrete_map={'val_accuracy': 'rgb(0, 197, 255)', 'accuracy': 'rgb(255, 23, 4)'},
).update_layout(template=_template, title='Metrics')
fig.show()

# TEST Submission

In [None]:
# Load the test set and apply the same feature engineering as the training set.
test_data = pd.read_csv('../input/spaceship-titanic/test.csv')
# Keep the ids for the submission file; select by name rather than by position.
id_col = test_data['PassengerId']
# Mirror the training preprocessing: the flag columns are cast to float64 there,
# so do the same here to hand the numerical pipeline identically typed input.
test_data['VIP'] = test_data['VIP'].astype('float64')
test_data['CryoSleep'] = test_data['CryoSleep'].astype('float64')
test_data['Dec_cabin'] = test_data["Cabin"].apply(D_cabin)
test_data['Sid_cabin'] = test_data["Cabin"].apply(S_cabin)
test_data.drop(['Cabin', 'Name', 'PassengerId'], axis=1, inplace=True)
test_data.head()

In [None]:
# Re-use the imputation values, category encodings and scaling statistics learned from
# the training set. Calling fit_transform here would re-fit them on the test set, so the
# model would receive inputs encoded and scaled differently from what it was trained on.
x_test = preprocessing.transform(test_data)

In [None]:
# Flatten the model output to one probability per passenger.
prediction = the_model.predict(x_test).reshape(-1)
# Threshold at 0.5 to obtain the boolean label and write the submission file.
df = pd.DataFrame({'PassengerId': id_col, 'Transported': prediction >= 0.5})
df.to_csv('submission.csv', index=False)

<h1 style='color:#00E9FF;'> <center> Thanks for looking ! </center></h1>
<h1 style='color:#00E9FF;'> <center> Good to hear from You! </center></h1>