In [None]:
import numpy as np 
import pandas as pd 
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
from pickle import dump, load

In [None]:
# Shell escape: run `nvidia-smi` and show its output in the notebook.
# Presumably a sanity check that a GPU is attached before training - nothing
# later in the notebook reads this output.
!nvidia-smi

In [None]:
# Load the training split and rescale its numeric columns with a MinMaxScaler.
data = pd.read_csv("../input/spaceship-titanic/train.csv")

# Columns handed to the scaler. The prediction cell must transform exactly the
# same columns, in the same order, with the scaler fitted here.
numeric_columns = ["Age", "RoomService", "FoodCourt", "ShoppingMall", "Spa", "VRDeck"]

# Fit on the training data only and overwrite the columns with the scaled values.
min_max_scaler = MinMaxScaler()
data[numeric_columns] = min_max_scaler.fit_transform(data[numeric_columns])

# Persist the fitted scaler for reuse outside this session. The context manager
# guarantees the file is flushed and closed even if pickling raises.
with open('min_max_scaler.pkl', 'wb') as scaler_file:
    dump(min_max_scaler, scaler_file)

In [None]:
# Preview the first five rows of the (already scaled) training frame.
data.head(n=5)

In [None]:
# Derive three single-character features from the "Cabin" string, then drop the
# raw "Cabin" and "Name" columns.
#
# The assignments are vectorised on purpose: writing `data["cabin_1"][n] = ...`
# row by row is chained assignment, which emits SettingWithCopyWarning and is
# silently ignored when pandas Copy-on-Write is active (every row would keep
# its initial value).
#
# Feature definition (same as before): the characters at positions 0, 2 and 4
# of the cabin string. Rows with a missing cabin get "n" in all three columns;
# a cabin string too short for a position also yields "n" instead of raising
# IndexError.
# NOTE(review): if Cabin is laid out as "deck/num/side", positions 2 and 4 only
# hit the first digit of `num` and the side letter when `num` has one digit -
# confirm whether splitting on "/" was intended. Any change must be mirrored in
# the prediction cell, which is encoded with the encoder fitted on these columns.
cabin_text = data["Cabin"]
for position, column in ((0, "cabin_1"), (2, "cabin_2"), (4, "cabin_3")):
    data[column] = cabin_text.str[position].fillna("n")

data = data.drop(columns=["Cabin", "Name"])

# Ordinal-encode the categorical feature columns and the target, persist the
# feature encoder, then replace every remaining missing value with -1.

# Columns encoded by `features_encoder`; the prediction cell must pass the same
# columns in the same order to `features_encoder.transform`.
categorical_columns = ["HomePlanet", "CryoSleep", "Destination", "VIP", "cabin_1", "cabin_2", "cabin_3"]

features_encoder = OrdinalEncoder()
data[categorical_columns] = features_encoder.fit_transform(data[categorical_columns])

# The target gets its own encoder so the feature encoder can be saved and
# reused on data that has no "Transported" column.
labels_encoder = OrdinalEncoder()
data[["Transported"]] = labels_encoder.fit_transform(data[["Transported"]])

# Persist the fitted feature encoder. The context manager guarantees the file
# is flushed and closed even if pickling raises.
with open('features_encoder.pkl', 'wb') as encoder_file:
    dump(features_encoder, encoder_file)

# Any value still missing after scaling/encoding becomes the sentinel -1.
data = data.fillna(-1)

In [None]:
# Split the prepared frame into the target (Y) and the model inputs (X).
# Plain selection is used instead of `data.pop(...)` so that `data` is left
# untouched and this cell can be re-run without raising KeyError.
Y = data["Transported"]
# PassengerId is dropped together with the target, so it is not used as a feature.
X = data.drop(columns=["PassengerId", "Transported"])

In [None]:
# Build, train and evaluate a fully connected binary classifier on (X, Y).

# Take the input width from the data instead of hard-coding 13, so the network
# stays valid if the feature set changes.
n_features = X.shape[1]

model = Sequential()
# First layer has no activation argument, i.e. it is a linear projection of the inputs.
model.add(Dense(n_features, input_dim=n_features))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(512, activation='relu'))
# Single sigmoid unit: the output is a score in (0, 1) for the positive class.
model.add(Dense(1, activation='sigmoid'))
# Binary cross-entropy is the loss that matches a sigmoid output with 0/1
# targets; mean squared error gives weak gradients when the sigmoid saturates.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Workflow: first train with a validation split to check for overfitting, then
# train on all rows (no validation split) for the final submission.
#model.fit(X, Y, epochs=50, batch_size=32, validation_split = 0.1)
model.fit(X, Y, epochs=50, batch_size=2)

# Note: this is evaluated on the same rows the model was trained on, so the
# number printed below is training accuracy, not a held-out estimate.
_, accuracy = model.evaluate(X, Y)
print(f"accuracy is {accuracy}")

In [None]:
# Load the test split and apply the same preprocessing as the training data,
# reusing the scaler and encoder that were fitted on the training set.
prediction_data = pd.read_csv("../input/spaceship-titanic/test.csv")

# Rescale the numeric columns with the already-fitted scaler (transform only).
scaled_columns = ["Age", "RoomService", "FoodCourt", "ShoppingMall", "Spa", "VRDeck"]
prediction_data[scaled_columns] = min_max_scaler.transform(prediction_data[scaled_columns])

# Cabin features: the characters at positions 0, 2 and 4 of the cabin string,
# with "n" where the cabin is missing (or too short for that position).
# Vectorised on purpose - writing `prediction_data["cabin_1"][n] = ...` row by
# row is chained assignment, which emits SettingWithCopyWarning and is silently
# ignored when pandas Copy-on-Write is active.
cabin_text = prediction_data["Cabin"]
for position, column in ((0, "cabin_1"), (2, "cabin_2"), (4, "cabin_3")):
    prediction_data[column] = cabin_text.str[position].fillna("n")

prediction_data = prediction_data.drop(columns=["Cabin", "Name"])

# Encode categoricals with the encoder fitted on the training data.
# NOTE(review): with a default OrdinalEncoder, a category that was not seen
# during fitting makes transform raise - verify the test values are covered.
encoded_columns = ["HomePlanet", "CryoSleep", "Destination", "VIP", "cabin_1", "cabin_2", "cabin_3"]
prediction_data[encoded_columns] = features_encoder.transform(prediction_data[encoded_columns])

# Same missing-value sentinel as used for the training frame.
prediction_data = prediction_data.fillna(-1)

# Keep the passenger ids for the submission file; they are not a model input.
labels = prediction_data.pop("PassengerId")

# Score the prepared test rows and write the submission file.
predictions = model.predict(prediction_data)

# Build the frame in one step. Filling a placeholder frame cell by cell with
# `my_submission["col"][n] = ...` is chained assignment, which is silently
# ignored when pandas Copy-on-Write is active and would leave "placeholder"
# in every row of the output.
my_submission = pd.DataFrame({
    # Positional values, so the ids line up with the prediction rows by order.
    "PassengerId": labels.to_numpy(),
    # One score per row; strictly greater than 0.5 is reported as True.
    "Transported": np.asarray(predictions).reshape(-1) > 0.5,
})

my_submission.to_csv('submission.csv', index=False)