Special thanks to: https://www.kaggle.com/code/antonkhnykin/simple-solution-with-accuracy-0-79798

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the competition's training and test tables from the Kaggle input folder.
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/spaceship-titanic/train.csv",
        "/kaggle/input/spaceship-titanic/test.csv",
    )
)

Dealing with nulls

In [None]:
def update_dataset(df):
    """Derive cabin/group features and drop the raw identifier columns.

    The frame is modified in place and also returned.

    * ``Cabin`` is split on ``'/'`` into ``Cabin_deck``, ``Cabin_num`` and
      ``Cabin_side``.
    * ``Cabin_num`` is converted to a numeric column so that numeric
      statistics (e.g. a median used for imputation) can be computed on it;
      missing or unparsable values become NaN.
    * ``PassengerGroup`` is the first four characters of ``PassengerId``.
    * ``Cabin``, ``Name`` and ``PassengerId`` are dropped.
    """
    df[['Cabin_deck', 'Cabin_num', 'Cabin_side']] = df['Cabin'].str.split('/', expand=True)
    # str.split yields strings; without this conversion a later median() on
    # the column would operate on text rather than numbers.
    df['Cabin_num'] = pd.to_numeric(df['Cabin_num'], errors='coerce')
    df['PassengerGroup'] = df['PassengerId'].str[:4]
    df.drop(["Cabin", "Name", 'PassengerId'], axis=1, inplace=True)
    return df

def update_by_cryo(df, column):
    """Fill missing ``column`` values with 0 for rows where CryoSleep is True.

    Rows whose ``CryoSleep`` is False or missing are left untouched. The
    frame is modified in place and also returned.
    """
    # Explicit comparison with True so that missing CryoSleep values are
    # treated as "not asleep" instead of propagating NaN into the mask.
    asleep = df['CryoSleep'].eq(True)
    filled = df.loc[asleep, column].fillna(0)
    df.loc[asleep, column] = filled
    return df

def update_by_age(df, column):
    """Fill missing ``column`` values with 0 for passengers below the youngest "active" age.

    The per-age totals of ``column`` are computed, and the smallest age whose
    total is positive is taken as the threshold. Missing values of ``column``
    on rows with a strictly smaller ``Age`` are replaced with 0.

    If no age group has a positive total, the frame is returned unchanged
    (there is no threshold to apply). The frame is modified in place and
    also returned.
    """
    totals_by_age = df.groupby('Age')[column].sum()
    active_ages = totals_by_age[totals_by_age > 0].index
    if active_ages.empty:
        # No age group has a positive total, so nothing can be inferred.
        return df
    min_age = active_ages.min()
    younger = df['Age'] < min_age
    df.loc[younger, column] = df.loc[younger, column].fillna(0)
    return df

def update_by_mean(df, column):
    """Fill the remaining missing values of ``column`` with a default.

    Despite the name, no mean is used:

    * ``Age`` and ``Cabin_num`` are filled with the column median
      (``Cabin_num`` is coerced to numeric first, since it may still hold
      strings produced by splitting the cabin code).
    * ``HomePlanet``, ``Destination``, ``CryoSleep``, ``VIP``, ``Cabin_deck``
      and ``Cabin_side`` are filled with fixed constants (presumably the
      most frequent values in the data set; verify against the data).
    * Any other column is filled with 0.

    The frame is modified in place and also returned.
    """
    constant_fills = {
        'HomePlanet': 'Earth',
        'Destination': '55 Cancri e',
        'CryoSleep': False,
        'VIP': False,
        'Cabin_deck': 'F',
        'Cabin_side': 'P',
    }
    # Assign the filled column back instead of calling fillna(inplace=True)
    # on df[column]: the chained in-place form acts on a temporary object
    # and does not update df under pandas Copy-on-Write.
    if column == 'Age':
        df[column] = df[column].fillna(df[column].median())
    elif column == 'Cabin_num':
        numeric = pd.to_numeric(df[column], errors='coerce')
        df[column] = numeric.fillna(numeric.median())
    else:
        df[column] = df[column].fillna(constant_fills.get(column, 0))
    return df
    
# Apply the same null-handling pipeline to both tables. Every helper mutates
# the frame in place, so df_train and df_test themselves are updated.
spend_columns = ['RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']
for df in [df_train, df_test]:
    df = update_dataset(df)
    # 1) Passengers in cryosleep: missing spend values become 0.
    #    (The original called update_by_age here, which left update_by_cryo
    #    unused and simply duplicated the next step.)
    for column in spend_columns:
        df = update_by_cryo(df, column)
    # 2) Passengers younger than the youngest age with a positive total:
    #    missing values become 0.
    for column in ['VIP'] + spend_columns:
        df = update_by_age(df, column)
    # 3) Whatever is still missing gets the per-column default.
    for column in ['Age', 'RoomService', 'HomePlanet', 'Destination', 'VIP',
                   'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck', 'CryoSleep',
                   'Cabin_deck', 'Cabin_num', 'Cabin_side']:
        df = update_by_mean(df, column)

In [None]:
# Target labels for the training set.
y_train = df_train["Transported"]

# Model inputs. Each column is listed once: the original list contained
# "Spa" twice, which duplicated that feature in the design matrix.
features = ["CryoSleep", "Cabin_deck", "Cabin_side", "Age", "Destination", "RoomService", "FoodCourt", "ShoppingMall", "Spa", "VRDeck", "VIP"]
X_train = pd.get_dummies(df_train[features])
# One-hot encode the test set and align it to the training columns, so a
# category that appears in only one of the two tables cannot shift or change
# the number of columns seen by the scaler and the models.
X_test = pd.get_dummies(df_test[features]).reindex(columns=X_train.columns, fill_value=0)

# Standardise using statistics fitted on the training data only.
ss = StandardScaler()
X_train_scaled = ss.fit_transform(X_train)
X_test_scaled = ss.transform(X_test)

In [None]:
# Stop training once the validation loss has not improved for 10 epochs.
# min_delta is the smallest change that counts as an improvement; the
# original 0.5 is on the order of the binary cross-entropy loss itself, so
# practically no epoch could qualify and training always ended as soon as
# the patience window elapsed.
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    min_delta=1e-3,
    verbose=1,
)

In [None]:
# Fully connected binary classifier: six hidden layers alternating ReLU and
# tanh activations, followed by a single sigmoid output unit.
opt = keras.optimizers.Adam(learning_rate=0.001)
model = keras.models.Sequential()
# `shape` must be a tuple: a bare integer is rejected by Keras 3, while the
# tuple form is accepted by both Keras 2 and Keras 3.
model.add(keras.layers.Input(shape=(X_train.shape[1],)))
model.add(keras.layers.Dense(30, activation='relu'))
model.add(keras.layers.Dense(30, activation='tanh'))
model.add(keras.layers.Dense(30, activation='relu'))
model.add(keras.layers.Dense(30, activation='tanh'))
model.add(keras.layers.Dense(30, activation='relu'))
model.add(keras.layers.Dense(20, activation='tanh'))
model.add(keras.layers.Dense(1, activation='sigmoid'))
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=['accuracy'])

In [None]:
# Train for at most 100 epochs, holding out 10% of the training rows for
# validation; the early-stopping callback may end training sooner.
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    validation_split=0.1,
    verbose=1,
    callbacks=[es],
)

In [None]:
# The sigmoid outputs are rounded to 0/1 and then cast to booleans.
predictions = (
    model.predict(X_test_scaled)
    .round()
    .astype(bool)
)

In [None]:
# PassengerId was dropped from df_test during preprocessing, so the raw test
# file is read again to recover the ids for the submission.
test = pd.read_csv("/kaggle/input/spaceship-titanic/test.csv")
output = pd.DataFrame(
    {
        'PassengerId': test['PassengerId'],
        'Transported': predictions.ravel(),
    }
)
output.to_csv('submission.csv', index=False)

Now let's try LightGBM (LGBM)

In [None]:
# train_test_split was used without ever being imported, so this cell
# raised NameError.
from sklearn.model_selection import train_test_split

# Hold out 20% of the scaled training rows for validation.
# NOTE: this rebinds X_train and y_train to the 80% subset, so the cell
# should be run only once per session.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_scaled, y_train, test_size=0.2, random_state=42
)

With these hyperparameters, the model achieved an accuracy of 80.4%.

In [None]:
import lightgbm as lgb

# Gradient-boosted trees (75 estimators) fitted on the training split.
clf_lgb = lgb.LGBMClassifier(boosting_type='gbdt', n_estimators=75, objective='binary')
clf_lgb.fit(X_train, y_train)
# predict() already returns class labels, so no rounding is needed; the cast
# only guarantees a boolean column in the submission.
predictions = clf_lgb.predict(X_test_scaled).astype(bool)
# Read the ids from the same Kaggle input path used elsewhere in the
# notebook; the bare 'test.csv' relative path does not match it.
test = pd.read_csv("/kaggle/input/spaceship-titanic/test.csv")
output = pd.DataFrame(predictions, columns=['Transported'])
output.insert(0, 'PassengerId', test['PassengerId'])
output.to_csv('submission-lgbm.csv', index=False)