## Import Dependencies

In [None]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR, SVC
from sklearn.naive_bayes import GaussianNB
from catboost import CatBoostClassifier

In [None]:
# Load the training and test tables from the current working directory.
dataset, testset = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

In [None]:
# Columns 1..11 are used as features in both files. Column 0 of the test file
# is kept separately as the row identifier for the output file.
# .copy() detaches the selections from the source frames: later cells add
# columns to X and test_set, and assigning onto an un-copied selection
# triggers pandas' SettingWithCopyWarning.
X = dataset.iloc[:, 1:12].copy()
y = dataset.iloc[:, -1]  # target: last column of the training file

test_set = testset.iloc[:, 1:12].copy()
label = testset.iloc[:, 0]

In [None]:
# Display the identifier column taken from the test file.
label

In [None]:
def parse_cabin(cabin):
    """Split a '/'-separated cabin string into (deck, room, side).

    Returns a 3-tuple of strings when the value has exactly three
    '/'-separated fields. Null values and strings with any other number of
    fields yield a 3-tuple of NaN.
    """
    missing = (np.nan, np.nan, np.nan)

    if pd.isnull(cabin):
        return missing

    fields = cabin.split('/')
    if len(fields) != 3:
        return missing

    return tuple(fields)

In [None]:
# Split 'Cabin' into three separate columns using parse_cabin.
# Building a DataFrame from the list of tuples (instead of zip(*...) unpacking)
# also works when X has no rows.
cabin_parts = pd.DataFrame(
    X['Cabin'].map(parse_cabin).tolist(),
    index=X.index,
    columns=['Deck', 'Room', 'Side'],
)
# Create a new frame instead of assigning columns onto X: X is an .iloc
# selection of `dataset`, and in-place assignment on it triggers
# SettingWithCopyWarning. Column order is unchanged (new columns go last).
X = X.drop(columns='Cabin').join(cabin_parts)

# Column order that the positional indices in the preprocessing step refer to.
feature_columns = ['HomePlanet', 'CryoSleep', 'Destination', 'Age', 'VIP',
                   'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck',
                   'Deck', 'Room', 'Side']

# Raw (untransformed) feature matrix, shown for inspection.
features_array = X[feature_columns].to_numpy()
features_array

In [None]:
# Inspect the second row of the raw feature array.
features_array[1]

In [None]:
# Positional indices into `feature_columns`:
#   0 HomePlanet, 1 CryoSleep, 2 Destination, 4 VIP, 10 Deck, 12 Side
categorical_cols = [0, 1, 2, 4, 10, 12]

#   3 Age, 5 RoomService, 6 FoodCourt, 7 ShoppingMall, 8 Spa, 9 VRDeck, 11 Room
# NOTE(review): 'Room' comes out of parse_cabin as a string; this relies on the
# imputer coercing numeric strings to float. Confirm, or convert explicitly.
numerical_cols = [3, 5, 6, 7, 8, 9, 11]

# Numeric features: fill gaps with the column mean, then standardise.
numerical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Categorical features: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are ignored at transform time.
categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_pipeline, numerical_cols),
    ('cat', categorical_pipeline, categorical_cols)
])

# Fit on X[feature_columns] so the positional indices above are guaranteed to
# line up with the column order, matching how the test set is transformed.
X_transformed = preprocessor.fit_transform(X[feature_columns])

In [None]:
# Inspect the second row of the preprocessed training matrix.
X_transformed[1]

In [None]:
# Split 'Cabin' into three separate columns using parse_cabin, as was done for
# the training features. Building a DataFrame from the list of tuples (instead
# of zip(*...) unpacking) also works when the frame has no rows.
test_cabin_parts = pd.DataFrame(
    test_set['Cabin'].map(parse_cabin).tolist(),
    index=test_set.index,
    columns=['Deck', 'Room', 'Side'],
)
# Create a new frame instead of assigning columns onto test_set: it is an
# .iloc selection of `testset`, and in-place assignment on it triggers
# SettingWithCopyWarning.
test_set = test_set.drop(columns='Cabin').join(test_cabin_parts)

# Reuse the `feature_columns` list defined with the training features so both
# sets are always presented to the fitted preprocessor in the same order.
test_set_transformed = preprocessor.transform(test_set[feature_columns])

test_features_array = test_set_transformed

In [None]:
# Encode the target column as integer class labels and show the result.
le = LabelEncoder()
le.fit(y)
y = np.array(le.transform(y))
y

In [None]:
# Hyperparameters for the gradient-boosting classifier.
catboost_params = {
    'iterations': 5,
    'learning_rate': 0.1,
    'loss_function': 'CrossEntropy',
}

# Train on the preprocessed training matrix and the encoded target.
clf = CatBoostClassifier(**catboost_params)
clf.fit(X_transformed, y)

In [None]:
# Predict on the preprocessed test matrix.
# NOTE(review): predict() presumably returns class labels rather than
# probabilities here — confirm before treating y_pred as a score.
y_pred = clf.predict(test_set_transformed)

In [None]:
# Decision threshold on the predicted probability of the positive class.
threshold = 0.5

# Threshold the positive-class probability (second column of predict_proba)
# rather than the output of predict(): predict() presumably already yields
# class labels, which would make `threshold` a no-op.
positive_proba = clf.predict_proba(test_set_transformed)[:, 1]
y_pred_binary = (positive_proba > threshold).astype(int)

y_pred_binary

In [None]:
# Assemble the submission table: one row per test-set identifier with a
# boolean prediction.
# NOTE(review): the id header is spelled 'PassengerId' (not 'PassengerID'),
# assuming this is the Kaggle Spaceship Titanic submission format — confirm
# against the competition's sample submission.
result_df = pd.DataFrame({
    'PassengerId': label,
    'Transported': y_pred_binary.astype(bool),
})
result_df.to_csv('predictions1.csv', index=False)