## Import Libraries

In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder,MinMaxScaler,LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score
import xgboost as xgb

## Import Train/Test Datasets

In [None]:
# Load the labelled training data and the unlabelled competition test data.
train, test = (
    pd.read_csv("../input/spaceship-titanic/train.csv"),
    pd.read_csv("../input/spaceship-titanic/test.csv"),
)

## Extract and Manipulate Columns

In [None]:
# Keep the test identifiers for the submission file before the column is dropped.
test_id = test["PassengerId"]

# Apply the same feature extraction to both frames (mutated in place, before
# the names are rebound by the drops below).
for frame in (train, test):
    # The part of PassengerId before '_' is stored as GroupID. It is converted
    # to a number so the dtype-based feature split treats it as numeric
    # instead of one-hot encoding every distinct identifier string.
    frame["GroupID"] = pd.to_numeric(frame["PassengerId"].str.split("_").str[0])

    # Cabin is split on '/' into three parts; rows with a missing Cabin get
    # missing values in all three new columns.
    frame[["Deck", "Num", "Side"]] = frame["Cabin"].str.split("/", expand=True)

    # Same reasoning as GroupID: a digit string would otherwise be handled as
    # a categorical column. Missing values become NaN and are left for the
    # numeric imputer.
    # NOTE(review): assumes the middle Cabin component is always numeric;
    # pd.to_numeric raises if it is not -- confirm against the data.
    frame["Num"] = pd.to_numeric(frame["Num"])

# Drop the raw columns that have been replaced by derived features or are unused.
train = train.drop(["PassengerId", "Name", "Cabin"], axis=1)
test = test.drop(["PassengerId", "Name", "Cabin"], axis=1)

# Encode the target as integer class labels for the classifier.
le = LabelEncoder()
train["Transported"] = le.fit_transform(train["Transported"])

## Split Data into Training and Validation Sets

In [None]:
# Target column and the remaining feature columns.
y = train["Transported"]
X = train.drop(columns=["Transported"])

# Hold out a quarter of the rows for validation; the fixed seed makes the
# split reproducible.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=9942,
)

## Separate Numerical and Categorical Features

In [None]:
# Columns with a numeric dtype go to the numeric branch of the preprocessor.
numerical_features = list(X_train.select_dtypes(include=['number']).columns)
# Every other column is handled as categorical.
categorical_features = list(X_train.select_dtypes(exclude=['number']).columns)

## Pipeline to Handle Missing Data and Fit Model 

In [None]:
# XGBoost classifier with the library's default hyperparameters.
xgb_cl = xgb.XGBClassifier()

# Numeric columns: fill missing values with the column mean, then min-max scale.
numeric_pipeline = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='mean')),
    ('scale', MinMaxScaler()),
])

# Categorical columns: fill missing values with the most frequent value, then
# one-hot encode. Categories unseen during fit are ignored rather than raising.
# The encoder's dense-output keyword is deliberately not passed: it was
# `sparse=` before scikit-learn 1.2, was renamed to `sparse_output=`, and the
# old spelling was removed in 1.4, so either spelling fails on some versions.
categorical_pipeline = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('encode', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column list to its pipeline. sparse_threshold=0 makes the
# combined output always dense, so the model still receives a dense array
# regardless of what the encoder returns.
full_processor = ColumnTransformer(
    transformers=[
        ('number', numeric_pipeline, numerical_features),
        ('category', categorical_pipeline, categorical_features),
    ],
    sparse_threshold=0,
)

# Preprocessing and model chained so that fit/predict apply both steps.
xgb_pipeline = Pipeline(steps=[
    ('preprocess', full_processor),
    ('model', xgb_cl),
])

## Fitting the Training Data

In [None]:
# Fit the preprocessing steps and the classifier on the training split.
xgb_pipeline.fit(X_train, y_train)

# Predict the held-out validation split.
xgb_preds = xgb_pipeline.predict(X_valid)

# accuracy_score takes (y_true, y_pred). The value is the same either way for
# accuracy, but the documented order keeps this line correct if it is later
# swapped for an asymmetric metric such as precision or recall.
accuracy_score(y_valid, xgb_preds)

## Apply Model to the Test Data

In [None]:
# Predict encoded class labels for the competition test set.
test_preds = xgb_pipeline.predict(test)

# Lookup table from the encoded class label to the text written to the submission.
logger = {1: "True", 0: "False"}
test_preds = [logger[label] for label in test_preds]

## Prepare For Submission

In [None]:
# Pair each saved test PassengerId with its predicted label and write the
# submission file without the index column.
data = pd.DataFrame({
    'PassengerId': test_id,
    'Transported': test_preds,
})
data.to_csv('submission3.csv', index=False)