# Before Building Model

In [None]:
# Install evalml with pip's --user flag, then clear the cell output so the
# install log does not clutter the notebook.
# NOTE(review): clear_output() also hides any pip error messages; if the
# `import evalml` further down fails, re-run this cell without clearing.
from IPython.display import clear_output
!pip install evalml --user
clear_output()

import numpy as np
import pandas as pd
import os,random,warnings
# Ignore all warnings from this point on. This runs before evalml is imported
# below, so warnings raised during that import are suppressed as well.
warnings.simplefilter('ignore')

import evalml
from evalml.automl import AutoMLSearch

# Competition input files all live in one directory; build each path from it.
INPUT_DIR = "../input/spaceship-titanic"
TRAIN_PATH = f"{INPUT_DIR}/train.csv"
TEST_PATH = f"{INPUT_DIR}/test.csv"
SAMPLE_SUBMISSION_PATH = f"{INPUT_DIR}/sample_submission.csv"

# Output file written at the end of the notebook.
SUBMISSION_PATH = "submission.csv"

# Column names used throughout: the row identifier and the label to predict.
ID = "PassengerId"
TARGET = "Transported"

SEED = 2022


def seed_everything(seed=SEED):
    """Seed Python's and NumPy's global RNGs and export PYTHONHASHSEED.

    Args:
        seed: Value handed to ``random.seed`` and ``np.random.seed`` and
            stored, as a string, in the ``PYTHONHASHSEED`` environment
            variable. Defaults to ``SEED``.
    """
    # NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so setting it
    # here only influences processes launched after this call.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything()

# Build Model

In [None]:
from evalml.objectives import get_core_objectives
from evalml.problem_types import ProblemTypes

# Print the name of each core objective evalml reports for binary problems.
core_objective_names = [
    core_objective.name
    for core_objective in get_core_objectives(ProblemTypes.BINARY)
]
for objective_name in core_objective_names:
    print(objective_name)

In [None]:
# Display the names of all objectives evalml exposes (shown as the cell's output).
evalml.objectives.get_all_objective_names()

In [None]:
# AutoML search settings, gathered in one place so the cells below share them.
MODEL_PROBLEM_TYPE = ProblemTypes.BINARY
MODEL_MAX_BATCHES = 10  # passed to AutoMLSearch as max_batches
MODEL_OBJECTIVE = "Accuracy Binary"  # primary objective for the search
MODEL_OBJECTIVE_ADDITIONAL = [
    "Balanced Accuracy Binary",
]
# File the best pipeline is saved to after the search.
MODEL_SAVE_PATH = "spaceship_titanic_evalml_model.pkl"

In [None]:
# Load the labelled training data and separate the features from the target.
train = pd.read_csv(TRAIN_PATH)

X = train.drop(columns=[ID, TARGET])
y = train[TARGET]

# Hold out a validation split. evalml takes its own `random_seed` argument
# (default 0), so SEED is passed explicitly to make the notebook's configured
# seed actually control the split.
X_train, X_val, y_train, y_val = evalml.preprocessing.split_data(
    X,
    y,
    problem_type=MODEL_PROBLEM_TYPE,
    random_seed=SEED,
)

# Configure and run the AutoML search on the training split only; the
# validation split is kept aside for scoring the chosen pipeline afterwards.
automl = AutoMLSearch(
    X_train=X_train,
    y_train=y_train,
    problem_type=MODEL_PROBLEM_TYPE,
    objective=MODEL_OBJECTIVE,
    additional_objectives=MODEL_OBJECTIVE_ADDITIONAL,
    max_batches=MODEL_MAX_BATCHES,
    optimize_thresholds=True,
    random_seed=SEED,  # same seed as the split, for a reproducible search
)
automl.search()

# Show the table of pipelines evaluated by the search, then persist the best one.
display(automl.rankings)
automl.best_pipeline.save(MODEL_SAVE_PATH)
# To reload the saved pipeline in a later session:
#   evalml.pipelines.PipelineBase.load(MODEL_SAVE_PATH)
automl.best_pipeline

In [None]:
# Describe the pipeline whose id sits in the first row of the rankings table.
top_ranked_row = automl.rankings.iloc[0]
automl.describe_pipeline(top_ranked_row["id"])

In [None]:
# Score the best pipeline on the held-out validation split. The objectives are
# taken from the search configuration so the two cannot drift apart.
automl.best_pipeline.score(
    X_val,
    y_val,
    objectives=[MODEL_OBJECTIVE, *MODEL_OBJECTIVE_ADDITIONAL],
)

# After Building Model

In [None]:
# Predict on the competition test file and write the submission CSV.
test = pd.read_csv(TEST_PATH)
X_test = test.drop(columns=[ID])  # the id column is not a model feature

# Start from the sample submission and overwrite its target column with the
# best pipeline's predictions.
sub = pd.read_csv(SAMPLE_SUBMISSION_PATH)
test_predictions = automl.best_pipeline.predict(X_test)
sub[TARGET] = test_predictions
sub.to_csv(SUBMISSION_PATH, index=False)

# Preview the first rows of the file that was just written.
sub.head(10)