# Before: setup (install, imports, configuration)

In [None]:
from IPython.display import clear_output
!pip install -U lightautoml
clear_output()

import numpy as np
import pandas as pd
import os,time,random

from sklearn.metrics import log_loss, accuracy_score
from sklearn.model_selection import train_test_split
import torch

from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML
from lightautoml.tasks import Task
from lightautoml.report.report_deco import ReportDeco

# Input CSV files of the spaceship-titanic dataset.
SAMPLE_SUBMISSION_PATH = "../input/spaceship-titanic/sample_submission.csv"
TEST_PATH = "../input/spaceship-titanic/test.csv"
TRAIN_PATH = "../input/spaceship-titanic/train.csv"

# Output file the final predictions are written to.
SUBMISSION_PATH = "submission.csv"

# Column names: ID is listed under the 'drop' role when training,
# TARGET is the column being predicted.
ID, TARGET = "PassengerId", "Transported"

SEED = 2022


def seed_everything(seed=SEED):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch's generator, and exports ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed applied to all generators. Defaults to ``SEED``.
    """
    random.seed(seed)
    # Changing PYTHONHASHSEED at runtime does not re-seed string hashing in the
    # already-running interpreter; it only affects Python processes started
    # after this point that inherit the environment.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    # torch is imported and configured by this notebook, so its generator has
    # to be seeded too - otherwise "everything" silently excludes it.
    torch.manual_seed(seed)


seed_everything()

# --- Model configuration (consumed by the Task / TabularAutoML set-up) ---

# Problem definition: task type, optimisation loss and reported metric.
MODEL_TASK_TYPE = "binary"
MODEL_LOSS = "logloss"
MODEL_METRIC = "accuracy"

# Algorithms passed as 'use_algos'; a list containing a single inner list.
MODEL_USE_ALGOL = [["linear_l2", "lgb", "lgb_tuned"]]

# Resource budget.
MODEL_TIMEOUT = 5 * 60  # presumably seconds (i.e. 5 minutes) - TODO confirm
MODEL_N_THREADS = 4

# Keep torch's thread count in line with the budget above.
torch.set_num_threads(MODEL_N_THREADS)

# Build: train the AutoML model

In [None]:
# Load the training table.
train = pd.read_csv(TRAIN_PATH)

# Column roles: TARGET is the label, ID is listed under the 'drop' role.
roles = {
    'target': TARGET,
    'drop': [ID],
}
task = Task(MODEL_TASK_TYPE, loss=MODEL_LOSS, metric=MODEL_METRIC)

# Configure the AutoML preset with the time/CPU budget and algorithm list
# defined in the configuration cell.
automl = TabularAutoML(
    task=task,
    timeout=MODEL_TIMEOUT,
    cpu_limit=MODEL_N_THREADS,
    general_params={'use_algos': MODEL_USE_ALGOL},
    reader_params={'n_jobs': MODEL_N_THREADS, 'random_state': SEED},
)

# Fit on the training table; the returned predictions are kept as `oof_pred`
# (presumably out-of-fold predictions for the training rows - TODO confirm).
oof_pred = automl.fit_predict(train, roles=roles, verbose=3)

# Bar chart of the 'fast' feature scores, one bar per feature.
fast_fi = automl.get_feature_scores('fast')
(
    fast_fi
    .set_index('Feature')['Importance']
    .plot.bar(figsize=(20, 10), grid=True)
)


# After: predict on the test set and write the submission

In [None]:
# Score the test table with the fitted model.
test = pd.read_csv(TEST_PATH)
test_pred = automl.predict(test)

# Start from the sample submission and overwrite TARGET with the thresholded
# predictions (first prediction column > 0.5).
# NOTE(review): column 0 is presumably the positive-class probability - confirm.
# NOTE(review): values are assigned by position, which assumes the sample
# submission rows are in the same order as the test rows - TODO confirm.
sub = pd.read_csv(SAMPLE_SUBMISSION_PATH).assign(
    **{TARGET: (test_pred.data[:, 0] > 0.5).astype(bool)}
)
sub.to_csv(SUBMISSION_PATH, index=False)
sub.head()