![](https://miro.medium.com/max/1000/1*_wOrre885WuxLetqRXduBw.jpeg)

# Auto Sklearn
Auto-Sklearn is an automated machine learning toolkit and a drop-in replacement for a scikit-learn estimator.

Auto-Sklearn frees a machine learning user from algorithm selection and hyperparameter tuning. It leverages recent advances in Bayesian optimization, meta-learning and ensemble construction.

# Installing Auto Sklearn

In [None]:
# System packages needed to build auto-sklearn's native dependencies.
!apt install -y build-essential swig curl
# Install every requirement listed in the upstream requirements.txt, one `pip install` per line.
# NOTE(review): xargs is given both `-n 1` and `-L 1`; these options are usually documented
# as mutually exclusive — confirm which one is intended.
!curl https://raw.githubusercontent.com/automl/auto-sklearn/master/requirements.txt | xargs -n 1 -L 1 pip install
# Finally install auto-sklearn itself.
!pip install auto-sklearn

In [None]:
# Sanity check: confirm the package imports and show which version was installed.
import autosklearn
print(autosklearn.__version__)

In [None]:
import pandas as pd

# Load the competition's training and test tables in one pass.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-apr-2021/train.csv',
        '../input/tabular-playground-series-apr-2021/test.csv',
    )
)

# Data Preprocessing

In [None]:
# Fill NaNs
# Both frames are filled with the *training* mean so that train and test are
# imputed with the same value.
# The result is assigned back to the column instead of calling
# `fillna(..., inplace=True)` on a column selection: that chained in-place form
# triggers a warning in recent pandas and does not update the frame once
# Copy-on-Write is enabled.
test['Age'] = test['Age'].fillna(train['Age'].mean())
train['Age'] = train['Age'].fillna(train['Age'].mean())
test['Fare'] = test['Fare'].fillna(train['Fare'].mean())
train['Fare'] = train['Fare'].fillna(train['Fare'].mean())

In [None]:
# Remove the identifier and free-text columns from both frames (in place).
train.drop(columns=['PassengerId','Name','Ticket','Cabin'], inplace=True)
test.drop(columns=['PassengerId', 'Name','Ticket','Cabin'], inplace=True)

In [None]:
# Imputing
# Remember the column labels: the imputer returns plain arrays, so the frames
# are rebuilt with these names afterwards.
train_cols = train.columns.to_list()
test_cols = test.columns.to_list()

from sklearn.impute import SimpleImputer
import numpy as np

# Replace any remaining NaN with the most frequent value of its column.
imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')

# Fill the test features with the modes learned from the *training* rows, so
# both frames are imputed with the same values (consistent with the mean
# filling of Age/Fare, which also uses training statistics). Fitting on the
# test frame itself would impute it with different values.
# Assumes every test column is also present in train.
test = imputer.fit(train[test_cols]).transform(test)

# Train is imputed over all of its columns, exactly as before.
train = imputer.fit_transform(train)

train = pd.DataFrame(train, columns = train_cols)
test = pd.DataFrame(test, columns = test_cols)

In [None]:
# Keep the label as a one-column DataFrame and make sure it is numeric.
target = train.filter(items=['Survived'])
target['Survived'] = pd.to_numeric(arg=target['Survived'])

In [None]:
from sklearn.preprocessing import StandardScaler, LabelEncoder

def preprocess(df, encoder=None,
               scaler=None, cols_to_drop=None,
               cols_to_encode=None, cols_to_scale=None):
    """
    Encode, scale and drop columns of a DataFrame.

    The steps run in this order: encode, scale, drop. The input frame is
    never modified; all changes are applied to a copy, which is returned.

    :param df: DataFrame with data
    :param encoder: encoder object with fit_transform method; it is re-fitted
        on each column listed in ``cols_to_encode``
    :param scaler: scaler object with fit_transform method; it is re-fitted
        on each column listed in ``cols_to_scale``, which is passed to it as
        an ``(n_rows, 1)`` array
    :param cols_to_drop: columns to be removed (``None`` or empty: keep all)
    :param cols_to_encode: columns to be encoded (``None``: encode nothing)
    :param cols_to_scale: columns to be scaled (``None``: scale nothing)
    :return: new DataFrame with the requested transformations applied
    """
    # Work on a copy so the caller's frame is not mutated as a side effect.
    df = df.copy()

    # Compare against None explicitly: an estimator's truthiness is not a
    # reliable "was it provided" signal, and a missing column list simply
    # means there is nothing to do for that step.
    if encoder is not None and cols_to_encode is not None:
        for col in cols_to_encode:
            df[col] = encoder.fit_transform(df[col])

    if scaler is not None and cols_to_scale is not None:
        for col in cols_to_scale:
            # Scalers expect 2-D input, hence the single-column reshape.
            df[col] = scaler.fit_transform(df[col].values.reshape(-1, 1))

    if cols_to_drop:
        df = df.drop(cols_to_drop, axis=1)

    return df

In [None]:
cat_cols = ['Sex', 'Embarked']
cont_cols = ['Age', 'SibSp', 'Parch', 'Fare']

# Training frame: encode the categorical columns, scale the continuous ones
# and drop the label column (it was already copied into `target`).
train = preprocess(
    train,
    LabelEncoder(),
    StandardScaler(),
    ['Survived'],
    cat_cols,
    cont_cols,
)

# Test frame: same encoding and scaling steps, nothing to drop.
# NOTE(review): fresh encoder/scaler instances are passed and `preprocess`
# calls fit_transform, so the test frame is transformed with statistics
# fitted on the test data itself rather than on train — confirm this is intended.
test = preprocess(
    test,
    LabelEncoder(),
    StandardScaler(),
    cols_to_encode=cat_cols,
    cols_to_scale=cont_cols,
)

In [None]:
# Convert Pclass to a numeric dtype (presumably it is object-typed after the
# imputation step — TODO confirm). Both frames use the same conversion; the
# previous str -> int round trip on the test frame was redundant and would
# fail on values formatted like "1.0".
train['Pclass'] = pd.to_numeric(train['Pclass'])
test['Pclass'] = pd.to_numeric(test['Pclass'])

In [None]:
import copy

# Independent copies of the features and labels that are handed to the model.
X_train, y_train, X_test = (
    frame.copy() for frame in (train, target, test)
)

# Model Definition

In [None]:
import autosklearn.classification
# Configure the automated search: a 10-minute overall budget, at most one
# minute per candidate model, using all available cores.
cls = autosklearn.classification.AutoSklearnClassifier(
    n_jobs=-1,
    per_run_time_limit=60,
    time_left_for_this_task=600,
)
# Run the search and build the ensemble on the training data.
cls.fit(X_train, y_train)

In [None]:
# NOTE(review): "4:42" looks like a duration noted by the author from an
# earlier run — confirm its meaning or remove it.
# 4:42
# Print the final ensemble constructed by auto-sklearn
print(cls.show_models())

In [None]:
# Predict labels for the prepared test features with the fitted classifier.
predictions = cls.predict(X_test)

In [None]:
# Start from the provided sample file so the row order and id column match,
# overwrite its label column with the predictions, and save without the index.
submissions = pd.read_csv(
    filepath_or_buffer="../input/tabular-playground-series-apr-2021/sample_submission.csv"
)
submissions['Survived'] = predictions
submissions.to_csv(path_or_buf='submission_4.csv', index=False)