In [None]:
#importing packages
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import warnings
import autosklearn.classification

## Import train/test data

In [None]:
# Titanic CSVs: download train/test from https://www.kaggle.com/c/titanic
# and place them under data/ relative to the notebook's working directory.
train = pd.read_csv("data/train.csv")
test = pd.read_csv("data/test.csv")

# Show (rows, columns) of each frame, one per line.
print(train.shape, test.shape, sep="\n")

## Data transformation functions

In [None]:
#label encoding for sklearn algorithms
def encode_features(df_train, df_test):
    """Label-encode the categorical feature columns of both frames.

    For each of Fare, Cabin, Age, Sex, Lname and NamePrefix, one
    ``LabelEncoder`` is fitted on the values of both frames stacked together,
    so a value that occurs in only one frame still receives a code. The
    column in each frame is then overwritten in place with the encoded values.

    Args:
        df_train: frame containing all six feature columns.
        df_test: frame containing all six feature columns.

    Returns:
        The tuple ``(df_train, df_test)`` -- the same objects that were
        passed in, now holding encoded columns.
    """
    features = ['Fare', 'Cabin', 'Age', 'Sex', 'Lname', 'NamePrefix']
    stacked = pd.concat([df_train[features], df_test[features]])

    for column in features:
        # fit() returns the encoder itself, so it can be chained.
        encoder = preprocessing.LabelEncoder().fit(stacked[column])
        for frame in (df_train, df_test):
            frame[column] = encoder.transform(frame[column])

    return df_train, df_test

# Binning ages
def simplify_ages(df):
    """Replace the numeric ``Age`` column with labelled age bands, in place.

    Missing ages are first filled with -0.5 so that they fall into the
    ``Unknown`` band. Bands are right-inclusive (the ``pd.cut`` default):
    an age of exactly 5 is a ``Baby`` and an age of exactly 0 lands in
    ``Unknown``. Ages outside (-1, 120] match no band and become NaN.

    Returns:
        The same DataFrame that was passed in.
    """
    # (label, inclusive upper edge) per band; the first band opens at -1.
    age_bands = [
        ('Unknown', 0),
        ('Baby', 5),
        ('Child', 12),
        ('Teenager', 18),
        ('Student', 25),
        ('Young Adult', 35),
        ('Adult', 60),
        ('Senior', 120),
    ]
    edges = (-1,) + tuple(upper for _, upper in age_bands)
    labels = [label for label, _ in age_bands]

    df['Age'] = df.Age.fillna(-0.5)
    df['Age'] = pd.cut(df['Age'], bins=edges, labels=labels)
    return df

# Storing first letter of cabins
def simplify_cabins(df):
    """Reduce the ``Cabin`` column to its first character, in place.

    Missing cabins are filled with ``'N'`` first, so they end up as ``'N'``.

    Returns:
        The same DataFrame that was passed in.
    """
    def first_letter(cabin):
        return cabin[0]

    filled = df.Cabin.fillna('N')
    df['Cabin'] = filled
    df['Cabin'] = df['Cabin'].apply(first_letter)
    return df

# Binning fares
def simplify_fares(df):
    """Replace the numeric ``Fare`` column with labelled fare bands, in place.

    Missing fares are first filled with -0.5 so that they fall into the
    ``Unknown`` band. Bands are right-inclusive (the ``pd.cut`` default), so
    a fare of exactly 0 also lands in ``Unknown``. Fares outside (-1, 1000]
    match no band and become NaN.

    Returns:
        The same DataFrame that was passed in.
    """
    # (label, inclusive upper edge) per band; the first band opens at -1.
    fare_bands = [
        ('Unknown', 0),
        ('1_quartile', 8),
        ('2_quartile', 15),
        ('3_quartile', 31),
        ('4_quartile', 1000),
    ]
    edges = (-1,) + tuple(upper for _, upper in fare_bands)
    labels = [label for label, _ in fare_bands]

    df['Fare'] = df.Fare.fillna(-0.5)
    df['Fare'] = pd.cut(df['Fare'], bins=edges, labels=labels)
    return df

# Splitting names into surname and title
def _split_passenger_name(name):
    """Return ``(surname, title)`` parsed from one passenger name string.

    Names of the form ``"Surname, Title. Given names"`` are split at the
    first comma: everything before it is the surname (which may contain
    spaces) and the first word after it is the title. A name without a comma
    falls back to its first two whitespace-separated words; a missing part
    is returned as an empty string.
    """
    surname, comma, remainder = name.partition(',')
    if comma:
        words = remainder.split()
    else:
        # No comma: treat the first word as the surname, the next as the title.
        words = name.split()
        surname = words.pop(0) if words else ''
    return surname.strip(), (words[0] if words else '')


def format_name(df):
    """Add ``Lname`` (surname) and ``NamePrefix`` (title) columns, in place.

    Both columns are derived from the ``Name`` column. Splitting on the comma
    rather than on single spaces keeps multi-word surnames intact (e.g.
    ``"Vander Planke, Mrs. Julius"`` gives ``Lname='Vander Planke'`` and
    ``NamePrefix='Mrs.'``), leaves no trailing comma on the surname, and does
    not fail on a one-word name.

    Args:
        df: frame with a ``Name`` column of strings.

    Returns:
        The same DataFrame that was passed in, with the two new columns.
    """
    parsed = [_split_passenger_name(name) for name in df.Name]
    # Lists are assigned positionally, so any index on df is respected.
    df['Lname'] = [surname for surname, _ in parsed]
    df['NamePrefix'] = [title for _, title in parsed]
    return df

# Dropping id's and other unusable features
def drop_features(df):
    """Return a new frame without the Ticket, Name and Embarked columns.

    The input frame itself is not modified. With pandas' default error
    handling, a missing column raises ``KeyError``.
    """
    unused_columns = ['Ticket', 'Name', 'Embarked']
    return df.drop(columns=unused_columns)

# Call to data transform functions
def transform_features(df):
    """Run every feature-preparation step on ``df`` and return the result.

    The steps run in this order: ``simplify_ages``, ``simplify_cabins``,
    ``simplify_fares``, ``format_name``, ``drop_features``. ``format_name``
    has to come before ``drop_features`` because it reads the ``Name``
    column that ``drop_features`` removes.
    """
    steps = (
        simplify_ages,
        simplify_cabins,
        simplify_fares,
        format_name,
        drop_features,
    )
    for step in steps:
        df = step(df)
    return df

## Run data transformations on train/test

In [None]:
# Prepare the features of each frame separately (train first, then test) ...
train, test = (transform_features(frame) for frame in (train, test))
# ... then encode them together so both frames share the same label codes.
train, test = encode_features(train, test)
train.head()

## Train-test split

In [None]:
# Fraction of the labelled rows held out for evaluation.
num_test = 0.20

# Target column, and the features with the target and the row id removed.
y_all = train['Survived']
X_all = train.drop(columns=['Survived', 'PassengerId'])

# Fixed random_state so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    test_size=num_test,
    random_state=42,
)

## Create classifier using autosklearn

In [None]:
cls = autosklearn.classification.AutoSklearnClassifier(
    # Model evaluation: 10-fold cross-validation.
    resampling_strategy='cv',
    resampling_strategy_arguments={'folds': 10},
    # Time budgets (seconds, per the auto-sklearn API docs): the whole search,
    # and a single model run.
    time_left_for_this_task=3600,
    per_run_time_limit=360,
    # Working directories, relative to the current working directory.
    tmp_folder='tmp/autosklearn_cv_example_tmp',
    output_folder='tmp/autosklearn_cv_example_out',
    delete_tmp_folder_after_terminate=True,
)

## fit models

In [None]:
# Run the auto-sklearn search on the training split.
# RuntimeWarnings raised inside the block are ignored; the previous warning
# filters are restored when the `with` block exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)
    # Copies are passed instead of the frames themselves -- presumably so the
    # search cannot alter X_train / y_train (TODO confirm).
    cls.fit(X_train.copy(), y_train.copy())

## Refit the final ensemble on the whole training split

In [None]:
# Refit on the same training split that was used for the search; the held-out
# X_test / y_test are not passed here.
# NOTE(review): presumably required because the classifier was created with
# resampling_strategy='cv' -- confirm against the auto-sklearn docs.
with warnings.catch_warnings():
    # Ignore RuntimeWarnings for the duration of the refit only.
    warnings.simplefilter("ignore", category=RuntimeWarning)
    cls.refit(X_train.copy(), y_train.copy())

## show models

In [None]:
# Print whatever show_models() reports for the fitted classifier.
print(cls.show_models())

## Predict on the held-out split and report accuracy

In [None]:
# Predict on X_test, the held-out part of the train/test split (not the
# separately loaded `test` frame), and print the accuracy against y_test.
predictions_test = cls.predict(X_test)
print(accuracy_score(y_test, predictions_test))