# MLJAR - Machine Learning with Joy And Relax :)

MLJAR GitHub: https://github.com/mljar/mljar-supervised

<img src="https://raw.githubusercontent.com/mljar/visual-identity/main/media/kaggle_banner_white.png" style="width: 80%;"/>

The additional engineered features are taken from this notebook:

https://www.kaggle.com/alexryzhkov/tps-april-21-lightautoml-starter

In [None]:
!pip install -q -U mljar-supervised

In [None]:
import numpy as np
import pandas as pd
from supervised.automl import AutoML # mljar-supervised

In [None]:
# Load the competition's training and test tables from the Kaggle input folder.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/tabular-playground-series-apr-2021/train.csv",
        "../input/tabular-playground-series-apr-2021/test.csv",
    )
)

In [None]:
# code from -> https://www.kaggle.com/alexryzhkov/tps-april-21-lightautoml-starter
def converter(x):
    """Return the trailing numeric part of a ticket value as a float.

    The value is stringified and split on whitespace, and only the last
    token is parsed, so a prefixed ticket such as ``"A/5 21171"`` yields
    ``21171.0`` instead of being treated as missing.

    Args:
        x: Raw ticket value; may be a string, a number, or NaN.

    Returns:
        The last whitespace-separated token as a float, or ``np.nan`` when
        the value is blank or the token is not numeric.
    """
    tokens = str(x).split()
    if not tokens:
        # A blank value has no last token; report it as missing rather than
        # letting the [-1] lookup raise IndexError.
        return np.nan
    try:
        # Parse the last token, not the whole string, so prefixes are ignored.
        return float(tokens[-1])
    except ValueError:
        return np.nan
    
def create_extra_features(data):
    """Add engineered ticket, cabin, name and family columns to ``data``.

    The frame is modified in place and the very same object is returned.

    Args:
        data: DataFrame providing the 'Ticket', 'Cabin', 'Name', 'SibSp',
            'Parch', 'Fare' and 'Age' columns.

    Returns:
        ``data`` itself, extended with the new feature columns.
    """

    def ticket_prefix(ticket):
        # Only tickets made of several whitespace-separated parts get a
        # prefix; it is the first three characters minus '.', '/' and ' '.
        text = str(ticket)
        if len(text.split()) > 1:
            return text[:3].replace('.', '').replace('/', '').replace(' ', '').strip()
        return np.nan

    def cabin_letter(cabin):
        # First character of the cabin label; missing cabins stay missing.
        label = str(cabin)
        if label != 'nan':
            return label[0]
        return np.nan

    def cabin_digits(cabin):
        # Everything after the first character, parsed as a number.
        label = str(cabin)
        if label != 'nan':
            return float(label[1:])
        return np.nan

    def given_name(full_name):
        # Second ', '-separated segment of the name.
        return full_name.split(', ')[1]

    ticket_column = data['Ticket']
    data['Ticket_type'] = ticket_column.map(ticket_prefix)
    data['Ticket_len'] = data['Ticket_type'].str.len()
    data['Ticket_number'] = ticket_column.map(converter)

    data['Cabin_type'] = data['Cabin'].map(cabin_letter)
    data['Cabin_number'] = data['Cabin'].map(cabin_digits)

    data['Fixed_Name'] = data['Name'].map(given_name)
    data['FamilySize'] = data['SibSp'] + data['Parch'] + 1

    # Bucket each numeric column into 20 quantile bins, stored as strings.
    binned_columns = (
        ('CategoricalTicket', 'Ticket_number'),
        ('CategoricalCabin', 'Cabin_number'),
        ('CategoricalFare', 'Fare'),
        ('CategoricalAge', 'Age'),
    )
    for target_name, source_name in binned_columns:
        data[target_name] = pd.qcut(data[source_name], 20).astype(str)

    return data

# Engineer the extra columns on both frames, train first and then test.
# NOTE(review): create_extra_features runs pd.qcut on whichever frame it is
# given, so the Categorical* bin labels are derived from train and test
# independently — confirm the two sets of labels are meant to differ.
train, test = (create_extra_features(frame) for frame in (train, test))

In [None]:
# Show the first rows of the training frame to inspect the added feature columns.
train.head()

In [None]:
# Target column, and the model inputs: every column from the third one on,
# minus the raw text columns.
y_col = "Survived"
x_cols = train.columns[2:].tolist()
for raw_text_col in ('Name', 'Ticket'):
    x_cols.remove(raw_text_col)

In [None]:
# Configure mljar AutoML in its Optuna mode, scored with F1 under 5-fold
# cross-validation, then fit it on the engineered training features.
# NOTE(review): the time settings are presumably in seconds (1200 s = 20 min,
# 48*3600 s = 48 h) — confirm against the mljar-supervised docs.
# NOTE(review): mljar's docs describe the Optuna mode as having no overall
# training-time limit — verify whether total_time_limit has any effect here.
automl = AutoML(
    mode="Optuna", 
    optuna_time_budget=1200,
    eval_metric="f1",
    total_time_limit=48*3600,
    validation_strategy={
        "validation_type": "kfold",
        "k_folds": 5
    },
    random_state=2021  # fixed seed passed to AutoML
)
automl.fit(train[x_cols], train[y_col])

In [None]:
# Predict on the test frame using the same feature columns as for fitting.
preds = automl.predict(test[x_cols])
# test.iloc[:, 0] is the first column of the test frame — presumably the
# PassengerId column; verify against the CSV layout.
submission = pd.DataFrame({'PassengerId':test.iloc[:,0], 'Survived': preds})
# Write the submission file without the DataFrame index.
submission.to_csv('1_submission.csv', index=False)

In [None]:
# Ask the fitted AutoML object for its training report (presumably rendered
# as this cell's output in a notebook — confirm against the mljar docs).
automl.report()

# Thank you!

<img src="https://raw.githubusercontent.com/mljar/visual-identity/main/media/robot_academy.png" style="width: 40%;"/>
