**Created by Sanskar Hasija**

**🤖LightAutoML Classification - Titanic**

**15 NOVEMBER 2021**


# <center> 🤖LIGHTAUTOML CLASSIFICATION - TITANIC</center>
## <center>If you find this notebook useful, support with an upvote👍</center>

# Installing LightAutoML

In [1]:
from IPython.display import clear_output

!pip install -U lightautoml
clear_output()

# Imports

In [2]:
import numpy as np
import pandas as pd
from lightautoml.automl.presets.tabular_presets import TabularUtilizedAutoML
from lightautoml.tasks import Task
from sklearn.metrics import f1_score

# Data Loading and Preprocessing

In [3]:
# Read the train and test tables plus the sample submission file.
train = pd.read_csv("../input/titanic/train.csv")
test = pd.read_csv('../input/titanic/test.csv')
subs = pd.read_csv('../input/titanic/gender_submission.csv')

# Columns that are removed from both frames before modelling.
drop_elements = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'SibSp', 'Parch']
train, test = (frame.drop(columns=drop_elements) for frame in (train, test))

def checkNull_fillData(df):
    """Fill missing values of ``df`` in place, one column at a time.

    Floating-point columns of any width are filled with the column mean; every
    other column is filled with its most frequent value (the first mode).
    Columns without missing values are left untouched, and so are columns that
    contain no observed value at all, because no fill value can be derived
    from them.

    Args:
        df: DataFrame to impute. It is modified in place.

    Returns:
        None.
    """
    for col in df.columns:
        missing = df[col].isnull()
        # Skip columns with nothing to fill, and all-null columns: the mean of
        # such a column is NaN and its mode is empty, so `mode()[0]` would raise.
        if not missing.any() or missing.all():
            continue
        # A dtype-family check also covers float32/float16 columns, which a
        # comparison against the literal "float64" would route to the mode.
        if pd.api.types.is_float_dtype(df[col]):
            fill_value = df[col].mean()
        else:
            fill_value = df[col].mode()[0]
        df.loc[missing, col] = fill_value
                
# Impute missing values; each frame is filled from its own column statistics.
checkNull_fillData(train)
checkNull_fillData(test)

# Split the training columns into string-valued ones (to be one-hot encoded)
# and everything else. `DataFrame.items()` is used because `iteritems()` was
# removed in pandas 2.0, and the dtype check looks at the column as a whole
# rather than at the single value stored under index label 1.
str_list = []
num_list = []
for colname, colvalue in train.items():
    if pd.api.types.is_string_dtype(colvalue):
        str_list.append(colname)
    else:
        num_list.append(colname)

# One-hot encode the string columns found in `train` in both frames.
# NOTE(review): the two frames are encoded independently, so they only end up
# with matching dummy columns if both contain the same category levels — confirm.
train = pd.get_dummies(train, columns=str_list)
test = pd.get_dummies(test, columns=str_list)

# AutoML

In [4]:
# Run configuration for the AutoML cells below.
N_THREADS = 4          # passed as cpu_limit and as the reader's n_jobs
N_FOLDS = 5
RANDOM_STATE = 12      # seed handed to the AutoML preset
TEST_SIZE = 0.2
TIMEOUT = 30 * 60      # 30 minutes, expressed in seconds

def f1_metric(y_true, y_pred, **kwargs):
    """Return the F1 score of ``y_pred`` thresholded at 0.5 against ``y_true``.

    Args:
        y_true: Ground-truth labels, forwarded unchanged to ``f1_score``.
        y_pred: Predicted scores; must support ``> 0.5`` followed by
            ``.astype(int)`` (e.g. a NumPy array).
        **kwargs: Extra keyword arguments forwarded to ``f1_score``.

    Returns:
        Whatever ``f1_score`` returns for the thresholded predictions.
    """
    # Scores strictly above 0.5 become class 1, everything else class 0.
    hard_labels = (y_pred > 0.5).astype(int)
    return f1_score(y_true, hard_labels, **kwargs)

# Column roles handed to the AutoML fit: 'Survived' is the prediction target.
roles = {'target': 'Survived'}

# Binary task evaluated with the thresholded F1 metric defined above.
task = Task('binary', metric=f1_metric)

In [5]:
# Build the AutoML preset, restricted to a linear model and two LightGBM
# variants, with the time and CPU budget from the configuration cell.
# N_FOLDS is now passed to the reader as `cv`; it was previously declared in
# the configuration cell but never used, so changing it had no effect.
automl = TabularUtilizedAutoML(
    task=task,
    timeout=TIMEOUT,
    cpu_limit=N_THREADS,
    random_state=RANDOM_STATE,
    general_params={'use_algos': [['linear_l2', 'lgb', 'lgb_tuned']]},
    reader_params={'n_jobs': N_THREADS, 'cv': N_FOLDS},
)
# Fit on the training frame; the value returned by fit_predict is kept in `history`.
history = automl.fit_predict(train, roles=roles, verbose=1)

[12:03:02] Start automl [1mutilizator[0m with listed constraints:
[12:03:02] - time: 1800.00 seconds
[12:03:02] - CPU: 4 cores
[12:03:02] - memory: 16 GB

[12:03:02] [1mIf one preset completes earlier, next preset configuration will be started[0m

[12:03:02] Start 0 automl preset configuration:
[12:03:02] [1mconf_0_sel_type_0.yml[0m, random state: {'reader_params': {'random_state': 12}, 'general_params': {'return_all_predictions': False}}
[12:03:02] Stdout logging level is INFO.
[12:03:02] Task: binary

[12:03:02] Start automl preset with listed constraints:
[12:03:02] - time: 1800.00 seconds
[12:03:02] - CPU: 4 cores
[12:03:02] - memory: 16 GB

[12:03:02] [1mTrain data shape: (891, 9)[0m

[12:03:05] Layer [1m1[0m train process start. Time left 1796.70 secs
[12:03:05] Start fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m ...
[12:03:06] Fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m finished. score = [1m0.0[0m
[12:03:06] [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m fitting and predicting co

# Submission

In [6]:
# Score the test frame with the fitted model.
test_pred = automl.predict(test)

# Threshold the first prediction column at 0.5 to obtain 0/1 labels.
survived_flags = test_pred.data[:, 0] > 0.5
subs['Survived'] = survived_flags.astype(int)

# Write the submission file without the index column and preview the first rows.
subs.to_csv('lightautoml.csv', index=False)
subs.head()

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,1
