In [1]:
import pandas as pd
import numpy as np
import os
import sys
sys.path.append('../src/')

from octopus import OctopusML

In [2]:
# Pieces of the raw-data location: base folder, dataset sub-folder, CSV file name.
path_raw, dirname, filename = '../data/raw/', 'titanic/', 'titanic.csv'

## Load data

In [3]:
# Join the configured folder pieces into the CSV path, then read it into a DataFrame.
raw_csv_path = os.path.join(path_raw, dirname, filename)
raw_df = pd.read_csv(raw_csv_path)

In [4]:
# Sanity check on the loaded frame: print its (rows, columns) tuple,
# then let the notebook render the first three rows.
n_rows, n_cols = raw_df.shape
print((n_rows, n_cols))
raw_df.head(3)

(891, 12)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


## Configurations

In [5]:
# Predictor column names, grouped under the two keys 'qualitative' and 'quantitative'.
features_type = dict(
    qualitative=['PassengerId', 'Pclass', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'],
    quantitative=['Age', 'SibSp', 'Parch', 'Fare'],
)

# Name of the target column.
y_name = 'Survived'

# Output folder, kept next to the other path settings.
path_output = '../data/output/titanic/'

# Work on an independent (deep) copy so that raw_df itself is never modified.
data = raw_df.copy(deep=True)

In [6]:
# Every tunable setting in one literal; keys are in the same order as before.
config = {
    'test_size': 0.25,
    'min_missing': 0.25,
    'outliers_method': 'lof',
    'alpha_sta': 0.05,
    'strategy_missing': 'median',
    'method_scale': 'standard',
    'metric_train': 'accuracy',
    'seed': 42,
    'njobs': -1,
}

## Octopus Execution

In [8]:
# Build the OctopusML object, passing each listed `config` entry as the
# keyword argument of the same name.
OctoML = OctopusML(**{
    option: config[option]
    for option in (
        'test_size',
        'min_missing',
        'outliers_method',
        'alpha_sta',
        'strategy_missing',
        'method_scale',
        'metric_train',
        'njobs',
        'seed',
    )
})

In [9]:
# Gather the inputs for the run (working copy, target name, feature grouping,
# output folder), then call autoML and keep whatever it returns in `results`.
automl_inputs = {
    'data': data,
    'y_name': y_name,
    'features_type': features_type,
    'path_output': path_output,
}
results = OctoML.autoML(**automl_inputs)

2021-05-29 11:24:38,481 INFO: Started to check the features consistency
2021-05-29 11:24:38,526 INFO: Features: ['PassengerId', 'Name', 'Ticket', 'Cabin'] were removed because its distribution
2021-05-29 11:24:38,528 INFO: Consistency values finished!
2021-05-29 11:24:38,534 INFO: Feature Embarked was imputer with "other"
2021-05-29 11:24:38,536 INFO: None feature were removed because the missing values
2021-05-29 11:24:38,538 INFO: Check the missing values finished!
2021-05-29 11:24:38,540 INFO: Detect outliers started
2021-05-29 11:24:38,541 INFO: Local Outlier Factor method selected
2021-05-29 11:24:38,585 INFO: Detected 80 outliers
2021-05-29 11:24:38,588 INFO: Detect outliers finished


Logistic Regression is going to be fitted
Logistic Regression fitted
Regularized Logistic Regression is going to be fitted
Regularized Logistic Regression fitted
Random Forest is going to be fitted
|   iter    |  target   | max_depth | max_fe... | min_sa... | min_sa... | n_esti... |
-------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.796   [0m | [0m 4.996   [0m | [0m 0.9547  [0m | [0m 37.94   [0m | [0m 65.89   [0m | [0m 120.2   [0m |
| [0m 2       [0m | [0m 0.7763  [0m | [0m 3.248   [0m | [0m 0.1522  [0m | [0m 43.98   [0m | [0m 66.09   [0m | [0m 368.6   [0m |
| [0m 3       [0m | [0m 0.796   [0m | [0m 2.165   [0m | [0m 0.9719  [0m | [0m 42.46   [0m | [0m 33.05   [0m | [0m 131.8   [0m |
| [0m 4       [0m | [0m 0.796   [0m | [0m 3.467   [0m | [0m 0.3735  [0m | [0m 28.61   [0m | [0m 51.72   [0m | [0m 181.1   [0m |
| [0m 5       [0m | [0m 0.7894  [0m | [0m 6.895   [0m | 

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 720x288 with 0 Axes>