In [2]:
import pandas as pd 

# Load the raw passenger table from the CSV next to the notebook.
dataset = pd.read_csv('titanic.csv', sep=',', decimal='.')

# Build the modelling frame in one expression:
#   1. keep the target and the numeric predictors,
#   2. append indicator columns for Embarked and Sex (aligned on the index),
#   3. drop every row that still has a missing value,
#   4. store the target as a boolean.
# NOTE(review): with get_dummies' default dummy_na=False, a passenger whose
# Embarked is missing presumably ends up with all Embarked_* flags False and
# is therefore NOT removed by dropna() -- confirm this is intended.
filtrado = (
    pd.concat(
        [
            dataset[['Survived', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare']],
            pd.get_dummies(dataset['Embarked'], prefix='Embarked'),
            pd.get_dummies(dataset['Sex'], prefix='Sex'),
        ],
        axis=1,
    )
    .dropna()
    .astype({'Survived': bool})
)

filtrado



Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Embarked_C,Embarked_Q,Embarked_S,Sex_female,Sex_male
0,False,3,22.0,1,0,7.2500,False,False,True,False,True
1,True,1,38.0,1,0,71.2833,True,False,False,True,False
2,True,3,26.0,0,0,7.9250,False,False,True,True,False
3,True,1,35.0,1,0,53.1000,False,False,True,True,False
4,False,3,35.0,0,0,8.0500,False,False,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...
885,False,3,39.0,0,5,29.1250,False,True,False,True,False
886,False,2,27.0,0,0,13.0000,False,False,True,False,True
887,True,1,19.0,0,0,30.0000,False,False,True,True,False
889,True,1,26.0,0,0,30.0000,True,False,False,False,True


In [3]:
# Check column dtypes and non-null counts of the prepared frame.
filtrado.info()

<class 'pandas.core.frame.DataFrame'>
Index: 714 entries, 0 to 890
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Survived    714 non-null    bool   
 1   Pclass      714 non-null    int64  
 2   Age         714 non-null    float64
 3   SibSp       714 non-null    int64  
 4   Parch       714 non-null    int64  
 5   Fare        714 non-null    float64
 6   Embarked_C  714 non-null    bool   
 7   Embarked_Q  714 non-null    bool   
 8   Embarked_S  714 non-null    bool   
 9   Sex_female  714 non-null    bool   
 10  Sex_male    714 non-null    bool   
dtypes: bool(6), float64(2), int64(3)
memory usage: 37.7 KB


In [4]:
# Import only the name the notebook uses instead of a wildcard, so the
# namespace stays clean and it is clear where ClassificationExperiment comes from.
from pycaret.classification import ClassificationExperiment


In [5]:

# Create the experiment object that the following cells operate on.
exp = ClassificationExperiment()

# Configure the experiment on the prepared frame with 'Survived' as the
# target. session_id is fixed so reruns use the same session id, and runs are
# logged to MLflow under the experiment name "titanic".
exp.setup(
    data=filtrado,
    target='Survived',
    session_id=22,
    log_experiment="mlflow",
    experiment_name="titanic",
)

exp

Unnamed: 0,Description,Value
0,Session id,22
1,Target,Survived
2,Target type,Binary
3,Original data shape,"(714, 11)"
4,Transformed data shape,"(714, 11)"
5,Transformed train set shape,"(499, 11)"
6,Transformed test set shape,"(215, 11)"
7,Numeric features,5
8,Preprocess,True
9,Imputation type,simple


<pycaret.classification.oop.ClassificationExperiment at 0x263a1000710>

In [6]:
# Cross-validate the library's candidate classifiers and display the ranking
# table; the returned object (the top-ranked model, per the PyCaret docs) is
# kept as `best`.
best = exp.compare_models()


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.8076,0.8584,0.719,0.7949,0.7501,0.595,0.6016,0.026
lr,Logistic Regression,0.7977,0.8554,0.6995,0.7923,0.7355,0.5735,0.5831,0.542
catboost,CatBoost Classifier,0.7976,0.8609,0.7086,0.7755,0.7377,0.5738,0.578,0.596
ridge,Ridge Classifier,0.7957,0.8544,0.685,0.8019,0.7288,0.5677,0.5813,0.008
lda,Linear Discriminant Analysis,0.7957,0.8541,0.685,0.8019,0.7288,0.5677,0.5813,0.008
ada,Ada Boost Classifier,0.7897,0.8329,0.7348,0.7499,0.7364,0.5623,0.5684,0.022
lightgbm,Light Gradient Boosting Machine,0.7836,0.8463,0.6943,0.7569,0.7193,0.5445,0.5501,0.084
nb,Naive Bayes,0.7817,0.8407,0.7248,0.7484,0.7294,0.5473,0.5548,0.008
rf,Random Forest Classifier,0.7715,0.8487,0.719,0.7288,0.7169,0.5261,0.5328,0.041
dt,Decision Tree Classifier,0.7634,0.755,0.719,0.716,0.7108,0.5115,0.5183,0.007




In [7]:
# Show the interactive evaluation widget (plot-type selector) for `best`.
exp.evaluate_model(best)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [9]:
# To browse the logged results in MLflow, start the tracking server from a
# terminal with the command: mlflow server

# Fit a cross-validated logistic regression ('lr') as the baseline.
model_lr = exp.create_model('lr')

# Search 1000 hyperparameter candidates for that model, selecting on F1.
tuned = exp.tune_model(
    model_lr,
    n_iter=1000,
    optimize='F1',
)


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.72,0.7947,0.619,0.6842,0.65,0.4176,0.4191
1,0.8,0.8916,0.7619,0.7619,0.7619,0.5895,0.5895
2,0.84,0.8736,0.7143,0.8824,0.7895,0.6627,0.6724
3,0.68,0.7483,0.45,0.6429,0.5294,0.2982,0.3091
4,0.84,0.8383,0.6,1.0,0.75,0.6429,0.6882
5,0.82,0.92,0.8,0.7619,0.7805,0.6281,0.6286
6,0.8,0.8883,0.85,0.7083,0.7727,0.5968,0.6047
7,0.78,0.84,0.75,0.7143,0.7317,0.5455,0.5459
8,0.84,0.8933,0.75,0.8333,0.7895,0.661,0.6634
9,0.8571,0.8655,0.7,0.9333,0.8,0.6924,0.7097




Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.72,0.8062,0.6667,0.6667,0.6667,0.4253,0.4253
1,0.86,0.8982,0.9048,0.7917,0.8444,0.7182,0.7235
2,0.86,0.867,0.7619,0.8889,0.8205,0.7069,0.7125
3,0.68,0.74,0.55,0.6111,0.5789,0.322,0.3232
4,0.8,0.8383,0.65,0.8125,0.7222,0.569,0.5776
5,0.82,0.9283,0.8,0.7619,0.7805,0.6281,0.6286
6,0.78,0.8767,0.9,0.6667,0.766,0.5669,0.5898
7,0.76,0.845,0.75,0.6818,0.7143,0.5082,0.5099
8,0.86,0.9,0.85,0.8095,0.8293,0.7107,0.7114
9,0.8571,0.8603,0.75,0.8824,0.8108,0.6973,0.7032


Fitting 10 folds for each of 1000 candidates, totalling 10000 fits


