In [1]:
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from src.utils import best_model_per_grid, get_tuned_model
import pickle
import os

In [2]:
# Move one directory up so the relative paths used below ('data/...', 'models/...')
# resolve, but only when the dataset is not already reachable from the current
# working directory. Without the guard, every re-run of this cell climbs one more
# level and the later relative paths stop resolving.
if not os.path.exists('data/processed/dataset.csv'):
    os.chdir('../')

In [3]:
# Read the processed dataset into a DataFrame. The path is relative to the
# current working directory (changed by the os.chdir cell above).
dataset_path = 'data/processed/dataset.csv'
X = pd.read_csv(dataset_path)

In [4]:
# Display the column labels of the loaded frame (still includes the target column at this point).
X.columns

Index(['Edad', 'Sexo', 'Café', 'Tabaco', 'Alcohol', 'APPHTA', 'APPDM',
       'APPEPOC', 'APPIRC', 'APPEnfValvular', 'APPIMA', 'APPAngina', 'APPACV',
       'No.LesionesCoronarias', 'LesionACD', 'Lesion TCI', 'LesionADA',
       'LesionACircunfleja', 'BCPIAoPrep', 'BCPIAoTrans', 'BCPIAoPost',
       'Dobutamina', 'Dopamina', 'Norepinefrina', 'Epinefrina',
       'Nitroglicerina', 'Hipoglucemia', 'Hiperglucemia', 'Hiponatremia',
       'Hipernatremia ', 'Hipopotasemia', 'Hiperpotasemia',
       'Acidosis metabólica', 'Alcalosis metabólica', 'Acidosis respiratoria',
       'Alcalosis respiratoria', 'DisfRenalPosop', 'DisfNeuroPosop',
       'DisfHepatPosop', 'Estadia', 'Egreso', 'Evoluciòn', 'FEVIPreop',
       'TamañoVI', 'AltContractVI', 'AMI', 'AMI+VSI', 'DAMI', 'DAM + VS',
       'PuentesAR', 'PuentesAGep', 'PuentesVen', 'RevascIncompleta', 'CEC',
       'DuracionCEC', 'Uso vasoactivos pst', 'AltMITransop', 'IMAPeriop',
       'Vasoplejia post', 'Hipoxemia post ', 'PaO2/FiO2 post',
 

In [5]:
# Separate the target column ('SBGC') from the predictor columns.
# X is rebound here, so without the guard a second run of this cell would raise
# KeyError because 'SBGC' has already been dropped. With the guard a re-run is a
# no-op and the previously extracted X / y stay in place.
target_col = 'SBGC'
if target_col in X.columns:
    y = X[target_col]
    X = X.drop(columns=target_col)
elif 'y' not in globals():
    # Target is absent and was never extracted: fail here with the same
    # exception type the unguarded drop/select would raise.
    raise KeyError(f"target column {target_col!r} not found in the dataset")

In [6]:
# Base random forest with library-default hyperparameters; random_state is fixed at 42.
# It is handed to best_model_per_grid in a later cell.
rf_model = RandomForestClassifier(random_state=42)

In [7]:
# Hyperparameter candidates for the random forest; every key maps to the list
# of values passed on to best_model_per_grid.
rf_params = dict(
    n_estimators=[200, 300, 350],
    max_depth=[6, 10, 12],
    min_samples_leaf=[2, 4, 6],
    min_samples_split=[2, 4, 10],
    criterion=['gini', 'entropy'],
)

In [8]:
# Hand the base forest and the rf_params candidates to the project helper and rebind
# rf_model to whatever it returns. Presumably this is a grid search; judging by the
# recorded output it prints the winning parameter set and its score — TODO confirm
# against src.utils.best_model_per_grid.
# NOTE(review): rf_model is both input and output, so re-running this cell feeds the
# previous result back in as the starting estimator.
rf_model = best_model_per_grid(rf_model, rf_params, X, y)

  _data = np.array(data, dtype=dtype, copy=copy,


{'criterion': 'entropy', 'max_depth': 6, 'min_samples_leaf': 2, 'min_samples_split': 10, 'n_estimators': 350} 0.9742753623188406


In [9]:
# Base decision tree with library-default hyperparameters; random_state is fixed at 42.
# It is handed to best_model_per_grid in a later cell.
dt_model = DecisionTreeClassifier(random_state=42)

In [10]:
# Hyperparameter candidates for the decision tree; every key maps to the list
# of values passed on to best_model_per_grid.
# NOTE(review): scikit-learn appears to stop splitting nodes with fewer than
# 2 * min_samples_leaf samples, so min_samples_split values below that bound are
# probably redundant grid points — confirm before trimming the grid.
dt_params = dict(
    max_depth=list(range(5, 13)),            # 5..12 inclusive
    max_features=['sqrt', 'log2', 0.2, 0.5, 0.8],
    min_samples_leaf=list(range(4, 13)),     # 4..12 inclusive
    min_samples_split=list(range(4, 13)),    # 4..12 inclusive
    criterion=['gini', 'entropy'],
)

In [11]:
# Hand the base tree and the dt_params candidates to the project helper and rebind
# dt_model to whatever it returns. Presumably this is a grid search; judging by the
# recorded output it prints the winning parameter set and its score — TODO confirm
# against src.utils.best_model_per_grid.
# NOTE(review): dt_model is both input and output, so re-running this cell feeds the
# previous result back in as the starting estimator.
dt_model = best_model_per_grid(dt_model, dt_params, X, y)

{'criterion': 'entropy', 'max_depth': 5, 'max_features': 0.5, 'min_samples_leaf': 4, 'min_samples_split': 4} 0.9826086956521738


In [12]:
# The project helper returns a pair: a model object and an index of column names.
# Presumably the columns are the features the helper selected for that model —
# TODO confirm against src.utils.get_tuned_model.
rf_tuned_model, rf_features = get_tuned_model(rf_model, X, y)
# Show the column names reported for the random forest.
rf_features

Index(['Edad', 'Dobutamina', 'Norepinefrina', 'Acidosis metabólica', 'Estadia',
       'FEVIPreop', 'TamañoVI', 'DuracionCEC', 'Uso vasoactivos pst',
       'PaO2/FiO2 post', 'Lactato post', 'FEVI post', 'SvO2'],
      dtype='object')

In [13]:
# Mean accuracy of the tuned random forest over 5 cross-validation folds.
# NOTE(review): scoring uses every column of X rather than X[rf_features], and the
# recorded result is identical to the score printed by the rf best_model_per_grid
# cell — confirm whether the selected feature subset was meant to be evaluated here.
rf_fold_scores = cross_val_score(rf_tuned_model, X, y, cv=5, scoring='accuracy')
rf_accuracy = rf_fold_scores.mean()

rf_accuracy

0.9742753623188406

In [18]:
# The project helper returns a pair: a model object and an index of column names.
# Presumably the columns are the features the helper selected for that model —
# TODO confirm against src.utils.get_tuned_model.
# NOTE(review): this cell's recorded execution count (18) is out of sequence with its
# neighbours (13 and 15); Restart Kernel -> Run All before relying on the saved outputs.
dt_tuned_model, dt_features = get_tuned_model(dt_model, X, y)

# Show the column names reported for the decision tree.
dt_features

Index(['Norepinefrina', 'Lactato post', 'FEVI post'], dtype='object')

In [15]:
# Mean accuracy of the tuned decision tree over 5 cross-validation folds.
# NOTE(review): scoring uses every column of X rather than X[dt_features], and the
# recorded result is identical to the score printed by the dt best_model_per_grid
# cell — confirm whether the selected feature subset was meant to be evaluated here.
dt_fold_scores = cross_val_score(dt_tuned_model, X, y, cv=5, scoring='accuracy')
dt_accuracy = dt_fold_scores.mean()

dt_accuracy

0.9826086956521738

In [16]:
# Serialize the tuned random forest to models/rf_model.pkl.
# The output directory is created first: open(..., 'wb') does not create missing
# parent directories, so on a checkout without models/ this cell would otherwise
# fail with FileNotFoundError after all earlier cells have already run.
os.makedirs('models', exist_ok=True)
with open('models/rf_model.pkl', 'wb') as file:
    pickle.dump(rf_tuned_model, file)

In [17]:
# Serialize the tuned decision tree to models/dt_model.pkl.
# The output directory is created first: open(..., 'wb') does not create missing
# parent directories, so on a checkout without models/ this cell would otherwise
# fail with FileNotFoundError after all earlier cells have already run.
os.makedirs('models', exist_ok=True)
with open('models/dt_model.pkl', 'wb') as file:
    pickle.dump(dt_tuned_model, file)