# Healthcare.ai Advanced Example

In [None]:
%matplotlib inline
from sklearn.pipeline import Pipeline
import healthcareai as hcai
import healthcareai.common.transformers as hcai_transformers
import healthcareai.common.filters as hcai_filters
import healthcareai.trained_models.trained_supervised_model as hcai_tsm
import pandas as pd
import matplotlib.pyplot as plt
# Use large figures and a larger font for every plot drawn in this notebook.
plt.rcParams.update({
    'figure.figsize': (20.0, 10.0),
    'font.size': 14,
})

## Step 1: Load training data

In [None]:
# Load the healthcare.ai sample diabetes dataset; `df` is reused by the cells below.
df = hcai.load_diabetes()
# Preview the first rows to check that the data loaded as expected.
df.head()

In [None]:
# Plot one histogram per column for the columns at positions 2, 3 and 4.
histogram_columns = df.iloc[:, 2:5]
histogram_columns.hist()

# Summary statistics for the dataframe (shown as the cell output).
df.describe()

## Step 2: Create a custom data preparation pipeline

In [None]:
# Data preparation steps as (name, transformer) pairs; the pipeline applies
# them to the dataframe in the order listed.
# NOTE(review): 'PatientEncounterID' is removed by the 'remove_grain_column'
# step but is passed as grain_column when the trainer is created in Step 3 --
# confirm the trainer does not need that column to be present.
preparation_steps = [
    (
        'remove_DTS_columns',
        hcai_filters.DataframeColumnSuffixFilter(),
    ),
    (
        'remove_grain_column',
        hcai_filters.DataframeColumnRemover(
            columns_to_remove=['PatientID', 'PatientEncounterID']),
    ),
    (
        'imputation',
        hcai_transformers.DataFrameImputer(impute=True),
    ),
    (
        'null_row_filter',
        hcai_filters.DataframeNullValueFilter(excluded_columns=None),
    ),
    (
        'convert_target_to_binary',
        hcai_transformers.DataFrameConvertTargetToBinary(
            'classification', 'ThirtyDayReadmitFLG'),
    ),
    (
        'prediction_to_numeric',
        hcai_transformers.DataFrameConvertColumnToNumeric('ThirtyDayReadmitFLG'),
    ),
    (
        # The target column is excluded so it is not turned into dummy columns.
        'create_dummy_variables',
        hcai_transformers.DataFrameCreateDummyVariables(
            excluded_columns=['ThirtyDayReadmitFLG']),
    ),
]

# Fit every step on the raw data and return the transformed dataframe.
preparation_pipeline = Pipeline(preparation_steps)
clean_training_df = preparation_pipeline.fit_transform(df)

clean_training_df.head()

## Step 3: Create an advanced trainer with prepared training data

In [None]:
# Create the trainer from the prepared dataframe. The same `trainer` object is
# used in the following steps to split the data and to train every model.
# NOTE(review): 'PatientEncounterID' was removed from the dataframe by the
# 'remove_grain_column' pipeline step in Step 2 -- confirm the trainer does not
# require the grain column to be present.
trainer = hcai.AdvancedSupervisedModelTrainer(
    dataframe=clean_training_df,
    model_type='classification',
    predicted_column='ThirtyDayReadmitFLG',
    grain_column='PatientEncounterID',
    verbose=False)

## Step 4: Split the data into training and test sets

Splits the data into training (80%) and testing (20%) sets. The training set is used to fit the models; the testing set is held out and used to estimate how well the trained models perform on data they have not seen.

In [None]:
# Called with no arguments, so the trainer's default split settings apply.
trainer.train_test_split()

## Step 5: Train models with custom hyperparameters

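Hyperparameters are settings that control how an algorithm learns (for example, `C` for logistic regression or `n_estimators` for a random forest). They are tuned in an attempt to optimize performance and produce more accurate results. Each cell below defines a grid of candidate values and uses a randomized search to sample combinations from it.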

### Train a Logistic Regression model

In [None]:
# Candidate values for each logistic regression hyperparameter.
lr_hyperparameters = dict(
    C=[0.01, 0.1, 1, 10, 100],
    class_weight=[None, 'balanced'],
)

# Randomized search over the grid above, scored by ROC AUC.
trained_lr = trainer.logistic_regression(
    hyperparameter_grid=lr_hyperparameters,
    scoring_metric='roc_auc',
    number_iteration_samples=10,
    randomized_search=True,
)

### Train a Random Forest model

In [None]:
# Candidate values for each random forest hyperparameter.
rf_hyperparameters = dict(
    n_estimators=[50, 100, 200, 300],
    max_features=[1, 2, 3, 4],
    max_leaf_nodes=[None, 30, 400],
)

# Randomized search over the grid above, scored by ROC AUC.
trained_rf = trainer.random_forest_classifier(
    hyperparameter_grid=rf_hyperparameters,
    scoring_metric='roc_auc',
    number_iteration_samples=10,
    randomized_search=True,
)

# Plot the trained forest's features, using the training features for context.
hcai_tsm.plot_rf_features_from_tsm(trained_rf, trainer.x_train)

### Train an ensemble classification model

This is a simple way to hand healthcare.ai your data and have it train several models and pick the best one. You can specify the models you want to compare, or let the ensemble trainer automatically train Logistic Regression, KNN, and Random Forest models for you.

In [None]:
# No models are passed in, so the ensemble trainer picks its own set of models
# to train and compare, scoring them by ROC AUC.
trained_ensemble = trainer.ensemble_classification(
    scoring_metric='roc_auc')

In [None]:
# Map a display name to each of the models trained in Step 5 that should be
# compared by the ensemble.
custom_ensemble_models = {
    'Logistic Regression': trained_lr,
    'Random Forest': trained_rf
}

# Same ensemble call as the automatic one, but restricted to the models
# supplied through trained_model_by_name.
custom_ensemble = trainer.ensemble_classification(
    scoring_metric='roc_auc',
    trained_model_by_name=custom_ensemble_models)

## Step 6: Evaluate and compare models

In [None]:
# Create a list of all the models you just trained that you want to compare
# Models trained above that should appear together on each comparison plot.
models_to_compare = [trained_lr, trained_rf, trained_ensemble]

# Arguments shared by both plots: the models to draw, and no saving of the figure.
comparison_plot_settings = {
    'trained_supervised_models': models_to_compare,
    'save': False,
}

# ROC plot comparing the models.
hcai_tsm.tsm_classification_comparison_plots(plot_type='ROC', **comparison_plot_settings)

# PR plot comparing the models.
hcai_tsm.tsm_classification_comparison_plots(plot_type='PR', **comparison_plot_settings)