In [17]:
import hyperopt as hp
from hyperopt import fmin, tpe, hp
from hyperopt.hp import quniform

import time

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

from sklearn.metrics import accuracy_score, precision_score, recall_score

### Classification

In [3]:
readmissions = pd.read_csv('../data/classification/readmissions_clean.csv')

# Split dataset into X and Y
X = readmissions.drop(['readmitted'], axis=1)
y = readmissions.readmitted

In [4]:
# splitting X and Y into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=3, test_size=0.2)

In [13]:
def objective(params):
    n_estimators = int(params['n_estimators'])
    max_depth = int(params['max_depth'])
    max_features = int(params['max_features'])

    rf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, max_features=max_features)
    rf.fit(X_train, y_train)
    
    return -accuracy_score(y_test, rf.predict(X_test))

space = {
    'n_estimators': quniform('n_estimators', 2, 500, 1),
    'max_depth': quniform('max_depth', 1, 50, 1),
    'max_features': quniform('max_features', 3, 10, 1)
}

In [18]:
start = time.time()                                
best = fmin(objective, space, algo=tpe.suggest, max_evals=100)
end = time.time()

100%|██████████| 100/100 [06:33<00:00,  3.93s/trial, best loss: -0.6304]


In [19]:
print("Best Hyperparameters: ", best)

Best Hyperparameters:  {'max_depth': 8.0, 'max_features': 3.0, 'n_estimators': 253.0}


In [31]:
"""HYPEROPT BASED ON MAXIMIZING ACCURACY"""

rf = RandomForestClassifier(max_depth=best['max_depth'], max_features=int(best['max_features']), n_estimators=int(best['n_estimators']))
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)


print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)

Accuracy: 0.6282
Precision: 0.6309859154929578
Recall: 0.4819277108433735


In [27]:
elapsed_time = end - start
minutes = int(elapsed_time // 60)
seconds = int(elapsed_time % 60)
print("Elapsed time: {} minutes, {} seconds".format(minutes, seconds))

Elapsed time: 6 minutes, 33 seconds
