In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix, accuracy_score

In [None]:
def fit_random_forest(grid, X, y, cv=3, scorer='roc_auc', n_jobs=-1, random_state=None):
    """Grid-search a RandomForestClassifier over `grid` and return the fitted search.

    Parameters
    ----------
    grid : dict or list of dict
        Passed to GridSearchCV as `param_grid`.
    X, y
        Training features and targets, passed straight to `GridSearchCV.fit`.
    cv : int, default 3
        Passed to GridSearchCV as `cv`.
    scorer : str, default 'roc_auc'
        Passed to GridSearchCV as `scoring`.
    n_jobs : int, default -1
        Passed to GridSearchCV as `n_jobs`.
    random_state : int or None, default None
        Seed for the forest. The default (None) keeps the previous unseeded
        behaviour; pass an int to make repeated runs reproducible.

    Returns
    -------
    tuple
        `(grid_search, best_estimator)` where `best_estimator` is
        `grid_search.best_estimator_`.
    """
    # Seeding the base estimator is what makes the whole search repeatable.
    rf = RandomForestClassifier(random_state=random_state)
    grid_search = GridSearchCV(
        estimator=rf,
        param_grid=grid,
        cv=cv,
        n_jobs=n_jobs,
        scoring=scorer,
    )
    grid_search.fit(X, y)
    return grid_search, grid_search.best_estimator_

In [None]:
def save_model(model, name):
    """Pickle `model` to the file path `name`.

    Parameters
    ----------
    model
        Any picklable object.
    name : str or path-like
        Destination file; it is opened in binary write mode, so an existing
        file at that path is overwritten.

    Returns
    -------
    None
    """
    # The context manager guarantees the handle is flushed and closed even if
    # pickling raises part-way through.
    with open(name, 'wb') as model_file:
        pickle.dump(model, model_file)

In [None]:
def get_prob_pred(best_estimator, X_test):
    """Return `(probabilities, predictions)` for `X_test`.

    `probabilities` is the column at index 1 of `best_estimator.predict_proba(X_test)`;
    `predictions` is the result of `best_estimator.predict(X_test)`.
    """
    class_scores = best_estimator.predict_proba(X_test)
    second_column = class_scores[:, 1]
    hard_labels = best_estimator.predict(X_test)
    return second_column, hard_labels

def get_prob_distribution(probs, preds):
    """Print how often each value occurs in `preds` and draw a 100-bin histogram of `probs`."""
    label_counts = pd.Series(preds).value_counts()
    print(label_counts)

    prob_series = pd.Series(probs)
    prob_series.hist(bins=100)

In [None]:
def get_auc_score(probs, actual):
    """Return `roc_auc_score` computed with `actual` as the true labels and `probs` as the scores."""
    return roc_auc_score(y_true=actual, y_score=probs)

In [None]:
def establishing_threshold_with_accuracy(probs, threshold, actual):
    """Binarise `probs` at `threshold` and score the result against `actual`.

    Entries strictly greater than `threshold` become 1, all others 0.

    Returns
    -------
    tuple
        `(threshold, threshold_predictions, accuracy_rate)` where
        `threshold_predictions` is a new pandas Series of 0/1 values and
        `accuracy_rate` is `accuracy_score(actual, threshold_predictions)`.
    """
    above_cutoff = probs > threshold
    binary_labels = pd.Series(np.where(above_cutoff, 1, 0))
    score = accuracy_score(actual, binary_labels)
    return threshold, binary_labels, score

In [None]:
def get_classification_report(actual, threshold_preds):
    """Return `classification_report` for true labels `actual` and predicted labels `threshold_preds`."""
    report = classification_report(y_true=actual, y_pred=threshold_preds)
    return report

In [None]:
def get_feature_importances(model, columns, n=10):
    """Show a horizontal bar chart of the `n` largest values in `model.feature_importances_`.

    `columns` supplies the index labels for the importances. Bars are sorted
    ascending before plotting.
    """
    importances = pd.Series(model.feature_importances_, index=columns)
    top_features = importances.nlargest(n)
    top_features.sort_values(ascending=True).plot.barh()
    plt.show()


In [None]:
def plot_confusion_matrix(actual, preds):
    """Draw an annotated heatmap of the confusion matrix of `actual` vs `preds`.

    Parameters
    ----------
    actual
        Iterable of true labels.
    preds
        Iterable of predicted labels.

    Notes
    -----
    Rows are labelled 'Actual' and columns 'Predicted'. Both axes use the same
    sorted union of the labels found in either input, so the tick labels
    always match the matrix even when a class is missing from one side.
    """
    # One shared, ordered label set drives both the matrix layout and the
    # tick labels; deriving them separately from each input can mislabel cells.
    labels = sorted(set(actual) | set(preds))
    matrix = confusion_matrix(actual, preds, labels=labels)

    ax = plt.subplot()
    sns.heatmap(matrix, cmap="Blues", cbar=False, annot=True, fmt=',',
                linewidths=1, linecolor='grey', square=True,
                xticklabels=labels, yticklabels=labels, ax=ax)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title('Confusion Matrix')
    plt.show()