In [None]:
import numpy as np
import pandas as pd
import pickle
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score, roc_curve

# Load the fitted pipeline and the train/validation frames, then score both sets.
# NOTE(security): unpickling executes arbitrary code embedded in the file, so
# only load model/data pickles that come from a trusted source.
with open('../model.pkl', 'rb') as file:
    model = pickle.load(file)

train = pd.read_pickle('train.pkl')
valid = pd.read_pickle('valid.pkl')
target_col = 'loan_status'

# Feature frames = everything except the target column.
# `columns=` is used because the positional `axis` argument of DataFrame.drop
# (the old `drop(target_col, 1)` form) was removed in pandas 2.0.
train_data = train.drop(columns=target_col)
valid_data = valid.drop(columns=target_col)

# Class-probability scores and hard predictions for both data sets.
train_scores = model.predict_proba(X=train_data)
train_predictions = model.predict(X=train_data)
valid_scores = model.predict_proba(X=valid_data)
valid_predictions = model.predict(X=valid_data)

# Column of the predict_proba output that belongs to the positive class.
positive_class = True
classifier_classes = model.named_steps['classifier'].classes_
positive_matches = np.flatnonzero(classifier_classes == positive_class)
if positive_matches.size == 0:
    # Fail with a readable message instead of numpy's zero-size reduction error.
    raise ValueError(
        "positive_class %r not found in classifier classes %r"
        % (positive_class, list(classifier_classes))
    )
index_positive = positive_matches[0]

## Model Evaluation

**Classifier with Parameters**

In [None]:
# Display the pipeline step registered under the name 'classifier'
# (its repr is rendered as the cell output).
model.named_steps['classifier']

**Feature Importance**

In [None]:
# Rank features by the classifier's feature_importances_ (largest first) and
# print the top entries. "feature N" is the positional index into the
# importances array, not a column name.
top_n_features = 10
importances = model.named_steps['classifier'].feature_importances_
indices = np.argsort(importances)[::-1]

# Never ask for more features than the model exposes; otherwise the loop below
# (and any later cell slicing by top_n_features) would index past the end.
top_n_features = min(top_n_features, len(indices))

# Print the feature ranking
print("Feature ranking:")

for rank, feature_idx in enumerate(indices[:top_n_features], start=1):
    print("%d. feature %d (%f)" % (rank, feature_idx, importances[feature_idx]))

In [None]:
# Bar chart of the largest feature importances, in descending order.
# Guard against models with fewer features than top_n_features.
n_bars = min(top_n_features, len(indices))
top_indices = indices[:n_bars]

fig, ax = plt.subplots()
ax.bar(range(n_bars), importances[top_indices], color="r", align="center")
# One tick label per bar: passing the full `indices` array here would give
# matplotlib more labels than tick positions.
ax.set_xticks(range(n_bars))
ax.set_xticklabels(top_indices)
ax.set_xlim([-1, n_bars])
ax.set_xlabel("Feature index")
ax.set_ylabel("Importance")
ax.set_title("Top %d feature importances" % n_bars)
plt.show()

**Classification Report**

Training Set

In [None]:
# Text classification report: true training labels vs. the model's predictions.
print(classification_report(train[target_col], train_predictions))

Validation Set

In [None]:
# Text classification report: true validation labels vs. the model's predictions.
print(classification_report(valid[target_col], valid_predictions))

**Confusion Matrix**

In [None]:
# Confusion matrix on the validation set, drawn as a colour-coded grid.
cm = confusion_matrix(valid[target_col], valid_predictions)

fig, ax = plt.subplots()
cax = ax.matshow(cm)
ax.set_title('Confusion matrix of the classifier')
fig.colorbar(cax)
# Tick labels are not customised here; rows are true classes, columns predicted.
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

**ROC**

In [None]:
# ROC AUC on the validation set, using the positive-class score column.
roc_auc_score(valid[target_col], valid_scores[:, index_positive])

In [None]:
# ROC curve on the validation set, using the positive-class score column.
fpr, tpr, thresholds = roc_curve(y_true=valid[target_col],
                                 y_score=valid_scores[:, index_positive])

fig, ax = plt.subplots()
ax.plot(fpr, tpr, 'ro', label='classifier')
# Diagonal = performance of a random scorer, for visual reference.
ax.plot([0, 1], [0, 1], 'k--', label='chance')
ax.set_xlabel('False positive rate')
ax.set_ylabel('True positive rate')
ax.set_title('ROC curve (validation set)')
ax.legend(loc='lower right')
# Explicit show() so the cell does not echo the Line2D repr as its output.
plt.show()

**Example Predictions**

In [None]:
# Attach the positive-class score to every validation row, then split off the
# rows with very high (> 0.98) and low (< 0.3) scores for inspection.
valid_data_predictions = valid_data.assign(scores=valid_scores[:, index_positive])
high_predictions = valid_data_predictions.loc[valid_data_predictions['scores'] > 0.98]
low_predictions = valid_data_predictions.loc[valid_data_predictions['scores'] < 0.3]

High Predictions

In [None]:
# Preview the first 20 validation rows whose positive-class score exceeds 0.98.
high_predictions.head(20)

Low Predictions

In [None]:
# Preview the first 20 validation rows whose positive-class score is below 0.3.
low_predictions.head(20)