In [None]:
# XGBoost trained on all features, evaluated on the validation set
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.model_selection import learning_curve, cross_val_score, validation_curve, GridSearchCV, train_test_split
import xgboost as xgb
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score


# Baseline model: XGBoost with library-default hyperparameters, trained on
# the training split. fit() returns the fitted estimator, so construction and
# training are chained into a single expression.
xgb_clf = xgb.XGBClassifier().fit(X_train, y_train)

# Hard class predictions for the validation split; reused by the reports and
# metrics that follow.
y_pred_xgb = xgb_clf.predict(X_val)

# Text summary of validation performance: per-class precision/recall/F1,
# followed by the raw confusion matrix.
print(classification_report(y_val, y_pred_xgb))

# Compute the confusion matrix once and reuse it for both the printout and
# the heatmap instead of recomputing it.
cm_xgb = confusion_matrix(y_val, y_pred_xgb)
print(cm_xgb)

# fmt='d' renders the annotations as plain integers; seaborn's default
# ('.2g') would show counts >= 100 in scientific notation (e.g. 1.2e+02).
sns.heatmap(cm_xgb, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix Heatmap')
plt.show()

# Learning curve: 5-fold cross-validated scores at ten training-set sizes,
# evenly spaced from 10% to 100% of the training data.
train_sizes, train_scores, test_scores = learning_curve(
    xgb_clf,
    X_train,
    y_train,
    cv=5,
    train_sizes=np.linspace(0.1, 1.0, 10),
)

# Each score array has one row per training size and one column per fold;
# plot the per-size mean across folds for the training and CV scores.
for _fold_scores, _colour, _label in (
    (train_scores, 'r', 'Training Score'),
    (test_scores, 'g', 'Cross-Validation Score'),
):
    plt.plot(train_sizes, _fold_scores.mean(axis=1), 'o-', color=_colour, label=_label)

plt.title('Learning Curve')
plt.xlabel('Training Examples')
plt.ylabel('Score')
plt.legend(loc='best')
plt.show()

# Scalar validation metrics for the all-features XGBoost model.
# Threshold-based metrics are computed from the hard class predictions.
# NOTE: f1/precision/recall use sklearn's default average='binary', so this
# assumes a binary target.
xgb_acc_all = accuracy_score(y_val, y_pred_xgb)
xgb_f1_all = f1_score(y_val, y_pred_xgb)
xgb_precision_all = precision_score(y_val, y_pred_xgb)
xgb_recall_all = recall_score(y_val, y_pred_xgb)

# ROC AUC is a ranking metric and must be computed from continuous scores.
# Passing hard 0/1 predictions collapses the ROC curve to a single threshold
# and misreports the AUC, so use the predicted probability of the positive
# class (column 1 of predict_proba) instead.
xgb_auc_all = roc_auc_score(y_val, xgb_clf.predict_proba(X_val)[:, 1])

# ROC analysis on the validation split, based on the predicted probability of
# the positive class (second column of predict_proba).
y_prob = xgb_clf.predict_proba(X_val)[:, 1]
fpr, tpr, thresholds = roc_curve(y_val, y_prob)
roc_auc = auc(fpr, tpr)  # area under the (fpr, tpr) curve

# Model curve first, then the diagonal that a random classifier would trace.
plt.plot(fpr, tpr, color='b', label='ROC Curve (AUC = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='gray', linestyle='--', label='Random Guess')

# Clamp both axes to the unit square and decorate the figure.
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.show()

