In [None]:
from xgboost import XGBClassifier
import pandas as pd
from sklearn.metrics import accuracy_score,recall_score, classification_report, confusion_matrix,roc_curve, roc_auc_score
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# Read the raw train/test tables; the raw frames are kept under their own
# names so the feature matrices below can be rebuilt without re-reading.
train_raw = pd.read_csv(r'../../data/train_data.csv')
test_raw = pd.read_csv(r'../../data/test_data.csv')

# Targets come from the `Category` column of each table.
Y_train = train_raw["Category"]
Y_test = test_raw["Category"]

# Features are picked by position: columns 5..11, column 16, and one column
# counted from the end of the table.
# NOTE(review): train takes the second-to-last column (-2) while test takes the
# last one (-1). Confirm the two CSVs really differ in layout and that neither
# position resolves to `Category` itself (that would leak the label).
shared_positions = list(range(5, 12)) + [16]
X_train = train_raw.iloc[:, shared_positions + [-2]]
X_test = test_raw.iloc[:, shared_positions + [-1]]

In [None]:
from sklearn.preprocessing import LabelEncoder
# Encode the `Category` labels as integer class ids for the classifier.
le = LabelEncoder()
# Learn the label -> integer mapping from the training labels only.
Y_train = le.fit_transform(Y_train)
# Apply the *training* mapping to the test labels. Refitting here (fit_transform)
# would rebuild the mapping from the test set, so a class missing from the test
# data would shift the integer codes away from what the model was trained on and
# would also change what `le.inverse_transform` returns afterwards.
# `transform` raises ValueError if the test set contains a label never seen in
# training, which surfaces the data problem instead of hiding it.
Y_test = le.transform(Y_test)

In [None]:
# Build the gradient-boosted tree classifier: 10 boosting rounds, trees up to
# depth 10, logistic objective.
# learning_rate was previously 5, which is outside the [0, 1] range XGBoost
# documents for eta; a step that large scales every tree's contribution far
# beyond a shrinkage factor. 0.5 is presumably the intended value -- tune as
# needed.
bst = XGBClassifier(
    n_estimators=10,
    max_depth=10,
    learning_rate=0.5,
    objective='binary:logistic',
)
# Fit on the training features / encoded training labels.
bst.fit(X_train, Y_train)
# Hard class predictions for the test features.
y_pred = bst.predict(X_test)
# Column 1 of predict_proba: the probability assigned to encoded class 1.
y_pred_prob = bst.predict_proba(X_test)[:, 1]

In [None]:
# Score the hard predictions against the encoded test labels.
accuracy = accuracy_score(y_true=Y_test, y_pred=y_pred)
report = classification_report(y_true=Y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=Y_test, y_pred=y_pred)

# Print each metric under its heading, in a fixed order.
for heading, value in (
    ("Accuracy:", accuracy),
    ("Classification Report:\n", report),
    ("Confusion Matrix:\n", conf_matrix),
):
    print(heading, value)

In [None]:
# Map the integer predictions back to the label values known to the encoder
# and show them as the cell output.
predicted_labels = le.inverse_transform(y_pred)
predicted_labels

In [None]:
# ROC operating points and the area under the curve, both computed from the
# predicted class-1 probabilities against the encoded test labels.
fpr, tpr, thresholds = roc_curve(y_true=Y_test, y_score=y_pred_prob)
auc = roc_auc_score(y_true=Y_test, y_score=y_pred_prob)

In [None]:
# Plot the ROC curve on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, linewidth=2, label=f'AUC = {auc:.2f}')
# Dashed black diagonal from (0, 0) to (1, 1) as a visual reference line.
ax.plot([0, 1], [0, 1], 'k--', linewidth=2)
ax.set_xlim([0.0, 1.0])
# Upper y limit sits slightly above 1.0 so the curve's top edge is not clipped.
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate (FPR)')
ax.set_ylabel('True Positive Rate (TPR)')
ax.set_title('ROC Curve')
ax.legend(loc="lower right")
plt.show()
