In [None]:
#Random Forest with best parameters
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Candidate values explored for each random-forest hyperparameter
param_grid = {
    'n_estimators': [10, 50, 100],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5, 10],
}

# Base estimator; the fixed seed keeps repeated runs comparable
rf_clf = RandomForestClassifier(random_state=42)

# Exhaustive search over the grid with 5-fold cross-validation,
# fitted on the training split (fit() returns the search object itself)
grid_search = GridSearchCV(rf_clf, param_grid, cv=5).fit(X_train, y_train)

# Report the winning parameter combination
print("Best hyperparameters:", grid_search.best_params_)

# Score the selected model on the validation split
y_pred_rf = grid_search.predict(X_val)
accuracy = accuracy_score(y_true=y_val, y_pred=y_pred_rf)
print("Accuracy with best hyperparameters:", accuracy)

# Feature importances of the best estimator found by the grid search,
# paired with the training column names (X_train must expose `.columns`)
importances = grid_search.best_estimator_.feature_importances_
features = X_train.columns

# Positions of the features ordered from most to least important
indices = np.argsort(importances)[::-1]

# Feature names rearranged to match the sorted importances
names = [features[i] for i in indices]

# One bar slot per feature; computed once and shared by the bars and the ticks
bar_positions = range(X_train.shape[1])

# Bar chart of the sorted importances
plt.figure()
plt.title("Feature Importance")
plt.bar(bar_positions, importances[indices])

# Feature names as x-axis labels, rotated so long names do not overlap
plt.xticks(bar_positions, names, rotation=90)

plt.show()

from sklearn.metrics import classification_report, accuracy_score, f1_score, confusion_matrix, roc_curve, auc

# Summary metrics for the validation predictions
accuracy = accuracy_score(y_val, y_pred_rf)
f1 = f1_score(y_val, y_pred_rf)

# Confusion matrix computed once and reused for every print and for the heatmap
cm = confusion_matrix(y_val, y_pred_rf)

print(classification_report(y_val, y_pred_rf))
print(cm)

# ROC curve and AUC from the predicted probability of the second class
# (column 1 of predict_proba)
y_pred_proba_rf = grid_search.predict_proba(X_val)[:, 1]
fpr, tpr, thresholds = roc_curve(y_val, y_pred_proba_rf)
roc_auc = auc(fpr, tpr)

# Print accuracy, f1 score, and confusion matrix
print("Accuracy:", accuracy)
print("F1 score:", f1)
print("Confusion matrix:\n", cm)

# Heatmap of the confusion matrix; fmt='d' renders the integer counts as-is
# instead of seaborn's default '.2g', which switches to scientific notation
# for counts of 100 or more
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()

# Plot ROC curve; the dashed diagonal is the chance-level reference line
plt.figure()
# The plotted line is the ROC curve and the AUC is its area, so label it that way
plt.plot(fpr, tpr, label='ROC curve (AUC = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc="lower right")
plt.show()