In [None]:
# Importing necessary libraries
import pandas as pd
from sklearn.metrics import (
    auc,
    average_precision_score,
    classification_report,
    confusion_matrix,
    precision_recall_curve,
)
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier

# Read the transactions table.
# Point the path below at your local copy of the dataset file.
data = pd.read_csv("creditcard.csv")

# Separate the label column from the predictor columns.
y = data["Class"]                 # Target (0: Non-fraud, 1: Fraud)
X = data.drop(columns=["Class"])  # Everything except the label

# Hold out 30% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Handling class imbalance by specifying class weights.
# The 'balanced' mode adjusts weights inversely proportional to class frequencies.
decision_tree = DecisionTreeClassifier(class_weight="balanced", random_state=42)

# Hyperparameter tuning using GridSearchCV.
# The grid below yields 4 * 3 * 3 = 36 candidate parameter combinations.
param_grid = {
    'max_depth': [5, 10, 15, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 5],
}

# With cv=5 the search performs 36 * 5 = 180 fits plus a final refit.
# The fits are independent of each other, so n_jobs=-1 spreads them across
# all available CPU cores; the scores and the selected parameters are the
# same as for a sequential run.
# NOTE(review): candidates are ranked on recall alone, so false positives do
# not influence the choice -- confirm this is the intended trade-off.
grid_search = GridSearchCV(
    estimator=decision_tree,
    param_grid=param_grid,
    scoring='recall',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Best model: the estimator refitted with the winning parameter combination.
best_tree = grid_search.best_estimator_

# Model evaluation
# Hard class predictions are used for the report and confusion matrix; the
# probability of label 1 (second column of predict_proba) is used for the
# precision-recall analysis.
y_pred = best_tree.predict(X_test)
y_pred_prob = best_tree.predict_proba(X_test)[:, 1]

# Classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Precision-Recall Curve
precision, recall, thresholds = precision_recall_curve(y_test, y_pred_prob)

# Summarise the curve with average precision instead of
# auc(recall, precision): the trapezoidal rule linearly interpolates between
# operating points, which scikit-learn's documentation warns can be too
# optimistic. The name auc_pr is kept because the plotting code reads it.
auc_pr = average_precision_score(y_test, y_pred_prob)

# Draw precision against recall, with the summary score shown in the legend
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(recall, precision, label=f"AUC-PR: {auc_pr:.2f}")
ax.set(xlabel="Recall", ylabel="Precision", title="Precision-Recall Curve")
ax.legend()
ax.grid()
plt.show()

# Report the parameter combination selected by the grid search
# (heading and value on separate lines).
print("Best Hyperparameters:", grid_search.best_params_, sep="\n")
