<a href="https://colab.research.google.com/github/sakuna47/ML_BankingSystem1/blob/main/Untitled10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing necessary libraries
!pip install imblearn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE
from sklearn.metrics import roc_curve, roc_auc_score

# Mount Google Drive
from google.colab import drive
# Attach the user's Google Drive to the Colab filesystem.
drive.mount('/content/drive')

# Read the bank marketing CSV from the mounted Drive.
# The file uses ';' rather than ',' as its field separator.
file_path = '/content/drive/My Drive/bank-additional-full.csv'
data = pd.read_csv(
    file_path,
    delimiter=';',
)

# Data Preparation
# Report the table's dimensions and preview its first rows.
print("Dataset Shape:", data.shape)
print("\nDataset Overview:")
print(data.head())

# Tally NaN cells per column once; the same tally drives the check below.
# NOTE(review): the public description of this dataset reportedly codes
# missing categorical values as the string 'unknown', which isnull() does not
# detect -- confirm whether those entries should be treated as missing.
missing_per_column = data.isnull().sum()
print("\nMissing Values:")
print(missing_per_column)

# Only drop rows when at least one NaN cell actually exists.
if missing_per_column.sum() == 0:
    print("\nNo missing values found.")
else:
    data = data.dropna()
    print("\nMissing values were found and handled by dropping rows.")

# Exploratory Data Analysis (EDA)
# Show how many rows belong to each target class: first as a table,
# then as a bar chart.
class_counts = data['y'].value_counts()
print("\nClass Distribution:")
print(class_counts)

class_ax = sns.countplot(data=data, x='y')
class_ax.set_title("Class Distribution")
plt.show()

# Encoding categorical variables
# Each object-typed column other than the target is replaced by integer codes
# from its own LabelEncoder; the fitted encoders are stored in
# `label_encoders`, keyed by column name.
categorical_columns = data.select_dtypes(include=['object']).columns
label_encoders = {}

feature_columns = [name for name in categorical_columns if name != 'y']
for col in feature_columns:
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])
    label_encoders[col] = encoder

# Encode the target variable: 'yes' becomes 1 and 'no' becomes 0.
target_codes = {'yes': 1, 'no': 0}
data['y'] = data['y'].map(target_codes)

# Keep the target as a separate integer Series for modelling.
y = data['y'].astype(int)

# Train/test split, feature scaling and class rebalancing.
#
# The hold-out split is made FIRST so that nothing fitted below ever sees the
# test rows:
#   * StandardScaler statistics (mean / std) are estimated from the training
#     rows only and then applied unchanged to the test rows.
#   * SMOTE creates synthetic minority samples from existing rows. Running it
#     before the split lets synthetic training rows be derived from rows that
#     end up in the test set (and puts synthetic rows into the test set),
#     which inflates every reported metric. It is therefore applied to the
#     training split only; the test split keeps its original class ratio.
X = data.drop('y', axis=1)

# Splitting the dataset (stratified so both splits keep the original class mix)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Feature Scaling
# Columns are selected from X rather than `data` so the integer-coded target
# 'y' is never passed through the scaler.
scaler = StandardScaler()
numerical_columns = X.select_dtypes(include=['int64', 'float64']).columns

# Work on explicit copies so the column assignments below do not write into
# (or warn about) views of X.
X_train = X_train.copy()
X_test = X_test.copy()
X_train[numerical_columns] = scaler.fit_transform(X_train[numerical_columns])
X_test[numerical_columns] = scaler.transform(X_test[numerical_columns])

# Addressing Class Imbalance using SMOTE (training rows only)
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
X_train, y_train = X_resampled, y_resampled

# NOTE(review): any cross-validation run on this resampled training set still
# shares synthetic samples across folds, so CV scores used for model selection
# remain optimistic. Wrapping SMOTE and the estimator in an
# imblearn.pipeline.Pipeline would resample inside each fold instead.

# Model 1: Random Forest Classifier with Extended Hyperparameter Tuning
# Exhaustive search over forest size, tree depth, split/leaf minimums and
# bootstrapping: 4 * 4 * 3 * 3 * 2 = 288 candidates, each scored by F1 with
# 3-fold cross-validation on the training data.
rf_params = dict(
    n_estimators=[100, 200, 300, 500],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    bootstrap=[True, False],
)
rf_model = RandomForestClassifier(random_state=42)
grid_rf = GridSearchCV(
    estimator=rf_model,
    param_grid=rf_params,
    scoring='f1',
    cv=3,
    n_jobs=-1,  # use every available CPU core
)
grid_rf.fit(X_train, y_train)

# Use the best-scoring configuration to predict the held-out rows.
rf_best_model = grid_rf.best_estimator_
rf_predictions = rf_best_model.predict(X_test)

# Evaluation: Random Forest
# Apply each metric, in order, to the same (truth, prediction) pair.
rf_accuracy, rf_precision, rf_recall, rf_f1 = (
    score_fn(y_test, rf_predictions)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

print("Random Forest Classifier Metrics:")
print(f"Best Parameters: {grid_rf.best_params_}")
print(f"Accuracy: {rf_accuracy:.4f}")
print(f"Precision: {rf_precision:.4f}")
print(f"Recall: {rf_recall:.4f}")
print(f"F1-Score: {rf_f1:.4f}")

# Confusion Matrix for Random Forest
# Rows are the actual classes, columns the predicted classes.
rf_cm = confusion_matrix(y_test, rf_predictions)
sns.heatmap(
    rf_cm,
    annot=True,
    fmt='d',
    cmap='Blues',
)
plt.title("Random Forest Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

# Model 2: Neural Network (MLPClassifier) with Increased Layers and Nodes
# Three hidden layers (150 -> 100 -> 50 units) trained with Adam for at most
# 500 iterations; the seed is fixed for reproducibility.
mlp_model = MLPClassifier(
    hidden_layer_sizes=(150, 100, 50),
    solver='adam',
    learning_rate_init=0.001,
    max_iter=500,
    random_state=42,
)
mlp_model.fit(X_train, y_train)

# Predict the held-out rows with the fitted network.
mlp_predictions = mlp_model.predict(X_test)

# Evaluation: Neural Network
# Apply each metric, in order, to the same (truth, prediction) pair.
mlp_accuracy, mlp_precision, mlp_recall, mlp_f1 = (
    score_fn(y_test, mlp_predictions)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

print("\nNeural Network Metrics:")
print(f"Accuracy: {mlp_accuracy:.4f}")
print(f"Precision: {mlp_precision:.4f}")
print(f"Recall: {mlp_recall:.4f}")
print(f"F1-Score: {mlp_f1:.4f}")

# Confusion Matrix for Neural Network
# Rows are the actual classes, columns the predicted classes.
mlp_cm = confusion_matrix(y_test, mlp_predictions)
sns.heatmap(
    mlp_cm,
    annot=True,
    fmt='d',
    cmap='Greens',
)
plt.title("Neural Network Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

# Compare Results
# Line the two models' scores up metric by metric in one table.
metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
rf_scores = [rf_accuracy, rf_precision, rf_recall, rf_f1]
mlp_scores = [mlp_accuracy, mlp_precision, mlp_recall, mlp_f1]

results_df = pd.DataFrame(
    {
        'Metric': metrics,
        'Random Forest': rf_scores,
        'Neural Network': mlp_scores,
    }
)
print("\nComparison of Models:")
print(results_df)

# Plot comparison: one group of bars per metric, one bar per model.
scores_by_metric = results_df.set_index('Metric')
scores_by_metric.plot.bar(figsize=(10, 6))
plt.title("Model Comparison")
plt.ylabel("Score")
plt.xticks(rotation=0)  # keep the metric names horizontal
plt.legend(loc='lower right')
plt.show()


# ROC analysis, grouped per model: positive-class probability (second column
# of predict_proba), area under the curve, then the curve's points.

# Random Forest
rf_probs = rf_best_model.predict_proba(X_test)[:, 1]
rf_auc = roc_auc_score(y_test, rf_probs)
rf_fpr, rf_tpr, _ = roc_curve(y_test, rf_probs)

# Neural Network
mlp_probs = mlp_model.predict_proba(X_test)[:, 1]
mlp_auc = roc_auc_score(y_test, mlp_probs)
mlp_fpr, mlp_tpr, _ = roc_curve(y_test, mlp_probs)

# Plot ROC Curves: both models plus the diagonal of a no-skill classifier.
plt.plot(rf_fpr, rf_tpr, label=f"Random Forest (AUC = {rf_auc:.4f})")
plt.plot(mlp_fpr, mlp_tpr, label=f"Neural Network (AUC = {mlp_auc:.4f})")
plt.plot([0, 1], [0, 1], 'k--', label="No Skill")
plt.title("ROC Curve")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc="lower right")
plt.show()


