<a href="https://colab.research.google.com/github/sakuna47/ML_BankingSystem1/blob/main/Untitled10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing necessary libraries
!pip install imblearn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE

# Make Google Drive visible inside the Colab runtime so files stored there
# can be opened by path.
from google.colab import drive
drive.mount('/content/drive')

# Read the semicolon-delimited CSV from the mounted Drive into a DataFrame.
file_path = '/content/drive/My Drive/bank-additional-full.csv'
data = pd.read_csv(file_path, delimiter=';')

# Data Preparation: report the frame's dimensions and preview its first rows.
print("Dataset Shape:", data.shape)
print("\nDataset Overview:", data.head(), sep="\n")

# Count null entries per column once and reuse the result below.
missing_per_column = data.isnull().sum()
print("\nMissing Values:", missing_per_column, sep="\n")

# Drop every row that contains at least one null; otherwise leave `data` as is.
if missing_per_column.sum() == 0:
    print("\nNo missing values found.")
else:
    data = data.dropna()
    print("\nMissing values were found and handled by dropping rows.")

# Exploratory Data Analysis (EDA): how many rows fall into each target class.
class_counts = data['y'].value_counts()
print("\nClass Distribution:", class_counts, sep="\n")

# Bar chart of the same per-class counts.
sns.countplot(data=data, x='y')
plt.title("Class Distribution")
plt.show()

# Encoding categorical variables
# Every object-dtype column except the target gets its own LabelEncoder; the
# fitted encoders are kept by column name so the integer codes can be mapped
# back to the original strings later.
categorical_columns = data.select_dtypes(include=['object']).columns
label_encoders = {}

feature_columns = [name for name in categorical_columns if name != 'y']
for col in feature_columns:
    label_encoders[col] = LabelEncoder()
    data[col] = label_encoders[col].fit_transform(data[col])

# Encode the target variable with an explicit mapping (yes -> 1, no -> 0).
target_codes = {'yes': 1, 'no': 0}
data['y'] = data['y'].map(target_codes)

# Keep the target as a standalone integer Series.
y = data['y'].astype(int)

# Separate the features from the target. `y` was extracted above as the 0/1
# integer labels and is used as-is; the target column is never scaled.
X = data.drop('y', axis=1)

# Splitting the dataset
# The hold-out split happens FIRST, on the original rows. Scaling and SMOTE
# are both fitted afterwards on the training portion only, so no statistic or
# synthetic sample derived from a test row can reach the training data, and
# the test set consists solely of real observations at the real class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Work on independent copies so the column assignments below do not write
# through to (or warn about) the frame the split was taken from.
X_train = X_train.copy()
X_test = X_test.copy()

# Feature Scaling
# Mean/variance are learned from the training rows only; the test rows are
# transformed with those same training statistics. `data` itself is left
# unscaled.
scaler = StandardScaler()
numerical_columns = X.select_dtypes(include=['int64', 'float64']).columns
X_train[numerical_columns] = scaler.fit_transform(X_train[numerical_columns])
X_test[numerical_columns] = scaler.transform(X_test[numerical_columns])

# Addressing Class Imbalance using SMOTE
# Oversample the minority class in the training split only.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# The balanced training set is what models are fitted on; X_test / y_test stay
# untouched for evaluation.
X_train, y_train = X_resampled, y_resampled
