In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report

# Read the semicolon-delimited bank dataset into a DataFrame
df = pd.read_csv('bank-full.csv', sep=';')

# Exploratory Data Analysis (EDA)
# Print, in order: the first few rows, the describe() summary statistics,
# and the number of missing values in each column
for eda_view in (df.head(), df.describe(), df.isnull().sum()):
    print(eda_view)

# Data Cleaning and Preprocessing
# One-hot encode the categorical columns. drop_first=True drops the first
# level of each encoded column; the code below expects the target to come
# out of this step as the single indicator column `y_yes`.
df = pd.get_dummies(df, drop_first=True)

# Splitting the data: `y_yes` is the target, every other column is a feature
X = df.drop('y_yes', axis=1)
y = df['y_yes']
# Hold out 20% for testing. Stratify on the target so the train and test
# sets keep the same class proportions (the F1 scores computed later depend
# on how many positives land in the test set); the fixed random_state keeps
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardizing the features
# Fit the scaler on the training split only and reuse its statistics for the
# test split, so no test-set information is used during preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Modeling and Evaluation

def _report_scores(label, predictions):
    """Print the test-set accuracy and F1-score of one model's predictions."""
    print(f"{label} Accuracy:", accuracy_score(y_test, predictions))
    print(f"{label} F1-Score:", f1_score(y_test, predictions))


# Each model is fit on the standardized training split with its default
# hyperparameters (unless noted) and evaluated on the held-out test split.

# K-Nearest Neighbors
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
knn_pred = knn.predict(X_test)
_report_scores("KNN", knn_pred)

# Logistic Regression
# max_iter is raised above the library default to give the solver more
# iterations to converge.
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)
log_reg_pred = log_reg.predict(X_test)
_report_scores("Logistic Regression", log_reg_pred)

# Decision Trees
# Seed the tree with the same random_state as the train/test split: an
# unseeded tree can produce different scores from one run to the next.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
dt_pred = dt.predict(X_test)
_report_scores("Decision Tree", dt_pred)

# Support Vector Machines
svm = SVC()
svm.fit(X_train, y_train)
svm_pred = svm.predict(X_test)
_report_scores("SVM", svm_pred)

# Comparing Model Performance
# Keep the display names and the prediction arrays in the same order so each
# metric lines up with its model in the summary table.
models = ['KNN', 'Logistic Regression', 'Decision Tree', 'SVM']
all_preds = (knn_pred, log_reg_pred, dt_pred, svm_pred)
accuracies = [accuracy_score(y_test, preds) for preds in all_preds]
f1_scores = [f1_score(y_test, preds) for preds in all_preds]

# One row per model with its test-set accuracy and F1-score
comparison_df = pd.DataFrame(
    {'Model': models, 'Accuracy': accuracies, 'F1-Score': f1_scores}
)
print(comparison_df)

# Findings and Recommendations



: 