# Reading the data

In [18]:
import pandas as pd

# Load the bank data set; its fields are separated by semicolons, not commas
df = pd.read_csv(
    "bank-full.csv",
    sep=";",
)

# Peek at the first rows to check that the columns were parsed as expected
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


In [19]:
# Class balance of the target: normalize=True reports each label's share of
# the rows instead of its raw count.
df["y"].value_counts(normalize=True)

y
no     0.883015
yes    0.116985
Name: proportion, dtype: float64

# Defining input and target

In [20]:
# Target: the "y" column recoded as integers ("no" -> 0, "yes" -> 1)
y = df["y"].map({"no": 0, "yes": 1})

# Inputs: every column of df except the target
X = df.drop(columns=["y"])

# Processing the input variables

In [22]:
from sklearn.preprocessing import OrdinalEncoder

# Encoding categorical features.
# The object-dtype feature columns are looked up on the raw frame `df`
# (minus the target) rather than on `X`: this cell overwrites those columns
# of X with numbers, so selecting from X would find nothing on a re-run.
cat_cols = df.drop(columns=["y"]).select_dtypes(include="object").columns

# Defining the encoder
ord_encoder = OrdinalEncoder()

# Always encode from the untouched raw values in `df`, so running this cell
# again reproduces the same X and the same fitted encoder (idempotent).
# NOTE(review): several of these columns (e.g. job, marital, contact) look
# nominal; integer codes give them an arbitrary order for the SVM below.
# Consider one-hot encoding them inside the pipeline instead -- TODO confirm.
# NOTE(review): the encoder is fitted on all rows, outside the
# cross-validation folds used later.
X[cat_cols] = ord_encoder.fit_transform(df[cat_cols])
X.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
0,58,4.0,1.0,2.0,0.0,2143,1.0,0.0,2.0,5,8.0,261,1,-1,0,3.0
1,44,9.0,2.0,1.0,0.0,29,1.0,0.0,2.0,5,8.0,151,1,-1,0,3.0
2,33,2.0,1.0,1.0,0.0,2,1.0,1.0,2.0,5,8.0,76,1,-1,0,3.0
3,47,1.0,1.0,3.0,0.0,1506,1.0,0.0,2.0,5,8.0,92,1,-1,0,3.0
4,33,11.0,2.0,3.0,0.0,1,0.0,0.0,2.0,5,8.0,198,1,-1,0,3.0


# Running 5-fold cross-validation

In [23]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.svm import SVC

# Defining the cross-validation method: 5 stratified, shuffled folds with a
# fixed seed, so repeated calls with `skf` score on the same splits.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Defining the model: the scaler sits inside the pipeline, so it is refit on
# the training part of every fold.
# `probability=True` is deliberately not set: the 'roc_auc' scorer ranks
# samples with the pipeline's decision_function, so the extra internal
# cross-validated probability calibration only made each fit slower without
# changing the score. `random_state` only seeds that calibration, so it is
# dropped as well.
svm_md = make_pipeline(StandardScaler(), SVC())

# Running the cross-validation (one AUC per fold, folds evaluated in parallel)
svm_cv = cross_val_score(svm_md, X, y, cv=skf, scoring='roc_auc', n_jobs=-1)

# Mean and standard deviation of the per-fold AUC values
print(f"SVM AUC: {svm_cv.mean():.3f} ± {svm_cv.std():.3f}")

SVM AUC: 0.853 ± 0.007


In [24]:
# Defining the model: standardised inputs and an SVC with C=10
# (scikit-learn's default is C=1.0).
# `probability=True` is deliberately not set: the 'roc_auc' scorer ranks
# samples with the pipeline's decision_function, so the extra internal
# cross-validated probability calibration only made each fit slower without
# changing the score. `random_state` only seeds that calibration, so it is
# dropped as well.
svm_md = make_pipeline(StandardScaler(), SVC(C=10))

# Running the cross-validation on the splits defined by `skf`
svm_cv = cross_val_score(svm_md, X, y, cv=skf, scoring='roc_auc', n_jobs=-1)

# Mean and standard deviation of the per-fold AUC values
print(f"SVM AUC: {svm_cv.mean():.3f} ± {svm_cv.std():.3f}")

SVM AUC: 0.862 ± 0.007


In [25]:
from sklearn.preprocessing import MinMaxScaler

# Defining the model: same SVC (C=10), but the features are rescaled with
# MinMaxScaler instead of StandardScaler.
# `probability=True` is deliberately not set: the 'roc_auc' scorer ranks
# samples with the pipeline's decision_function, so the extra internal
# cross-validated probability calibration only made each fit slower without
# changing the score. `random_state` only seeds that calibration, so it is
# dropped as well.
svm_md = make_pipeline(MinMaxScaler(), SVC(C=10))

# Running the cross-validation on the splits defined by `skf`
svm_cv = cross_val_score(svm_md, X, y, cv=skf, scoring='roc_auc', n_jobs=-1)

# Mean and standard deviation of the per-fold AUC values
print(f"SVM AUC: {svm_cv.mean():.3f} ± {svm_cv.std():.3f}")

SVM AUC: 0.883 ± 0.002
