### SMOTEENN (SMOTE + ENN)

In [1]:
# Import dependencies
import pandas as pd
from path import Path 
from collections import Counter

# Read the credit-card default data from the Resources folder (path is
# relative to this notebook) and preview the first five rows.
file_load = Path("../Resources/cc_default.csv")
df = pd.read_csv(filepath_or_buffer=file_load)
df.head(5)

Unnamed: 0,ID,ln_balance_limit,sex,education,marriage,age,default_next_month
0,1,9.903488,1,2,0,24,1
1,2,11.695247,1,2,1,26,1
2,3,11.407565,1,2,1,34,0
3,4,10.819778,1,2,0,37,0
4,5,10.819778,0,2,0,57,0


In [2]:
# Split the dataset into feature and target sets.
# Features are every column except the row identifier ("ID") and the label
# ("default_next_month"); a boolean mask over the column index keeps the
# remaining columns in their original order.
is_feature = ~df.columns.isin(["ID", "default_next_month"])
x_cols = df.columns[is_feature].tolist()
X = df[x_cols]
y = df["default_next_month"].values

In [3]:
# Hold out part of the data for evaluation.
# test_size=0.25 spells out scikit-learn's default hold-out fraction, and
# random_state=1 makes the shuffle reproducible.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1,
)

In [4]:
# Oversample with SMOTE. Only the training split is resampled; the test
# split is left untouched so the evaluation reflects the original class mix.
# NOTE(review): this cell applies plain SMOTE only -- no ENN cleaning step is
# performed here. Confirm whether imblearn.combine.SMOTEENN was intended.
from imblearn.over_sampling import SMOTE

# random_state=0 fixes the sampler's seed for reproducibility.
s_ros = SMOTE(random_state=0)

# Fit the sampler on the training data and return the resampled copies.
X_resampled, y_resampled = s_ros.fit_resample(X=X_train, y=y_train)

In [5]:
# Train a logistic-regression classifier on the resampled training data.
# The fit call is left as the last expression so the cell displays the
# fitted estimator.
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(
    solver="lbfgs",
    random_state=1,
)
model.fit(X=X_resampled, y=y_resampled)

LogisticRegression(random_state=1)

In [7]:
# Predict on the held-out test split and tabulate the results.
# In the confusion matrix, rows are the true classes and columns are the
# predicted classes.
from sklearn.metrics import confusion_matrix, balanced_accuracy_score
y_pred = model.predict(X=X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[3680, 2152],
       [ 735,  933]], dtype=int64)

In [8]:
# Balanced accuracy score (the average of per-class recall), which is more
# informative than plain accuracy when the classes are imbalanced.
acc_score = balanced_accuracy_score(y_true=y_test, y_pred=y_pred)
acc_score

0.595176944863862

In [9]:
# Print the imbalanced-learn classification report for the test-set
# predictions (per-class precision, recall, specificity, F1, geometric mean,
# index balanced accuracy and support).
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_true=y_test, y_pred=y_pred))

                   pre       rec       spe        f1       geo       iba       sup

          0       0.83      0.63      0.56      0.72      0.59      0.36      5832
          1       0.30      0.56      0.63      0.39      0.59      0.35      1668

avg / total       0.72      0.62      0.58      0.65      0.59      0.35      7500

