## Code reference: https://towardsdatascience.com/5-smote-techniques-for-oversampling-your-imbalance-data-b8155bdbe2b5

In [None]:
import pandas as pd
import seaborn as sns  # fixed: the package is `seaborn`, not `seaborns`

# NOTE: `df` must already hold the churn data (read from CSV elsewhere; the
# load is not shown in this file). Only the two continuous features
# CreditScore and Age are used here, together with the target Exited.
df_example = df[['CreditScore', 'Age', 'Exited']]

# Scatter plot of the original (imbalanced) data, coloured by the target.
sns.scatterplot(data=df, x='CreditScore', y='Age', hue='Exited')

In [None]:
# Import SMOTE
from imblearn.over_sampling import SMOTE

# Oversample the two continuous features against the Exited target.
smote = SMOTE(random_state=101)
X, y = smote.fit_resample(df[['CreditScore', 'Age']], df['Exited'])

# Build a new DataFrame holding the oversampled features and target.
df_oversampler = pd.DataFrame(X, columns=['CreditScore', 'Age'])
# Fixed: the resampled target was never attached (the original line was a
# bare lookup of a column that did not exist, which raises KeyError).
df_oversampler['Exited'] = y

# Class counts after oversampling. The column is passed by keyword because
# a positional argument is interpreted as `data` by current seaborn.
sns.countplot(data=df_oversampler, x='Exited')

In [None]:
# Scatter plot of the oversampled data, coloured by the Exited target.
sns.scatterplot(
    x='CreditScore',
    y='Age',
    hue='Exited',
    data=df_oversampler,
)

In [None]:
# Splitter, classification model, and evaluation metric.
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# 80/20 split, stratified on the target so both parts keep the class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    df_example[['CreditScore', 'Age']],
    df['Exited'],
    random_state=101,
    stratify=df['Exited'],
    test_size=0.2,
)

In [None]:
# Oversample the training split only; the test split is left untouched.
smote = SMOTE(random_state=101)
X_oversample, y_oversample = smote.fit_resample(
    X_train,
    y_train,
)

In [None]:
# Baseline: fit on the original (imbalanced) training data and report the
# metrics on the held-out test split.
classifier = LogisticRegression().fit(X_train, y_train)
print(
    classification_report(
        y_true=y_test,
        y_pred=classifier.predict(X_test),
    )
)

In [None]:
# Fit on the SMOTE-oversampled training data and evaluate on the same
# held-out test split as the baseline.
classifier_o = LogisticRegression().fit(X_oversample, y_oversample)
print(
    classification_report(
        y_true=y_test,
        y_pred=classifier_o.predict(X_test),
    )
)

In [None]:
# Rebind the working subset: one continuous feature (CreditScore), one
# categorical feature (IsActiveMember), and the target (Exited).
df_example = df[
    ['CreditScore', 'IsActiveMember', 'Exited']
]

In [None]:
# 80/20 stratified split on the CreditScore / IsActiveMember features.
X_train, X_test, y_train, y_test = train_test_split(
    df_example[['CreditScore', 'IsActiveMember']],
    df['Exited'],
    random_state=101,
    stratify=df['Exited'],
    test_size=0.2,
)

In [None]:
# Import SMOTE-NC
from imblearn.over_sampling import SMOTENC

# SMOTE-NC needs the positions of the categorical columns. Here
# 'IsActiveMember' is the second column of X_train, hence [1]. With several
# categorical columns, list every one of their positions.
smotenc = SMOTENC(categorical_features=[1], random_state=101)
X_oversample, y_oversample = smotenc.fit_resample(
    X_train,
    y_train,
)

In [None]:
# Baseline classifier fitted on the imbalanced training data.
classifier = LogisticRegression().fit(X_train, y_train)
print(
    classification_report(
        y_true=y_test,
        y_pred=classifier.predict(X_test),
    )
)

In [None]:
# Classifier fitted on the SMOTE-NC oversampled training data.
classifier_o = LogisticRegression().fit(X_oversample, y_oversample)
print(
    classification_report(
        y_true=y_test,
        y_pred=classifier_o.predict(X_test),
    )
)

In [None]:
# Back to the two continuous features for the remaining oversamplers.
# Fixed: the features are selected from `df` directly. `df_example` was
# rebound earlier to ['CreditScore', 'IsActiveMember', 'Exited'] and no
# longer has an 'Age' column, so indexing it with 'Age' raised KeyError.
X_train, X_test, y_train, y_test = train_test_split(
    df[['CreditScore', 'Age']],
    df['Exited'],
    test_size=0.2,
    stratify=df['Exited'],
    random_state=101,
)

In [None]:
# BorderlineSMOTE uses the Borderline-SMOTE1 variant by default; the kind
# is spelled out here anyway.
from imblearn.over_sampling import BorderlineSMOTE

bsmote = BorderlineSMOTE(kind='borderline-1', random_state=101)
X_oversample_borderline, y_oversample_borderline = bsmote.fit_resample(
    X_train,
    y_train,
)

In [None]:
# Classifier fitted on the Borderline-SMOTE training data, evaluated on the
# untouched test split.
classifier_border = LogisticRegression().fit(
    X_oversample_borderline,
    y_oversample_borderline,
)
print(
    classification_report(
        y_true=y_test,
        y_pred=classifier_border.predict(X_test),
    )
)

In [None]:
from imblearn.over_sampling import SVMSMOTE

# Oversample the training split with SVM-SMOTE.
svmsmote = SVMSMOTE(random_state=101)
X_oversample_svm, y_oversample_svm = svmsmote.fit_resample(
    X_train,
    y_train,
)

# Fit on the oversampled data and report metrics on the test split.
classifier_svm = LogisticRegression().fit(X_oversample_svm, y_oversample_svm)
print(
    classification_report(
        y_true=y_test,
        y_pred=classifier_svm.predict(X_test),
    )
)

In [None]:
from imblearn.over_sampling import ADASYN

# Oversample the training split with ADASYN.
adasyn = ADASYN(random_state=101)
X_oversample_ada, y_oversample_ada = adasyn.fit_resample(
    X_train,
    y_train,
)

# Fit on the oversampled data and report metrics on the test split.
classifier_ada = LogisticRegression().fit(X_oversample_ada, y_oversample_ada)
print(
    classification_report(
        y_true=y_test,
        y_pred=classifier_ada.predict(X_test),
    )
)