In [3]:
# Undersampling using Imblearn
from collections import Counter
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import classification_report, roc_auc_score
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

data = pd.read_csv("campaign_impact.csv")

X = data.iloc[:,:-1]
y = data.iloc[:,-1]

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.30)

# Before UnderSampling
print("Before Undersampling: ", Counter(y_train))
Rand_under = RandomUnderSampler(sampling_strategy='majority')

X_train_under, y_train_under = Rand_under.fit_resample(X_train, y_train)

# After UnderSampling
print("After Undersampling: ", Counter(y_train_under))

# Model Traning Before Undersampling
model1=SVC()
fit_model = model1.fit(X_train, y_train)
pred_undersample = fit_model.predict(X_test)
initial = roc_auc_score(y_test, pred_undersample)
print("ROC AUC Score Before UnderSampling: ", roc_auc_score(y_test, pred_undersample))


# Model Traning after Undersampling
model2=SVC()
fit_model_U = model2.fit(X_train_under, y_train_under)
pred_final = fit_model_U.predict(X_test)
final = roc_auc_score(y_test, pred_final)
print("ROC AUC Score After UnderSampling: ", roc_auc_score(y_test, pred_final))

# Percentage Increase in Accuracy
result = ((final-initial)/initial)*100
print("Percentage Increase in Accuracy after using Undersampling:", str(round(result,2))+'%')

Before Undersampling:  Counter({0: 27900, 1: 3747})
After Undersampling:  Counter({0: 3747, 1: 3747})
ROC AUC Score Before UnderSampling:  0.5302373124412421
ROC AUC Score After UnderSampling:  0.7364923925677977
Percentage Increase in Accuracy after using Undersampling: 38.9%


In [4]:
#  Oversampling with SMOTE
from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

data = pd.read_csv("campaign_impact.csv")
X = data.iloc[:,:-1]
y = data.iloc[:,-1]

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.30)

# Before OverSampling
print("Before OverSampling: ", Counter(y_train))

SMOTE = SMOTE()

X_train_SMOTE, y_train_SMOTE = SMOTE.fit_resample(X_train, y_train)

# After OverSampling
print("After OverSampling: ", Counter(y_train_SMOTE))

# Model Traning Before OverSampling
over1=SVC()
fit_over = over1.fit(X_train, y_train)
pred = fit_over.predict(X_test)
initial = roc_auc_score(y_test, pred)
print("ROC AUC Score Before OverSampling: ", roc_auc_score(y_test, pred))


# Model Traning after OverSampling
over2=SVC()
fit_over_final = over2.fit(X_train_SMOTE, y_train_SMOTE)
pred_final = fit_over_final.predict(X_test)
final = roc_auc_score(y_test, pred_final)
print("ROC AUC Score After OverSampling: ", roc_auc_score(y_test, pred_final))

# Percentage Increase in Accuracy
result = ((final-initial)/initial)*100
print("Percentage Increase in Accuracy after using Undersampling:", str(round(result,2))+'%')

Before OverSampling:  Counter({0: 27962, 1: 3685})
After OverSampling:  Counter({0: 27962, 1: 27962})
ROC AUC Score Before OverSampling:  0.5161985295957431
ROC AUC Score After OverSampling:  0.7455298834852668
Percentage Increase in Accuracy after using Undersampling: 44.43%
