In [4]:
import numpy as np
import pandas as pd
from imblearn.under_sampling import TomekLinks
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

#Models 
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Metric
from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings('ignore')

In [2]:
churn_data = pd.read_csv('files_for_lab/Customer-Churn.csv')

def to_zero(x):
    if x == ' ':
        return 0
    else:
        return float(x)

churn_data['TotalCharges'] = churn_data.TotalCharges.apply(to_zero)

In [3]:
y = churn_data['Churn']
X = churn_data[['tenure', 'SeniorCitizen', 'MonthlyCharges', 'TotalCharges']]

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=50)

over_sampler = SMOTE().fit(X_train, y_train)
X_res, y_res = over_sampler.fit_resample(X, y)

# ## Scale data
scaler = StandardScaler().fit(X_res)
scaler.transform(X_res)

## Run model
lgr = LogisticRegression().fit(X_res, y_res)
prediction = lgr.predict(X_test)
classification_1 = classification_report(y_test, prediction)
print(classification_1)

dt = DecisionTreeClassifier(random_state=0).fit(X_res, y_res)
prediction = dt.predict(X_test)
classification_2 = classification_report(y_test, prediction)
print(classification_2)

              precision    recall  f1-score   support

          No       0.88      0.72      0.79      1709
         Yes       0.48      0.73      0.58       616

    accuracy                           0.72      2325
   macro avg       0.68      0.72      0.68      2325
weighted avg       0.77      0.72      0.73      2325

              precision    recall  f1-score   support

          No       0.99      1.00      0.99      1709
         Yes       0.99      0.97      0.98       616

    accuracy                           0.99      2325
   macro avg       0.99      0.98      0.99      2325
weighted avg       0.99      0.99      0.99      2325



In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=50)

under_sampler = TomekLinks(n_jobs=4).fit(X_train, y_train)
X_res, y_res = under_sampler.fit_resample(X, y)

# ## Scale data
scaler = StandardScaler().fit(X_res)
scaler.transform(X_res)

## Run model
lgr = LogisticRegression().fit(X_res, y_res)
prediction = lgr.predict(X_test)
classification_3 = classification_report(y_test, prediction)
print(classification_3)

dt = DecisionTreeClassifier(random_state=1).fit(X_res, y_res)
prediction = dt.predict(X_test)
classification_4 = classification_report(y_test, prediction)
print(classification_4)

              precision    recall  f1-score   support

          No       0.85      0.85      0.85      1709
         Yes       0.58      0.57      0.57       616

    accuracy                           0.77      2325
   macro avg       0.71      0.71      0.71      2325
weighted avg       0.77      0.77      0.77      2325

              precision    recall  f1-score   support

          No       0.99      0.94      0.96      1709
         Yes       0.85      0.97      0.91       616

    accuracy                           0.95      2325
   macro avg       0.92      0.95      0.93      2325
weighted avg       0.95      0.95      0.95      2325



In [11]:
print(classification_1)
print(classification_2)
print(classification_3)
print(classification_4)

              precision    recall  f1-score   support

          No       0.88      0.72      0.79      1709
         Yes       0.48      0.73      0.58       616

    accuracy                           0.72      2325
   macro avg       0.68      0.72      0.68      2325
weighted avg       0.77      0.72      0.73      2325

              precision    recall  f1-score   support

          No       0.99      1.00      0.99      1709
         Yes       0.99      0.97      0.98       616

    accuracy                           0.99      2325
   macro avg       0.99      0.98      0.99      2325
weighted avg       0.99      0.99      0.99      2325

              precision    recall  f1-score   support

          No       0.85      0.85      0.85      1709
         Yes       0.58      0.57      0.57       616

    accuracy                           0.77      2325
   macro avg       0.71      0.71      0.71      2325
weighted avg       0.77      0.77      0.77      2325

              preci