In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.metrics import make_scorer, recall_score, precision_score, f1_score
from tensorflow import keras
from tensorflow.keras import layers

%run "02. EDA.ipynb"

In [15]:
features = ['Call Failure', 
            'Complains', 
            'Subscription Length', 
            'Charge Amount', 
            'Seconds of Use',
            'Frequency of use', 
            'Frequency of SMS', 
            'Distinct Called Numbers']
target = 'Churn'

X = df2[features]
y = df2[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = LogisticRegression(random_state=42)
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)

print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nAccuracy Score:", accuracy_score(y_test, y_pred))

Confusion Matrix:
 [[502  18]
 [ 70  40]]

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.97      0.92       520
           1       0.69      0.36      0.48       110

    accuracy                           0.86       630
   macro avg       0.78      0.66      0.70       630
weighted avg       0.84      0.86      0.84       630


Accuracy Score: 0.8603174603174604


In [16]:

features = ['Call Failure', 
            'Complains', 
            'Subscription Length', 
            'Charge Amount', 
            'Seconds of Use',
            'Frequency of use', 
            'Frequency of SMS', 
            'Distinct Called Numbers']

target = 'Churn'
X = df2[features]
y = df2[target]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

model = LogisticRegression(random_state=42)

scoring_metrics = {
    'Accuracy': 'accuracy',
    'Precision': make_scorer(precision_score),
    'Recall': make_scorer(recall_score),
    'F1': make_scorer(f1_score)
}

for metric, scoring_method in scoring_metrics.items():
    cv_scores = cross_val_score(model, X_scaled, y, cv=5, scoring=scoring_method)
    print(f"Mean {metric}:", cv_scores.mean())


Mean Accuracy: 0.8898412698412699
Mean Precision: 0.8086355399591586
Mean Recall: 0.39191919191919194
Mean F1: 0.5274817753237004


In [20]:
X = df[features]
y = df[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = keras.Sequential([
    layers.Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Sigmoid for binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train_scaled, y_train, epochs=20, batch_size=32, validation_split=0.2)

y_pred_proba = model.predict(X_test_scaled)
y_pred = (y_pred_proba > 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Accuracy: 0.8254
