## Import libraries:

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


## Load dataset:

In [2]:
df = pd.read_csv('cleandata.csv')

## Separate the features (independent variables) and the target variable (Churn):

In [4]:
X = df.drop('Churn', axis=1)  # Features
y = df['Churn']  # Target

## Split the data into training and testing sets:
* Split the data into 70% training and 30% testing sets

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Feature scaling

In [6]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Train XGBoost classifier:

In [7]:
model = XGBClassifier()
model.fit(X_train, y_train)


## Make predictions on the test set:

In [8]:
y_pred = model.predict(X_test)

## Evaluate the model's performance:

In [9]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Print confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Accuracy: 0.7742546142924751
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.88      0.85      1539
           1       0.60      0.50      0.54       574

    accuracy                           0.77      2113
   macro avg       0.71      0.69      0.70      2113
weighted avg       0.76      0.77      0.77      2113

Confusion Matrix:
[[1351  188]
 [ 289  285]]


## Import libraries:

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [10]:
# Create a list of algorithms
algorithms = [
    LogisticRegression(),
    RandomForestClassifier(),
    GradientBoostingClassifier()
]

# Initialize lists to store algorithm names and their corresponding accuracy scores
algorithm_names = []
accuracy_scores = []

# Loop through each algorithm and evaluate its performance
for algorithm in algorithms:
    model_name = algorithm.__class__.__name__
    algorithm_names.append(model_name)

    # Train the model
    algorithm.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = algorithm.predict(X_test)

    # Calculate accuracy and other metrics
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

    # You can also calculate other evaluation metrics like precision, recall, and F1-score
    # precision = precision_score(y_test, y_pred)
    # recall = recall_score(y_test, y_pred)
    # f1 = f1_score(y_test, y_pred)

# Print the performance of each algorithm
for name, score in zip(algorithm_names, accuracy_scores):
    print(f"{name} Accuracy: {score}")


LogisticRegression Accuracy: 0.7993374349266446
RandomForestClassifier Accuracy: 0.8002839564600095
GradientBoostingClassifier Accuracy: 0.8045433033601515


## Import library to balance data:

In [11]:
from imblearn.over_sampling import SMOTE

## Apply SMOTE oversampling to the training data:

In [12]:
# Initialize the SMOTE object
smote = SMOTE(random_state=42)

# Apply SMOTE to the training data
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)


In [13]:
model = XGBClassifier()
model.fit(X_train_resampled, y_train_resampled)


In [14]:
y_pred = model.predict(X_test)

In [15]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Print confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Accuracy: 0.7823000473260767
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.86      0.85      1539
           1       0.61      0.57      0.59       574

    accuracy                           0.78      2113
   macro avg       0.72      0.72      0.72      2113
weighted avg       0.78      0.78      0.78      2113

Confusion Matrix:
[[1325  214]
 [ 246  328]]


In [16]:
# Create a list of algorithms
algorithms = [
    LogisticRegression(),
    RandomForestClassifier(),
    GradientBoostingClassifier()
]

# Initialize lists to store algorithm names and their corresponding accuracy scores
algorithm_names = []
accuracy_scores = []

# Loop through each algorithm and evaluate its performance
for algorithm in algorithms:
    model_name = algorithm.__class__.__name__
    algorithm_names.append(model_name)

    # Train the model
    algorithm.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = algorithm.predict(X_test)

    # Calculate accuracy and other metrics
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

    # You can also calculate other evaluation metrics like precision, recall, and F1-score
    # precision = precision_score(y_test, y_pred)
    # recall = recall_score(y_test, y_pred)
    # f1 = f1_score(y_test, y_pred)

# Print the performance of each algorithm
for name, score in zip(algorithm_names, accuracy_scores):
    print(f"{name} Accuracy: {score}")


LogisticRegression Accuracy: 0.7993374349266446
RandomForestClassifier Accuracy: 0.7946048272598202
GradientBoostingClassifier Accuracy: 0.8045433033601515
