In [1]:
from google.colab import files

# Ask the user for the Excel workbook. The result is used below as a mapping
# whose first key is the name of the uploaded file.
print("Please select your 'Prediction_Data.xlsx' file.")
uploaded = files.upload()

# If nothing was uploaded (e.g. the dialog was dismissed), stop with a clear
# message instead of the bare StopIteration that next(iter(...)) would raise.
if not uploaded:
    raise RuntimeError(
        "No file was uploaded. Re-run this cell and choose the Excel workbook."
    )

# Get the filename of the uploaded file (the first one if several were chosen)
file_name = next(iter(uploaded))
print(f"\nSuccessfully uploaded: '{file_name}'")

Please select your 'Prediction_Data.xlsx' file.


Saving churn_data.xlsx to churn_data.xlsx

Successfully uploaded: 'churn_data.xlsx'


In [2]:
# 1. Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import joblib
from google.colab import files

In [3]:
# Read the workbook that was actually uploaded. The upload cell stores its
# name in `file_name`, and the user may pick a file whose name differs from the
# literal below (the upload prompt itself mentions 'Prediction_Data.xlsx').
try:
    file_path = file_name
except NameError:
    # The upload cell was not run; fall back to the previously hard-coded name.
    file_path = 'churn_data.xlsx'

# Define sheet names
sheet_name_train = 'vw_ChurnData'  # sheet used to train and evaluate the model
sheet_name_new = 'vw_JoinData'     # sheet scored later in the prediction cell

# Read the training data
print("\nLoading training data...")
data = pd.read_excel(file_path, sheet_name=sheet_name_train)
print("Training data loaded successfully.")


Loading training data...
Training data loaded successfully.


In [4]:
# 2. Data Preprocessing
print("\nPreprocessing data...")

# Build a derived frame instead of overwriting `data`: the raw sheet stays
# available, and this cell can be re-run without a KeyError from dropping
# columns that are already gone.
model_data = data.drop(columns=['Customer_ID', 'Churn_Category', 'Churn_Reason'])

# Categorical feature columns that are converted to integer codes.
columns_to_encode = [
    'Gender', 'Married', 'State', 'Value_Deal', 'Phone_Service', 'Multiple_Lines',
    'Internet_Service', 'Internet_Type', 'Online_Security', 'Online_Backup',
    'Device_Protection_Plan', 'Premium_Support', 'Streaming_TV', 'Streaming_Movies',
    'Streaming_Music', 'Unlimited_Data', 'Contract', 'Paperless_Billing',
    'Payment_Method'
]

# One fitted encoder per column, kept so the prediction cell can apply the
# exact same category -> code mapping to the new data.
label_encoders = {}
for column in columns_to_encode:
    le = LabelEncoder()
    model_data[column] = le.fit_transform(model_data[column])
    label_encoders[column] = le

# Encode the target. Series.map turns any label outside the dictionary into
# NaN, so check for that here and report the offending values explicitly.
status_codes = model_data['Customer_Status'].map({'Stayed': 0, 'Churned': 1})
unmapped_statuses = model_data.loc[status_codes.isna(), 'Customer_Status'].unique()
if len(unmapped_statuses) > 0:
    raise ValueError(
        "Unexpected Customer_Status values in training data: "
        f"{list(unmapped_statuses)}; expected only 'Stayed' or 'Churned'."
    )
model_data['Customer_Status'] = status_codes.astype(int)
print("Data preprocessing complete.")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified on the target — consider
# stratify=y if the class balance of the test set matters.
X = model_data.drop('Customer_Status', axis=1)
y = model_data['Customer_Status']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



Preprocessing data...
Data preprocessing complete.


In [5]:
# 3. Train Random Forest Model
print("\nTraining Random Forest model...")

# 100 trees with a fixed seed; n_jobs=-1 is passed through to the estimator
# unchanged. fit() returns the estimator itself, so construction and fitting
# can be chained.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    n_jobs=-1,
).fit(X_train, y_train)

print("Model training complete.")



Training Random Forest model...
Model training complete.


In [6]:
# 4. Evaluate Model on the Held-Out Test Set
print("\nEvaluating model performance...")
y_pred = rf_model.predict(X_test)

# Pin the label order explicitly so the display names always line up with the
# 0/1 target encoding ('Stayed' -> 0, 'Churned' -> 1), and so the report does
# not error out if one class happens to be absent from the test split.
class_labels = [0, 1]
class_names = ['Stayed', 'Churned']

# Rows are true classes and columns are predicted classes, both in class_labels order.
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=class_labels))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=class_labels, target_names=class_names))


Evaluating model performance...

Confusion Matrix:
[[791  50]
 [125 236]]

Classification Report:
              precision    recall  f1-score   support

      Stayed       0.86      0.94      0.90       841
     Churned       0.83      0.65      0.73       361

    accuracy                           0.85      1202
   macro avg       0.84      0.80      0.81      1202
weighted avg       0.85      0.85      0.85      1202



In [7]:
# 5. Use Model for Prediction on New Data
print("\nLoading new data for prediction...")
new_data = pd.read_excel(file_path, sheet_name=sheet_name_new)
# Untouched copy of the sheet; the prediction columns are appended to it below.
original_new_data = new_data.copy()

# Use exactly the columns the model was trained on, in the same order. This
# also leaves out identifier/target columns such as Customer_ID and
# Customer_Status, and reports missing features up front instead of failing
# later inside predict().
training_features = list(X_train.columns)
missing_features = [name for name in training_features if name not in new_data.columns]
if missing_features:
    raise ValueError(
        f"Sheet '{sheet_name_new}' is missing feature columns required by the model: "
        f"{missing_features}"
    )
features_for_prediction = new_data[training_features].copy()

# Re-apply the encoders fitted on the training data so category codes match.
for column in columns_to_encode:
    le = label_encoders[column]
    try:
        features_for_prediction[column] = le.transform(features_for_prediction[column])
    except ValueError as exc:
        # LabelEncoder.transform rejects values it did not see while fitting;
        # re-raise with the column name so the offending field is obvious.
        raise ValueError(
            f"Could not encode column '{column}' with the training-time encoder: {exc}"
        ) from exc

print("Making predictions on new data...")
new_predictions = rf_model.predict(features_for_prediction)
# Locate the probability column of the churn class (label 1) through
# classes_ rather than assuming it sits at position 1.
churn_class_index = list(rf_model.classes_).index(1)
new_predictions_proba = rf_model.predict_proba(features_for_prediction)[:, churn_class_index]

original_new_data['Predicted_Status'] = np.where(new_predictions == 1, 'Churn', 'Stay')
original_new_data['Churn_Probability'] = new_predictions_proba
print("Predictions complete.")



Loading new data for prediction...
Making predictions on new data...
Predictions complete.


In [8]:
# 6. Export Predictions to your Computer
print("\nExporting results...")
output_filename = 'Churn_Predictions.csv'

# Keep only the rows labelled 'Churn' by the prediction cell and write them
# to CSV without the DataFrame index.
is_predicted_churn = original_new_data['Predicted_Status'].eq('Churn')
churn_predictions_df = original_new_data.loc[is_predicted_churn]
churn_predictions_df.to_csv(output_filename, index=False)

# Hand the written file to Colab's download helper so it is sent to the browser.
files.download(output_filename)

print(f"\n✅ Success! The '{output_filename}' file has been downloaded to your computer.")


Exporting results...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ Success! The 'Churn_Predictions.csv' file has been downloaded to your computer.
