# Training Main Model: Churn Prediction

***This notebook trains the main model and calculates performance metrics.***

In [1]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import GradientBoostingClassifier

In [2]:
# Load the raw customer churn table from the project's input folder.
data_path = "../input/WatchAlways_customer_churn.csv"
df = pd.read_csv(data_path)

In [3]:
# Peek at the first five rows of the raw data.
df.head(n=5)

Unnamed: 0,customer_id,age,gender,subscription_type,watch_hours,last_login_days,region,device,monthly_fee,churned,payment_method,number_of_profiles,avg_watch_time_per_day,favorite_genre
0,a9b75100-82a8-427a-a208-72f24052884a,51,Other,Basic,14.73,29,Africa,TV,8.99,1,Gift Card,1,0.49,Action
1,49a5dfd9-7e69-4022-a6ad-0a1b9767fb5b,47,Other,Standard,0.7,19,Europe,Mobile,13.99,1,Gift Card,5,0.03,Sci-Fi
2,4d71f6ce-fca9-4ff7-8afa-197ac24de14b,27,Female,Standard,16.32,10,Asia,TV,13.99,0,Crypto,2,1.48,Drama
3,d3c72c38-631b-4f9e-8a0e-de103cad1a7d,53,Other,Premium,4.51,12,Oceania,TV,17.99,1,Crypto,2,0.35,Horror
4,4e265c34-103a-4dbb-9553-76c9aa47e946,56,Other,Standard,1.89,13,Africa,Mobile,13.99,1,Crypto,2,0.13,Action


**Preparation of X and Y**

The feature 'customer_id' is dropped because it does not contain predictive information.

In [4]:
# Target first, then the predictors: everything except the label and the customer identifier.
y = df["churned"]
X = df.drop(columns=["customer_id", "churned"])

In [5]:
# Peek at the first five rows of the feature matrix.
X.head(n=5)

Unnamed: 0,age,gender,subscription_type,watch_hours,last_login_days,region,device,monthly_fee,payment_method,number_of_profiles,avg_watch_time_per_day,favorite_genre
0,51,Other,Basic,14.73,29,Africa,TV,8.99,Gift Card,1,0.49,Action
1,47,Other,Standard,0.7,19,Europe,Mobile,13.99,Gift Card,5,0.03,Sci-Fi
2,27,Female,Standard,16.32,10,Asia,TV,13.99,Crypto,2,1.48,Drama
3,53,Other,Premium,4.51,12,Oceania,TV,17.99,Crypto,2,0.35,Horror
4,56,Other,Standard,1.89,13,Africa,Mobile,13.99,Crypto,2,0.13,Action


In [6]:
# Sanity check: dimensions of X / y and the share of rows labelled as churned.
churn_rate = y.mean()
print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")
print(f"Churn rate: {churn_rate:.3f} ({churn_rate*100:.1f}%)")

Features shape: (5000, 12)
Target shape: (5000,)
Churn rate: 0.503 (50.3%)


**Split train/validation/test**

In [7]:
# First split: keep 70% for training and hold out 30% (stratified on the label)
# to be divided into validation and test afterwards.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
    stratify=y,
)

In [8]:
# Second split: cut the held-out part in half (stratified) into validation and test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
    stratify=y_temp,
)

In [9]:
# Report the size of each partition and its share of the full dataset.
for split_label, split_X in (("Train", X_train), ("Validation", X_val), ("Test", X_test)):
    print(f"{split_label} set: {split_X.shape[0]} samples ({split_X.shape[0]/len(df)*100:.1f}%)")

Train set: 3500 samples (70.0%)
Validation set: 750 samples (15.0%)
Test set: 750 samples (15.0%)


**Model: Gradient Boosting Classifier**

In [10]:
class ChurnModel(ClassifierMixin, BaseEstimator):
    """Churn classifier: column preprocessing followed by gradient boosting.

    The columns in ``NUM_FEATURES`` are standardised and the columns in
    ``CAT_FEATURES`` are one-hot encoded (categories not seen during ``fit``
    are ignored at prediction time). The result is passed to a
    ``GradientBoostingClassifier``. Columns of ``X`` that appear in neither
    list are not passed to the classifier (``ColumnTransformer`` drops the
    remainder by default).

    Parameters
    ----------
    n_estimators : int, default=100
        Forwarded to ``GradientBoostingClassifier(n_estimators=...)``.
    max_depth : int, default=5
        Forwarded to ``GradientBoostingClassifier(max_depth=...)``.
    learning_rate : float, default=0.1
        Forwarded to ``GradientBoostingClassifier(learning_rate=...)``.
    random_state : int or None, default=42
        Forwarded to ``GradientBoostingClassifier(random_state=...)``.

    Attributes
    ----------
    pipeline_ : Pipeline
        The fitted preprocessing + classifier pipeline. Only exists after
        ``fit`` has completed.
    classes_ : ndarray
        Class labels as exposed by the fitted pipeline.
    is_fitted_ : bool
        Set to ``True`` by ``fit``; kept for backward compatibility.
    """

    # Columns scaled with StandardScaler.
    NUM_FEATURES = (
        "age",
        "watch_hours",
        "last_login_days",
        "monthly_fee",
        "number_of_profiles",
        "avg_watch_time_per_day",
    )

    # Columns encoded with OneHotEncoder.
    CAT_FEATURES = (
        "gender",
        "subscription_type",
        "region",
        "device",
        "payment_method",
        "favorite_genre",
    )

    def __init__(self, n_estimators=100, max_depth=5, learning_rate=0.1, random_state=42):
        # Only hyper-parameters are stored here. Fitted state (trailing
        # underscore attributes) is created in fit(), so that get_params(),
        # clone() and check_is_fitted() behave as scikit-learn expects.
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.learning_rate = learning_rate
        self.random_state = random_state

    def _build_pipeline(self):
        """Return a fresh, unfitted preprocessing + classifier pipeline."""
        preprocessor = ColumnTransformer(
            transformers=[
                ("num", StandardScaler(), list(self.NUM_FEATURES)),
                ("cat", OneHotEncoder(handle_unknown="ignore"), list(self.CAT_FEATURES)),
            ]
        )
        classifier = GradientBoostingClassifier(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            learning_rate=self.learning_rate,
            random_state=self.random_state,
        )
        return Pipeline([
            ("preprocessor", preprocessor),
            ("classifier", classifier),
        ])

    def fit(self, X, y):
        """Fit the pipeline on ``X`` / ``y`` and return ``self``.

        ``X`` must provide the columns named in ``NUM_FEATURES`` and
        ``CAT_FEATURES``. A new pipeline is built on every call, so
        re-fitting never reuses state from a previous fit.
        """
        pipeline = self._build_pipeline()
        pipeline.fit(X, y)
        # Publish fitted attributes only after fitting succeeded, so a failed
        # fit does not leave the estimator looking fitted.
        self.pipeline_ = pipeline
        self.classes_ = pipeline.classes_
        self.is_fitted_ = True
        return self

    def predict(self, X):
        """Return predicted class labels for ``X``.

        Raises ``NotFittedError`` if ``fit`` has not been called.
        """
        check_is_fitted(self, "pipeline_")
        return self.pipeline_.predict(X)

    def predict_proba(self, X):
        """Return class probabilities for ``X``, one column per entry of ``classes_``.

        Raises ``NotFittedError`` if ``fit`` has not been called.
        """
        check_is_fitted(self, "pipeline_")
        return self.pipeline_.predict_proba(X)

***Function to print metrics***

In [11]:
def print_metrics(name, y_true, y_pred, y_proba):
    """Print accuracy, F1 and ROC-AUC for one data split.

    Parameters
    ----------
    name : str
        Label prefixed to every printed line.
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, used for accuracy and F1.
    y_proba : array-like
        Scores passed to ``roc_auc_score``.

    If ``roc_auc_score`` raises ``ValueError``, ``N/A`` is printed for
    ROC-AUC instead. A blank line is printed at the end.
    """
    accuracy = accuracy_score(y_true, y_pred)
    print(f"{name} - Accuracy: {accuracy:.4f}")

    f1 = f1_score(y_true, y_pred)
    print(f"{name} - F1-Score: {f1:.4f}")

    try:
        auc = roc_auc_score(y_true, y_proba)
    except ValueError:
        print(f"{name} - ROC-AUC: N/A")
    else:
        print(f"{name} - ROC-AUC: {auc:.4f}")

    print()

**Model training**

In [12]:
# fit() returns the estimator itself, so construction and training are chained.
model = ChurnModel().fit(X_train, y_train)
print("training completed")

training completed


**calculate metrics on the training set**

In [13]:
# Scores from the second probability column (index 1), then hard labels,
# both on the rows the model was trained on.
y_train_proba = model.predict_proba(X_train)[:, 1]
y_train_pred = model.predict(X_train)

In [14]:
# Metrics on the training rows.
print("training set metrics:")
print_metrics(
    name="train",
    y_true=y_train,
    y_pred=y_train_pred,
    y_proba=y_train_proba,
)

training set metrics:
train - Accuracy: 1.0000
train - F1-Score: 1.0000
train - ROC-AUC: 1.0000



**calculate metrics on the validation set**

In [15]:
# Scores from the second probability column (index 1), then hard labels,
# on the validation rows.
y_val_proba = model.predict_proba(X_val)[:, 1]
y_val_pred = model.predict(X_val)

In [16]:
# Metrics on the validation rows.
print("validation set metrics")
print_metrics(
    name="validation",
    y_true=y_val,
    y_pred=y_val_pred,
    y_proba=y_val_proba,
)

validation set metrics
validation - Accuracy: 0.9920
validation - F1-Score: 0.9920
validation - ROC-AUC: 0.9972



### Comparison with Baseline
The baseline had this performance on validation:

- Accuracy: 0.4720
- F1-Score: 0.4748
- ROC-AUC: 0.5000

Our model should significantly improve on all metrics.

In [17]:
# Baseline validation scores (hard-coded reference values). They are defined
# once so the table and the improvement figure always use the same numbers.
baseline_val = {"accuracy": 0.4720, "F1-Score": 0.4748, "ROC-AUC": 0.5000}

# Each model score is computed once and reused below.
model_val = {
    "accuracy": accuracy_score(y_val, y_val_pred),
    "F1-Score": f1_score(y_val, y_val_pred),
    "ROC-AUC": roc_auc_score(y_val, y_val_proba),
}

print("=" * 60)
print(f"{'metric':<15} {'baseline val':<18} {'modello val':<18}")
print("=" * 60)
for metric_name, baseline_score in baseline_val.items():
    print(f"{metric_name:<15} {baseline_score:<18.4f} {model_val[metric_name]:<18.4f}")
print("=" * 60)

# Relative ROC-AUC change versus the baseline, in percent.
roc_improvement = ((model_val["ROC-AUC"] - baseline_val["ROC-AUC"]) / baseline_val["ROC-AUC"]) * 100
# The "+" format flag prints the real sign, so a regression shows as "-x%"
# instead of the misleading "+-x%" a hard-coded plus sign would produce.
print(f"roc-auc improvement: {roc_improvement:+.1f}%")


metric          baseline val       modello val       
accuracy        0.4720             0.9920            
F1-Score        0.4748             0.9920            
ROC-AUC         0.5000             0.9972            
roc-auc improvement: +99.4%


# Data leakage check, correlation

In [18]:
# Pearson correlation of every numeric column with the label, strongest positive first.
df_numeric = df.select_dtypes(include="number")
corr_matrix = df_numeric.corr()
correlations = corr_matrix["churned"].sort_values(ascending=False)
print(correlations)

churned                   1.000000
last_login_days           0.471590
age                      -0.003515
monthly_fee              -0.152117
number_of_profiles       -0.158614
avg_watch_time_per_day   -0.272958
watch_hours              -0.479916
Name: churned, dtype: float64


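None of the numeric features is close to perfectly correlated with `churned` (the strongest are `watch_hours` at -0.48 and `last_login_days` at 0.47), so this check does not point to a single leaking column. The validation and training metrics are so high because the dataset is synthetic.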

## Test set evaluation

In [19]:
# Final evaluation on the held-out test rows.
print("TEST SET EVALUATION (FINAL)")
y_test_proba = model.predict_proba(X_test)[:, 1]
y_test_pred = model.predict(X_test)
print_metrics(
    name="Test",
    y_true=y_test,
    y_pred=y_test_pred,
    y_proba=y_test_proba,
)

TEST SET EVALUATION (FINAL)
Test - Accuracy: 0.9920
Test - F1-Score: 0.9920
Test - ROC-AUC: 0.9977

