In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from dateutil.relativedelta import relativedelta


# Load the customer table with 'signup_date' parsed as datetimes, and stamp every
# row with the time of this run in a 'current_date' column.
df = pd.read_csv(
    "saas_customer_data.csv",
    parse_dates=['signup_date'],
).assign(current_date=pd.to_datetime('today'))


def _whole_months_between(later, earlier):
    """Return the whole-month component of the gap from *earlier* to *later*.

    The ``relativedelta`` is built once and its ``years`` and ``months``
    parts are combined into a single month count; the leftover ``days``
    (and smaller) parts are ignored.
    """
    delta = relativedelta(later, earlier)
    return delta.years * 12 + delta.months


# Tenure in whole months from 'signup_date' to 'current_date'.
# Iterating the two date columns directly avoids the per-row Series that a
# row-wise DataFrame.apply(axis=1) constructs, and each relativedelta is now
# computed once instead of twice.
df['tenure_months'] = [
    _whole_months_between(snapshot, signup)
    for snapshot, signup in zip(df['current_date'], df['signup_date'])
]


# Turn the 'plan' labels into integer codes; the fitted encoder stays available as `le`.
le = LabelEncoder()
le.fit(df['plan'])
df['plan_encoded'] = le.transform(df['plan'])


# Model inputs (four columns) and the label column to predict.
feature_columns = ['plan_encoded', 'monthly_revenue', 'sessions_last_month', 'tenure_months']
target = df['churned']
features = df[feature_columns]


# Hold out 25% of the rows for evaluation; the fixed seed makes the split repeatable.
split_parts = train_test_split(
    features,
    target,
    test_size=0.25,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

# Model: a seeded 100-tree random forest, fitted on the training portion.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predictions for the held-out rows.
y_pred = rf.predict(X_test)

# Evaluation: compare held-out labels with the model's predictions.
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n", report)


# Report the importance the fitted forest assigned to each feature column,
# in the same order as the columns of `features`.
feature_names = features.columns
importances = rf.feature_importances_

for position, name in enumerate(feature_names):
    importance = importances[position]
    print(f"{name}: {importance:.3f}")


Confusion Matrix:
 [[546 232]
 [242 230]]

Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.70      0.70       778
           1       0.50      0.49      0.49       472

    accuracy                           0.62      1250
   macro avg       0.60      0.59      0.59      1250
weighted avg       0.62      0.62      0.62      1250

plan_encoded: 0.035
monthly_revenue: 0.145
sessions_last_month: 0.267
tenure_months: 0.553
