In [1]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
# Read the raw customer table from a CSV file in the working directory.
# NOTE(review): "chern" in the file name may be a typo for "churn" — confirm
# against the file on disk before renaming anything.
DATA_PATH = 'customer_chern_dataset.csv'
data = pd.read_csv(DATA_PATH)

In [3]:
# Preview the first rows of the loaded table.
print(data.head())
# DataFrame.info() writes its summary (columns, non-null counts, dtypes) to
# stdout itself and returns None, so it is called directly; wrapping it in
# print() would append a stray "None" line to the cell output.
data.info()

   CustomerID  Tenure  MonthlyCharges  TotalCharges        Contract  \
0        1001       5            70.0         350.0  Month-to-month   
1        1002      10            85.5         850.5        Two year   
2        1003       3            55.3         165.9        One year   
3        1004       8            90.0         720.0  Month-to-month   
4        1005       2            65.2         130.4        One year   

      PaymentMethod  Churn  
0  Electronic check      1  
1      Mailed check      0  
2  Electronic check      1  
3       Credit card      0  
4  Electronic check      1  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   CustomerID      5 non-null      int64  
 1   Tenure          5 non-null      int64  
 2   MonthlyCharges  5 non-null      float64
 3   TotalCharges    5 non-null      float64
 4   Contract        5 non-nu

In [4]:
# Remove the CustomerID column, then discard every row that still contains a
# missing value, as one chained expression.
# NOTE(review): CustomerID looks like a per-row identifier rather than a
# predictive feature — confirm that is why it is dropped.
data = (
    data
    .drop(columns=['CustomerID'])
    .dropna()
)

In [5]:
# One-hot encode the non-numeric columns. drop_first=True omits the first
# level of each encoded column so the indicator columns are not redundant.
data = pd.get_dummies(data=data, drop_first=True)

In [6]:
# Separate the target column from the predictor columns.
y = data['Churn']
X = data.drop(columns=['Churn'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Baseline classifier: a 100-tree random forest with a fixed seed so that
# repeated runs build the same trees.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

In [8]:
# Train the baseline forest on the training portion of the split.
model.fit(X=X_train, y=y_train)

In [9]:
# Score the baseline forest on the held-out rows.
# NOTE(review): the info() output earlier in this notebook reports only 5 rows,
# so a 20% hold-out is about one sample — an accuracy of 1.0 on that is not
# informative. Re-check on a larger dataset.
predictions = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print(f'Test accuracy: {accuracy}')

Test accuracy: 1.0


In [10]:
# A simpler variant of the forest: same tree count and seed, but each tree is
# capped at depth 10 and considers sqrt(n_features) candidates per split.
pruned_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    max_features='sqrt',
    random_state=42,
)
pruned_model.fit(X_train, y_train)

# Evaluate the depth-limited forest on the same held-out rows.
pruned_predictions = pruned_model.predict(X_test)
pruned_accuracy = accuracy_score(y_true=y_test, y_pred=pruned_predictions)
print(f'Pruned Test accuracy: {pruned_accuracy}')

Pruned Test accuracy: 1.0


In [11]:
# Serialize the fitted baseline forest to 'churn_model.pkl' in the working
# directory; joblib.dump returns the list of file names it wrote.
# NOTE(review): this persists `model`, not `pruned_model` — confirm that the
# unpruned forest is the one intended for reuse.
# Only load this file from trusted locations: unpickling can execute
# arbitrary code.
joblib.dump(model, 'churn_model.pkl')

['churn_model.pkl']