In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import pickle
import json

In [2]:
# Load the dataset
data = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')

In [3]:
# Convert TotalCharges to numeric and handle errors
data['TotalCharges'] = pd.to_numeric(data['TotalCharges'], errors='coerce')

# Fill missing values in numerical columns with their mean
numerical_cols = data.select_dtypes(include=['float64', 'int64']).columns
data[numerical_cols] = data[numerical_cols].fillna(data[numerical_cols].mean())

# Drop unnecessary columns
data.drop(['customerID'], axis=1, inplace=True)


In [4]:

# Encode categorical variables
data = pd.get_dummies(data, drop_first=True)

In [5]:
# Splitting data into features and labels
X = data.drop('Churn_Yes', axis=1)
y = data['Churn_Yes']

In [6]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:

# Train the model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [8]:
# Evaluate the model
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))



              precision    recall  f1-score   support

       False       0.83      0.91      0.87      1036
        True       0.65      0.46      0.54       373

    accuracy                           0.79      1409
   macro avg       0.74      0.69      0.70      1409
weighted avg       0.78      0.79      0.78      1409



In [10]:

# Save the model and columns
with open('churn_model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [11]:
with open('columns.json', 'w') as f:
    json.dump({'columns': list(X.columns)}, f)

In [12]:
data.head(100)

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges,gender_Male,Partner_Yes,Dependents_Yes,PhoneService_Yes,MultipleLines_No phone service,MultipleLines_Yes,...,StreamingTV_Yes,StreamingMovies_No internet service,StreamingMovies_Yes,Contract_One year,Contract_Two year,PaperlessBilling_Yes,PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,Churn_Yes
0,0,1,29.85,29.85,False,True,False,False,True,False,...,False,False,False,False,False,True,False,True,False,False
1,0,34,56.95,1889.50,True,False,False,True,False,False,...,False,False,False,True,False,False,False,False,True,False
2,0,2,53.85,108.15,True,False,False,True,False,False,...,False,False,False,False,False,True,False,False,True,True
3,0,45,42.30,1840.75,True,False,False,False,True,False,...,False,False,False,True,False,False,False,False,False,False
4,0,2,70.70,151.65,False,False,False,True,False,False,...,False,False,False,False,False,True,False,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0,12,78.95,927.35,False,False,False,True,False,True,...,False,False,False,False,False,True,False,True,False,True
96,0,71,66.85,4748.70,True,True,True,True,False,True,...,False,False,False,True,False,True,True,False,False,False
97,0,5,21.05,113.85,True,False,False,True,False,False,...,False,True,False,False,False,False,False,False,True,True
98,0,52,21.00,1107.20,True,False,False,True,False,False,...,False,True,False,False,True,False,False,False,False,False


In [13]:
data.to_csv('processed_data.csv', index=False)


In [14]:
import pandas as pd

# Create a DataFrame for a single customer with "Churn" scenario
data = {
    "SeniorCitizen": [1],
    "tenure": [5],
    "MonthlyCharges": [90.25],
    "TotalCharges": [450.0],
    "gender_Male": [0],
    "Partner_Yes": [0],
    "Dependents_Yes": [0],
    "PhoneService_Yes": [1],
    "MultipleLines_No phone service": [0],
    "MultipleLines_Yes": [1],
    "InternetService_Fiber optic": [1],
    "InternetService_No": [0],
    "OnlineSecurity_No internet service": [0],
    "OnlineSecurity_Yes": [0],
    "OnlineBackup_No internet service": [0],
    "OnlineBackup_Yes": [1],
    "DeviceProtection_No internet service": [0],
    "DeviceProtection_Yes": [1],
    "TechSupport_No internet service": [0],
    "TechSupport_Yes": [0],
    "StreamingTV_No internet service": [0],
    "StreamingTV_Yes": [1],
    "StreamingMovies_No internet service": [0],
    "StreamingMovies_Yes": [1],
    "Contract_One year": [0],
    "Contract_Two year": [0],
    "PaperlessBilling_Yes": [1],
    "PaymentMethod_Credit card (automatic)": [0],
    "PaymentMethod_Electronic check": [1],
    "PaymentMethod_Mailed check": [0],
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Save to a CSV file
file_path = "single_customer_churn.csv"
df.to_csv(file_path, index=False)
file_path


'single_customer_churn.csv'