In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [2]:
try:
    df = pd.read_csv('/content/telco_churn.csv')
    print("Dataset loaded successfully.")
except FileNotFoundError:
    print("Error: 'telco_churn.csv' not found.")
    print("Please download the Telco Customer Churn dataset from Kaggle and save it as 'telco_churn.csv' in this directory.")
    exit()

Error: 'telco_churn.csv' not found.
Please download the Telco Customer Churn dataset from Kaggle and save it as 'telco_churn.csv' in this directory.


In [None]:
# --- 2. Data Preprocessing ---

# Drop customerID as it's not a feature
if 'customerID' in df.columns:
    df = df.drop('customerID', axis=1)

# Handle missing values in TotalCharges (common issue)
# Convert to numeric, coercing errors (like empty strings) to NaN
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
# Impute NaNs with the median
df['TotalCharges'] = df['TotalCharges'].fillna(df['TotalCharges'].median())

# Convert target variable 'Churn' to binary
df['Churn'] = df['Churn'].apply(lambda x: 1 if x == 'Yes' else 0)

# Define feature types
# 'tenure', 'MonthlyCharges', 'TotalCharges' are numeric
# All others (except Churn) are categorical
target = 'Churn'
numeric_features = ['tenure', 'MonthlyCharges', 'TotalCharges']
categorical_features = [col for col in df.columns if col not in numeric_features + [target]]

print(f"Numeric features: {numeric_features}")
print(f"Categorical features: {categorical_features}")

Numeric features: ['tenure', 'MonthlyCharges', 'TotalCharges']
Categorical features: ['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod']


In [None]:
# --- 3. Create Preprocessing Pipeline ---

# Create pipeline for numeric features: Impute (just in case) and Scale
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Create pipeline for categorical features: Impute (with constant) and One-Hot Encode
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine pipelines using ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

In [None]:
# --- 4. Split Data ---
X = df.drop(target, axis=1)
y = df[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# --- 5. Apply Preprocessing ---
# Fit the preprocessor on training data and transform it
X_train_processed = preprocessor.fit_transform(X_train)

# Only transform the test data
X_test_processed = preprocessor.transform(X_test)

In [None]:
# --- 6. Build ANN Model ---
model = Sequential([
    # Input layer: shape must match the number of processed features
    Dense(64, activation='relu', input_shape=(X_train_processed.shape[1],)),
    Dropout(0.3),  # Dropout for regularization
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    # Output layer: 1 neuron with sigmoid for binary classification
    Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

print("\nModel Summary:")
model.summary()


Model Summary:


In [None]:
# --- 7. Train Model ---
print("\nStarting model training...")
history = model.fit(
    X_train_processed,
    y_train,
    epochs=50,  # You can increase this for better accuracy
    batch_size=32,
    validation_split=0.2,
    verbose=1
)
print("Model training complete.")


Starting model training...
Epoch 1/50
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.6150 - loss: 0.6253 - val_accuracy: 0.8066 - val_loss: 0.4210
Epoch 2/50
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7790 - loss: 0.4542 - val_accuracy: 0.8039 - val_loss: 0.4192
Epoch 3/50
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7908 - loss: 0.4407 - val_accuracy: 0.8137 - val_loss: 0.4177
Epoch 4/50
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7852 - loss: 0.4477 - val_accuracy: 0.8075 - val_loss: 0.4135
Epoch 5/50
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7922 - loss: 0.4305 - val_accuracy: 0.8039 - val_loss: 0.4098
Epoch 6/50
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7933 - loss: 0.4421 - val_accuracy: 0.8066 - val_loss: 0.4095


In [None]:
# --- 8. Evaluate Model ---
loss, accuracy = model.evaluate(X_test_processed, y_test)
print(f'\nTest Accuracy: {accuracy * 100:.2f}%')

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7960 - loss: 0.4226 

Test Accuracy: 80.06%


In [None]:
# --- 9. Save Model and Preprocessor ---
# Save the trained Keras model
model.save('churn_model.keras')
print("Trained model saved as 'churn_model.keras'")

Trained model saved as 'churn_model.keras'


In [None]:
# Save the preprocessor
joblib.dump(preprocessor, 'preprocessor.joblib')
print("Preprocessor saved as 'preprocessor.joblib'")

print("\nSetup complete. You can now run 'app.py' to start the server.")

Preprocessor saved as 'preprocessor.joblib'

Setup complete. You can now run 'app.py' to start the server.
