In [19]:
# Project name: Customer Churn Prediction
# Description: This project aims to predict customer churn using machine learning techniques.
# Author: Razvan
# Date: 2023-10-01
# Version: 1.00
# License: MIT License
# Python Version: 3.8+
# ML Framework: TensorFlow 2.x

# Import TensorFlow libraries and load the dataset
%pip install scikit-learn
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [20]:
import pandas as pd

# Load the customer churn dataset
data = pd.read_csv('https://raw.githubusercontent.com/jurnalmontan/ai-ml-rep/13becee3b8fed32def6dae2bfcb4a8c99dfd29a2/churn-dataset.csv') # Replace with your dataset path
print(data.head()) # Display the first few rows of the dataset
# Make sure to run the cell that imports pandas before running this cell.

   CustomerID  Tenure  MonthlyCharges  TotalCharges        Contract  \
0        1001       5            70.0         350.0  Month-to-month   
1        1002      10            85.5         850.5        Two year   
2        1003       3            55.3         165.9        One year   
3        1004       8            90.0         720.0  Month-to-month   
4        1005       2            65.2         130.4        One year   

      PaymentMethod  Churn  
0  Electronic check      1  
1      Mailed check      0  
2  Electronic check      1  
3       Credit card      0  
4  Electronic check      1  


In [21]:
# Preprocess the data
data = data.dropna() # Drop rows with missing values

# Fix target encoding: ensure 'Churn' is 0/1 (if originally Yes/No or similar)
if data['Churn'].dtype == 'object':
    y = (data['Churn'].str.lower() == 'yes').astype(int)
else:
    y = data['Churn']

# Convert categorical variables to numerical using one-hot encoding (excluding 'Churn')
X = data.drop('Churn', axis=1)
X = pd.get_dummies(X, drop_first=True)

print('Target value counts:', y.value_counts())
print('Unique values in target:', np.unique(y))

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Target value counts: Churn
1    3
0    2
Name: count, dtype: int64
Unique values in target: [0 1]


In [None]:
# Ensure target arrays are float32 for TensorFlow
y_train = y_train.astype(np.float32)
y_test = y_test.astype(np.float32)

model = tf.keras.Sequential([ # Simple Neural Network
    tf.keras.Input(shape=(X_train.shape[1],)), # Input layer
    tf.keras.layers.Dense(1, activation='sigmoid') # Output layer
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) # Compile the model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2) # Train the model
loss, accuracy = model.evaluate(X_test, y_test) # Evaluate the model
print(f'Simple NN Test Accuracy: {accuracy*100:.2f}%') # Print test accuracy

# Print a few predictions to debug output
preds = model.predict(X_test) # Get predictions
print('First 10 predictions:', preds[:10].flatten()) # Print first 10 predictions

Epoch 1/20


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 725ms/step - accuracy: 0.3333 - loss: 0.9717 - val_accuracy: 0.0000e+00 - val_loss: 1.2947
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 725ms/step - accuracy: 0.3333 - loss: 0.9717 - val_accuracy: 0.0000e+00 - val_loss: 1.2947
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step - accuracy: 0.3333 - loss: 0.9699 - val_accuracy: 0.0000e+00 - val_loss: 1.2896
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step - accuracy: 0.3333 - loss: 0.9699 - val_accuracy: 0.0000e+00 - val_loss: 1.2896
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.3333 - loss: 0.9680 - val_accuracy: 0.0000e+00 - val_loss: 1.2846
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.3333 - loss: 0.9680 - val_accuracy: 0.0000e+00 - val_loss: 1.2846
Epoch 4/20
[1m1/1[0m [32m━━━

In [23]:
# Check target encoding and class balance before evaluation
print('y_train value counts:', y_train.value_counts().to_dict())
print('y_test value counts:', y_test.value_counts().to_dict())
print('Unique values in y_train:', np.unique(y_train))
print('Unique values in y_test:', np.unique(y_test))

# Plot training & validation accuracy values
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
lr_acc = lr.score(X_test, y_test)
print(f"Logistic Regression Test Accuracy: {lr_acc*100:.2f}%")

y_train value counts: {1.0: 3, 0.0: 1}
y_test value counts: {0.0: 1}
Unique values in y_train: [0. 1.]
Unique values in y_test: [0.]
Logistic Regression Test Accuracy: 100.00%


In [25]:
# Optimize for deployment
# Optimize the model for deployment by considering techniques such as model pruning, quantization, or simplifying the model architecture without sacrificing too much accuracy. This step is crucial for ensuring the model can be deployed efficiently in a business environment.
# Note: The dataset URL used in this example is for demonstration purposes. Replace it with the actual path to your dataset.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()
with open('churn_model.tflite', 'wb') as f:
    f.write(tflite_model)

# Prepare the model for deployment
# Save the trained model in a format suitable for deployment, such as TensorFlow Lite or ONNX. This step ensures that the model can be easily integrated into production systems.
# Note: The dataset URL used in this example is for demonstration purposes. Replace it with the actual path to your dataset.
model.save('churn_model.keras')


INFO:tensorflow:Assets written to: /tmp/tmphp7t9b3_/assets


INFO:tensorflow:Assets written to: /tmp/tmphp7t9b3_/assets


Saved artifact at '/tmp/tmphp7t9b3_'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 8), dtype=tf.float32, name='keras_tensor_14')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  133916285006272: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133916285009088: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1759745438.461091    5223 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1759745438.461120    5223 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-10-06 10:10:38.461346: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmphp7t9b3_
2025-10-06 10:10:38.461819: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-10-06 10:10:38.461832: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmphp7t9b3_
2025-10-06 10:10:38.464564: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-10-06 10:10:38.479280: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmphp7t9b3_
2025-10-06 10:10:38.485356: I tensorflow/cc/saved_model/loader.cc:471] SavedModel load for tags { serve }; Status: success: OK. Took 24013 microseconds.
