# Project 1 – Telecom Customer Churn Prediction with PyTorch

In [1]:
#import library
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, random_split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv('../data/churn_cleaned.csv')

In [3]:
df.head()

Unnamed: 0,SeniorCitizen,tenure,Contract,MonthlyCharges,TotalCharges,Churn,gender_Male,Partner_Yes,Dependents_Yes,PhoneService_Yes,...,TechSupport_No internet service,TechSupport_Yes,StreamingTV_No internet service,StreamingTV_Yes,StreamingMovies_No internet service,StreamingMovies_Yes,PaperlessBilling_Yes,PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,1,0,29.85,29.85,0,False,True,False,False,...,False,False,False,False,False,False,True,False,True,False
1,0,34,1,56.95,1889.5,0,True,False,False,True,...,False,False,False,False,False,False,False,False,False,True
2,0,2,0,53.85,108.15,1,True,False,False,True,...,False,False,False,False,False,False,True,False,False,True
3,0,45,1,42.3,1840.75,0,True,False,False,False,...,False,True,False,False,False,False,False,False,False,False
4,0,2,0,70.7,151.65,1,False,False,False,True,...,False,False,False,False,False,False,True,False,True,False


## Split into Train, Validation, Test

In [4]:
X = df.drop('Churn', axis=1).values  # Features
y = df['Churn'].values               # Target

In [32]:
# First split train + temp (validation+test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Shapes check
print("Train:", X_train.shape, y_train.shape)
print("Test:", X_test.shape, y_test.shape)


Train: (5634, 29) (5634,)
Test: (1409, 29) (1409,)


In [33]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


## Logistic Regression

In [34]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [35]:
log_reg = LogisticRegression(
    max_iter=1000,
    class_weight='balanced'   # IMPORTANT for churn imbalance
)

log_reg.fit(X_train, y_train.ravel())


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [36]:
y_pred_lr = log_reg.predict(X_test)

In [37]:
print("Accuracy:", accuracy_score(y_test, y_pred_lr))
print(confusion_matrix(y_test, y_pred_lr))
print(classification_report(y_test, y_pred_lr))

Accuracy: 0.7388218594748048
[[747 288]
 [ 80 294]]
              precision    recall  f1-score   support

           0       0.90      0.72      0.80      1035
           1       0.51      0.79      0.62       374

    accuracy                           0.74      1409
   macro avg       0.70      0.75      0.71      1409
weighted avg       0.80      0.74      0.75      1409



## Convert target to tensors

In [38]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor  = torch.tensor(X_test, dtype=torch.float32)

In [39]:
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test_tensor  = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)


In [40]:
print(X_train_tensor.shape)
print(y_train_tensor.shape)

torch.Size([5634, 29])
torch.Size([5634, 1])


## Create Dataset & DataLoader

In [41]:
# Create datasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset  = TensorDataset(X_test_tensor, y_test_tensor)

In [42]:
# Create DataLoader
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [43]:
#For test
for X_batch, y_batch in train_loader:
    print(X_batch.shape, y_batch.shape)
    break

torch.Size([32, 29]) torch.Size([32, 1])


## Build Neural Network Model

In [44]:
#Define the model
class ChurnNN(nn.Module):
    def __init__(self, input_dim):
        super(ChurnNN, self).__init__()
        
        self.fc1 = nn.Linear(input_dim, 16)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(16, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x


In [45]:
# Initialize the model
input_dim = X_train_tensor.shape[1]
model = ChurnNN(input_dim)
model

ChurnNN(
  (fc1): Linear(in_features=29, out_features=16, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=16, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [46]:
# Define loss & optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [47]:
#Training Loop
epochs = 100
for epoch in range(epochs):
    optimizer.zero_grad()
    
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

Epoch 0, Loss: 0.6558
Epoch 10, Loss: 0.6191
Epoch 20, Loss: 0.5893
Epoch 30, Loss: 0.5655
Epoch 40, Loss: 0.5461
Epoch 50, Loss: 0.5299
Epoch 60, Loss: 0.5158
Epoch 70, Loss: 0.5031
Epoch 80, Loss: 0.4917
Epoch 90, Loss: 0.4815


In [51]:
model.eval()

with torch.no_grad():
    logits = model(X_test_tensor)
    probs = torch.sigmoid(logits)
    y_pred = (probs >= 0.5).float()

In [52]:
model.eval()

ChurnNN(
  (fc1): Linear(in_features=29, out_features=16, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=16, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [53]:
with torch.no_grad():
    outputs = model(X_test_tensor)

In [54]:
with torch.no_grad():
    test_probs = torch.sigmoid(model(X_test_tensor))
    for t in [0.3, 0.4, 0.5]:
        preds = (probs >= t).float()


In [55]:
from sklearn.metrics import accuracy_score
print("Accuracy:", accuracy_score(y_test, y_pred.numpy()))


Accuracy: 0.7792760823278921


In [56]:
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_pred.numpy()))


[[925 110]
 [201 173]]


In [57]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred.numpy()))


              precision    recall  f1-score   support

           0       0.82      0.89      0.86      1035
           1       0.61      0.46      0.53       374

    accuracy                           0.78      1409
   macro avg       0.72      0.68      0.69      1409
weighted avg       0.77      0.78      0.77      1409

