In [None]:
from google.colab import drive
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Mount Google Drive so files under /content/drive become readable
drive.mount('/content/drive')

# Path of the CSV to load (the value here looks like a redacted placeholder —
# point it at the real dataset before running)
file_path = '------------------------'

# Show all columns when a frame is displayed instead of truncating wide tables
pd.set_option('display.max_columns', None)

# Load the dataset and preview the first rows
df = pd.read_csv(file_path)
df.head()

In [None]:
# Remove exact duplicate rows (compared while cx_id is still present),
# then discard the cx_id column.
df = df.drop_duplicates().drop(columns="cx_id")

# Coerce these columns to float; entries that cannot be parsed become NaN
numeric_cols = ['tenure', 'monthly_bill', 'total_bill']
for name in numeric_cols:
    df[name] = pd.to_numeric(df[name], errors='coerce').astype("float")

# Report how many values are now missing in each column
print("Missing values per column after cleansing:\n")
print(df.isnull().sum())
df.head()

In [None]:
# Missing-value counts before imputation
print("Missing values before filling:\n", df.isnull().sum())

# Impute each numeric column with its mean, rounded to the listed number of decimals
mean_decimals = {'tenure': 0, 'monthly_bill': 2, 'total_bill': 2}
for name, decimals in mean_decimals.items():
    df[name] = df[name].fillna(df[name].mean().round(decimals))

# Impute every object-typed column with its most frequent value.
# NOTE(review): if the churn target is object-typed it is imputed here as well —
# confirm that filling missing labels is intended.
categorical_cols = df.select_dtypes(include='object').columns
for name in categorical_cols:
    most_common = df[name].mode()[0]
    df[name] = df[name].fillna(most_common)

# Missing-value counts after imputation
print("\nMissing values after filling:\n", df.isnull().sum())
df.head()

In [None]:
# Column names grouped by dtype: int64/float64 columns are treated as
# numerical, object columns as categorical.
num = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
cat = df.select_dtypes(include='O').columns.tolist()

print(cat)
print(num)

In [None]:
# Frequency of each distinct value, for every column listed in `cat`
for column in cat:
    counts = df[column].value_counts()
    print(counts)


In [None]:
from sklearn.preprocessing import LabelEncoder
# Integer-encode multiple_connections with a LabelEncoder
le = LabelEncoder()
df['multiple_connections'] = le.fit_transform(df['multiple_connections'])

# Normalise senior_citizen to integers: values are stringified and lower-cased
# first so that the yes/no spellings below are matched regardless of case;
# anything not in the mapping is left as-is for the final int conversion.
yes_no_codes = {'yes': 1, 'y': 1, 'no': 0, 'n': 0}
senior_text = df['senior_citizen'].astype(str).str.lower()
df['senior_citizen'] = senior_text.replace(yes_no_codes).astype(int)
df.head()

In [None]:
# Frequency of each distinct value, for every column listed in `cat`
for column in cat:
    counts = df[column].value_counts()
    print(counts)

In [None]:
from sklearn.preprocessing import LabelEncoder
# Integer-encode every column listed in `cat`; the single encoder is refit
# for each column, so only the last column's fit is retained in `label`.
label = LabelEncoder()
for column in cat:
    df[column] = label.fit_transform(df[column])
from tensorflow.keras.utils import to_categorical

# Features are all columns except churn; the target is churn, one-hot encoded
x = df.drop(columns='churn')
y = to_categorical(df['churn'])

In [None]:
# splitting data into training set and test set
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing.
# - random_state pins the shuffle so a re-run reproduces the same partition.
# - stratify uses the class index (argmax of the one-hot target) so the train
#   and test sets keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    x, y,
    test_size=0.25,
    random_state=42,
    stratify=y.argmax(axis=1),
)

In [None]:
# Scaling data

from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply that same
# transformation to both the training and the test features.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.callbacks import EarlyStopping

# Width of the input layer, read from the training matrix so the model keeps
# working if the number of feature columns changes.
n_features = X_train.shape[1]

# Feed-forward classifier: three hidden Dense layers, each followed by
# Dropout and BatchNormalization, then a 2-unit output layer.
model = Sequential()

# first hidden layer (also declares the input shape)
model.add(Dense(17, kernel_initializer='normal', activation='relu', input_shape=(n_features,)))
model.add(Dropout(rate=0.1))
model.add(BatchNormalization())

# second hidden layer
model.add(Dense(12, kernel_initializer='normal', activation='relu'))
model.add(Dropout(rate=0.2))
model.add(BatchNormalization())

# third hidden layer
model.add(Dense(7, kernel_initializer='normal', activation='relu'))
model.add(Dropout(rate=0.1))
model.add(BatchNormalization())

# output layer: one unit per class. The targets are one-hot vectors (built
# with to_categorical), so softmax yields a proper probability distribution
# over the two classes; two independent sigmoid units would not sum to 1.
model.add(Dense(2, kernel_initializer='normal', activation='softmax'))

# categorical_crossentropy is the loss that matches softmax + one-hot targets
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Stop once val_loss has not improved for 10 consecutive epochs and roll the
# model back to the weights of the best epoch.
early_stop = EarlyStopping(monitor='val_loss',
                           patience=10,
                           restore_best_weights=True)

# Train; 20% of the training data is held back as the validation set that
# early stopping monitors.
model_history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt   # usually used together
# Per-epoch loss values recorded during fitting
train_loss = model_history.history['loss']
val_loss = model_history.history['val_loss']
# x-axis follows the number of recorded epochs rather than a fixed count
epoch = range(1, len(train_loss) + 1)

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(x=epoch, y=train_loss, label='Training Loss', ax=ax)
sns.lineplot(x=epoch, y=val_loss, label='Validation Loss', ax=ax)
ax.set_title('Training and Validation Loss\n')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Per-epoch accuracy values recorded during fitting. They get their own names
# so the train_loss / val_loss variables from the loss plot are not
# overwritten with accuracy data.
train_acc = model_history.history['accuracy']
val_acc = model_history.history['val_accuracy']
# x-axis follows the number of recorded epochs rather than a fixed count
epoch = range(1, len(train_acc) + 1)

plt.figure(figsize=(12, 6))
sns.lineplot(x=epoch, y=train_acc, label='Training accuracy')
sns.lineplot(x=epoch, y=val_acc, label='Validation accuracy')
plt.title('Training and Validation Accuracy\n')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# evaluate() returns the loss followed by the compiled metrics; index 1 is accuracy
scores = model.evaluate(X_test, y_test)
acc = scores[1]
print(f'Accuracy of model is {acc}')

In [None]:
from tensorflow.keras.utils import plot_model
# Render the layer graph, annotating each layer with its tensor shapes
plot_model(model=model, show_shapes=True)