In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense


# Display every column when a frame is printed (no column limit).
pd.set_option("display.max_columns", None)

# Raw data exactly as read from disk; the working frame below is derived from it.
base_df = pd.read_csv("/home/samir/Desktop/rudraAnalytics/sub_projects/churn/data/data.csv")

# Working frame with two columns re-typed:
#   * TotalCharges -> numeric; entries that cannot be parsed become NaN.
#   * SeniorCitizen -> string.
df = base_df.assign(
    TotalCharges=pd.to_numeric(base_df['TotalCharges'], errors='coerce'),
    SeniorCitizen=base_df['SeniorCitizen'].astype(str),
)


def one_hot_encode(df, encode_set=None, dont_encode=None):
    """Return a copy of *df* with selected object-dtype columns encoded as 0/1.

    Columns are visited in the order given by ``encode_set``. A column is left
    untouched when it is listed in ``dont_encode`` or when its dtype is not
    ``object``.

    * A column with exactly two distinct values is replaced, under its
      original name, by a single integer indicator column. The indicator is
      built with ``pd.get_dummies(..., drop_first=True)``, i.e. it flags the
      category that remains after the first one is dropped.
    * Any other object column is removed and replaced by one integer indicator
      column per category. The new columns are appended at the right edge of
      the frame and named ``<column>1``, ``<column>2``, ... in the order
      ``pd.get_dummies`` emits them (the category labels are not kept).

    Args:
        df: Input DataFrame. It is not modified.
        encode_set: Iterable of column names to consider. ``None`` (the
            default) means no columns.
        dont_encode: Container of column names to skip even if they appear in
            ``encode_set``. ``None`` (the default) means skip nothing.

    Returns:
        A new DataFrame with the encoded columns.

    Raises:
        KeyError: If a name in ``encode_set`` that is not skipped is missing
            from ``df``.
    """
    # None sentinels replace the former mutable ``[]`` defaults.
    columns_to_encode = () if encode_set is None else encode_set
    columns_to_skip = () if dont_encode is None else dont_encode

    encoded_df = df.copy()
    for column in columns_to_encode:
        if column in columns_to_skip or df[column].dtype != 'object':
            continue

        if len(df[column].unique()) == 2:
            # dtype=int guarantees 0/1 instead of the True/False that
            # get_dummies returns by default on recent pandas versions.
            indicator = pd.get_dummies(df[column], drop_first=True, dtype=int)
            # Select the single indicator column explicitly rather than
            # assigning a whole DataFrame to one column.
            encoded_df[column] = indicator.iloc[:, 0]
        else:
            dummies = pd.get_dummies(df[column], prefix=column, dtype=int)
            dummies.columns = [
                f"{column}{position}"
                for position in range(1, dummies.shape[1] + 1)
            ]
            encoded_df = pd.concat(
                [encoded_df.drop(columns=column), dummies], axis=1
            )

    return encoded_df

# Column names handed to one_hot_encode as the candidate set, grouped here
# for readability; the resulting list order is unchanged.
features = (
    # identifier and demographic fields
    ['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure']
    # phone and internet service fields
    + ['PhoneService', 'MultipleLines', 'InternetService']
    + ['OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport']
    + ['StreamingTV', 'StreamingMovies']
    # contract and billing fields
    + ['Contract', 'PaperlessBilling', 'PaymentMethod']
    + ['MonthlyCharges', 'TotalCharges']
    # target column
    + ['Churn']
)

# Columns that are passed to one_hot_encode as the "do not encode" list.
dont_label = ['customerID'] + ['tenure', 'MonthlyCharges', 'TotalCharges']

# Encode the categorical columns. customerID is dropped up front and the
# numeric columns are protected from encoding through dont_label.
encoded_df = one_hot_encode(df.drop('customerID', axis=1), features, dont_label)

# Fill missing TotalCharges with the column mean. The result is assigned back
# instead of calling fillna(inplace=True) on the column selection: that chained
# in-place form is deprecated and does not update the frame under pandas
# Copy-on-Write, which would leave NaNs in the features.
# NOTE: the mean is taken over all rows, i.e. before the train/test split.
encoded_df['TotalCharges'] = encoded_df['TotalCharges'].fillna(
    encoded_df['TotalCharges'].mean()
)

# Features get a trailing channel axis -> (samples, n_features, 1), as Conv1D expects.
# Explicit numeric casts keep the arrays numeric even when the encoded
# indicator columns are booleans.
X_cnn = np.expand_dims(
    encoded_df.drop('Churn', axis=1).values.astype(float), axis=-1
)
y_cnn = encoded_df['Churn'].astype(int).values

# Split the data
X_train_cnn, X_test_cnn, y_train_cnn, y_test_cnn = train_test_split(
    X_cnn, y_cnn, test_size=0.2, random_state=42
)

# Standardize the data using StandardScaler. The scaler works on 2-D input, so
# the channel axis is flattened away for scaling and restored afterwards.
# Statistics are fitted on the training split only and reused for the test split.
scaler_cnn = StandardScaler()
X_train_cnn = scaler_cnn.fit_transform(
    X_train_cnn.reshape(X_train_cnn.shape[0], -1)
).reshape(X_train_cnn.shape)
X_test_cnn = scaler_cnn.transform(
    X_test_cnn.reshape(X_test_cnn.shape[0], -1)
).reshape(X_test_cnn.shape)

# Build the 1D CNN model
model_cnn = Sequential([
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model_cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model.
# NOTE: the test split doubles as validation data here.
model_cnn.fit(
    X_train_cnn,
    y_train_cnn,
    epochs=10,
    batch_size=32,
    validation_data=(X_test_cnn, y_test_cnn),
)

# Make predictions on the test set: threshold the sigmoid output at 0.5.
y_pred_cnn = (model_cnn.predict(X_test_cnn) > 0.5).astype(int)

# Evaluate the model
accuracy_cnn = accuracy_score(y_test_cnn, y_pred_cnn)
print(f"CNN Test Accuracy: {accuracy_cnn * 100:.2f}%")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CNN Test Accuracy: 81.33%


CNN Test Accuracy: 81.33%