## Import data and libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import numpy as np
import seaborn as sns
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Dropout, BatchNormalization, LSTM, Activation
from tensorflow.keras.activations import swish
from tensorflow.keras.utils import plot_model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.initializers import HeNormal
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
from imblearn.over_sampling import SMOTE
import warnings
from xgboost import XGBClassifier
# Silence every warning so the notebook output stays readable.
warnings.filterwarnings("ignore")
# Use matplotlib's 'ggplot' style for all figures drawn below.
plt.style.use('ggplot')

In [None]:
# Load the training CSV once and keep it untouched in df_master;
# all later work happens on the independent copy `df`.
df_master = pd.read_csv("../data/Train.csv")
df = df_master.copy()
# Preview the first rows.
df.head()

In [None]:
# (rows, columns) of the working frame.
df.shape

## Exploratory Data Analysis (EDA)

In [None]:
# Print column dtypes and non-null counts.
df.info()

In [None]:
# Percentage of missing values per column, largest first.
# The fraction is scaled to percent *before* rounding: rounding first and
# multiplying afterwards leaks binary-float artefacts into the display
# (e.g. 0.57 * 100 == 56.99999999999999).
df.isna().mean().sort_values(ascending=False).mul(100).round(2)

In [None]:
# Distinct values found in the TOP_PACK column.
df["TOP_PACK"].unique()

In [None]:
# Distinct values found in the REGION column.
df["REGION"].unique()

In [None]:
# Distinct values found in the MRG column.
df["MRG"].unique()

In [None]:
# Kernel-density plot for each listed column, filled row by row into a
# 3x4 grid of axes. Eleven columns are plotted, so the last panel stays empty.
density_features = [
    "MONTANT", "FREQUENCE_RECH", "REVENUE", "ARPU_SEGMENT",
    "FREQUENCE", "DATA_VOLUME", "ON_NET", "ORANGE",
    "TIGO", "REGULARITY", "FREQ_TOP_PACK",
]

fig, axes = plt.subplots(3, 4, figsize=(20, 12))

# axes.flat walks the grid in row-major order, matching the list order above.
for panel, feature in zip(axes.flat, density_features):
    sns.kdeplot(data=df, x=feature, ax=panel)

plt.tight_layout()
plt.show()

In [None]:
# Bar chart of the number of rows per CHURN value.
sns.countplot(data=df, x="CHURN")
plt.show()

## Data preprocessing

In [None]:
# Resampling (placeholder: no resampling step is implemented in this cell)

In [None]:
# Feature matrix: everything except the columns listed here
# (the CHURN target is among them).
dropped_columns = ["user_id", "MRG", "ZONE1", "ZONE2", "CHURN"]
X = df.drop(columns=dropped_columns)
# Target vector.
y = df["CHURN"]

In [None]:
# Hold out 5% of the rows as a test set; the seed is fixed so the split
# is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.05,
    random_state=42,
)
# Report the shape of each feature split.
for feature_split in (X_train, X_test):
    print(feature_split.shape)

In [None]:
# Partition the feature columns by dtype.
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns
categorical_cols = X.select_dtypes(include=['object', 'category']).columns

# Categorical branch: fill gaps with the literal 'missing', then one-hot
# encode into a dense array (categories unseen at fit time are ignored).
categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
])

# Numerical branch: fill gaps with the column median, then standardise.
numerical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Columns matched by neither selector are passed through unchanged.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_pipeline, categorical_cols),
        ('num', numerical_pipeline, numerical_cols),
    ],
    remainder='passthrough',
)

# Fit on the training split only; the test split reuses the fitted statistics.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

## Model Training

In [None]:
# Gradient-boosted trees: 100 estimators, 80% row subsampling and 80%
# per-node column subsampling.
xgb = XGBClassifier(n_estimators=100, subsample=0.8, colsample_bynode=0.8, n_jobs=-1)
# Random forest with default hyper-parameters and a fixed seed.
rf = RandomForestClassifier(random_state=42)


In [None]:
# Train XGBoost on the preprocessed training features.
xgb.fit(X_train_processed, y_train)

In [None]:
# Train the random forest on the same preprocessed training features.
rf.fit(X_train_processed, y_train)

In [None]:
# Keep the hold-out predictions for the confusion matrix / report below,
# then display the estimator's score on the hold-out set.
y_pred_xgb = xgb.predict(X_test_processed)
xgb.score(X_test_processed, y_test)

In [None]:
# Hold-out predictions and score for the random forest.
y_pred_rf = rf.predict(X_test_processed)
rf.score(X_test_processed, y_test)

In [None]:
# Confusion matrix of the XGBoost predictions on the hold-out set.
# The cells are integer counts, so they are annotated as integers ('d')
# instead of one-decimal floats, and both axes are labelled
# (confusion_matrix puts true labels on rows, predictions on columns).
plt.figure(figsize=(10, 10))
sns.heatmap(confusion_matrix(y_test, y_pred_xgb), annot=True, fmt='d')
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.show()

In [None]:
# Per-class precision / recall / F1 for the XGBoost hold-out predictions.
print(classification_report(y_test, y_pred_xgb))

In [None]:
# Fully-connected binary classifier: 256 -> 128 -> 64 -> 1.
n_features = X_train_processed.shape[1]


def _relu_dense(units, **extra):
    """Dense ReLU layer with He-normal init and L2(0.001) weight penalty."""
    return Dense(
        units,
        activation='relu',
        kernel_initializer=HeNormal(),
        kernel_regularizer=l2(0.001),
        **extra,
    )


model = Sequential([
    # Input layer
    _relu_dense(256, input_dim=n_features),
    BatchNormalization(),
    # Hidden layers
    _relu_dense(128),
    Dropout(0.3),
    BatchNormalization(),
    _relu_dense(64),
    Dropout(0.3),
    BatchNormalization(),
    # Output layer: a single sigmoid unit for binary classification
    Dense(1, activation='sigmoid'),
])

model.summary()

In [None]:
# Render the DNN architecture diagram to an image file.
plot_model(model, to_file="../dnn.png")

In [None]:
# Alternative kept for reference: legacy Adam with an explicit learning
# rate and gradient value clipping.
#optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.001, clipvalue=0.5)
# Default Adam, binary cross-entropy loss, accuracy as the reported metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train on the CPU device for 10 epochs with mini-batches of 64;
# 10% of the training data is held out for validation.
with tf.device('/cpu:0'):
    model.fit(X_train_processed, y_train, epochs=10, batch_size=64, validation_split=0.1) #validation_data=(X_test_processed, y_test))

In [None]:
# Persist the trained DNN to disk (HDF5 file).
model.save("../models/dnn.h5")

In [None]:
# Loss and accuracy of the DNN on the hold-out set.
loss, accuracy = model.evaluate(X_test_processed, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

In [None]:
# Two-layer LSTM binary classifier. Each feature is treated as one
# timestep carrying a single value: input shape (n_features, 1).
model_2 = Sequential()
model_2.add(LSTM(8, return_sequences=True, input_shape=(X_train_processed.shape[1], 1)))
# The swish activations belong to model_2. Adding them to `model` would
# append layers after the sigmoid output of the already-trained DNN.
model_2.add(Activation(swish))
model_2.add(Dropout(0.2))
# The last recurrent layer returns only its final state, so the Dense head
# emits one probability per sample, matching the 1-D target.
model_2.add(LSTM(8, return_sequences=False))
model_2.add(Dropout(0.2))
model_2.add(Activation(swish))
model_2.add(Dense(1, activation='sigmoid'))
model_2.summary()

In [None]:
# Same optimizer / loss / metric configuration as the DNN.
model_2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])  # Adjust loss and metrics as per your task

In [None]:
# Train the LSTM for 10 epochs, holding out 5% of the training data for validation.
# NOTE(review): model_2 declares input_shape=(n_features, 1) while the
# ColumnTransformer output is 2-D — confirm the installed Keras version
# accepts this without an explicit trailing axis.
model_2.fit(X_train_processed, y_train, epochs=10, batch_size=64, validation_split=0.05)

In [None]:
# Two-layer SimpleRNN binary classifier; every feature is fed as one
# timestep with a single value.
rnn_input_shape = (X_train_processed.shape[1], 1)

model_test = Sequential([
    # First recurrent block: emits the full sequence for the next RNN layer.
    SimpleRNN(4, return_sequences=True, input_shape=rnn_input_shape),
    Activation(swish),
    Dropout(0.2),
    # Second recurrent block: emits only the final state.
    SimpleRNN(4, return_sequences=False),
    Activation(swish),
    Dropout(0.2),
    # Output layer for binary classification
    Dense(1, activation='sigmoid'),
])
model_test.summary()

In [None]:
# Render the SimpleRNN architecture built in the previous cell.
# The DNN `model` already has its own diagram, so plotting it here would
# write the wrong architecture into rnn.png.
plot_model(model_test, to_file="../rnn.png")

In [None]:
# Same optimizer / loss / metric configuration as the other networks.
model_test.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train the SimpleRNN for 10 epochs, holding out 5% of the training data for validation.
model_test.fit(X_train_processed, y_train, epochs=10, batch_size=64, validation_split=0.05)

In [None]:
# Persist the LSTM network (model_2) to disk.
# NOTE(review): the SimpleRNN `model_test` was trained just above but is not
# the one saved here — confirm model_2 is the intended model.
model_2.save("../models/rnn.h5")

In [None]:
# Loss and accuracy of model_2 (the LSTM network) on the hold-out set.
loss_2, accuracy_2 = model_2.evaluate(X_test_processed, y_test)
print(f"Test Loss: {loss_2}, Test Accuracy: {accuracy_2}")