In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import pickle  # Using the pickle library as requested

# Ensure consistent results for reproducibility
tf.random.set_seed(42)

# --- Step 1: Data Loading and Preprocessing (Same as Day 1) ---
print("--- Step 1: Data Loading and Preprocessing ---")
try:
    df = pd.read_csv('/content/Telco_Cusomer_Churn.csv')
except FileNotFoundError:
    print("Error: Dataset file not found. Please ensure 'WA_Fn-UseC_-Telco-Customer-Churn.csv' is in the same directory.")
    exit()

# Handle TotalCharges and split data
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df.dropna(inplace=True)
X = df.drop(columns=['Churn', 'customerID'])
y = df['Churn'].map({'Yes': 1, 'No': 0})

# Identify features for preprocessing
categorical_features = X.select_dtypes(include=['object']).columns
numerical_features = X.select_dtypes(include=['int64', 'float64']).columns

# Create a preprocessor using ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Split data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

# Fit and transform the data
X_train_processed = preprocessor.fit_transform(X_train)
X_val_processed = preprocessor.transform(X_val)
X_test_processed = preprocessor.transform(X_test)
input_shape = X_train_processed.shape[1]

print("Data preprocessing complete. Proceeding to Day 2 training...")
print("-" * 50)

# --- Step 2: Training Three New Models ---

# Define EarlyStopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)

# Model 1: A new simple architecture
print("\n--- Model 1: A new simple model ---")
model1_d2 = Sequential([
    Dense(20, activation='relu', input_shape=(input_shape,)),
    Dense(1, activation='sigmoid')
])
model1_d2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history1_d2 = model1_d2.fit(X_train_processed, y_train,
                            epochs=100,
                            validation_data=(X_val_processed, y_val),
                            callbacks=[early_stopping],
                            verbose=1)

# Model 2: A new intermediate architecture
print("\n--- Model 2: A new intermediate model ---")
model2_d2 = Sequential([
    Dense(32, activation='relu', input_shape=(input_shape,)),
    Dropout(0.1),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])
model2_d2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history2_d2 = model2_d2.fit(X_train_processed, y_train,
                            epochs=100,
                            validation_data=(X_val_processed, y_val),
                            callbacks=[early_stopping],
                            verbose=1)

# Model 3: A new advanced architecture
print("\n--- Model 3: A new advanced model ---")
model3_d2 = Sequential([
    Dense(64, activation='relu', input_shape=(input_shape,)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])
model3_d2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history3_d2 = model3_d2.fit(X_train_processed, y_train,
                            epochs=100,
                            validation_data=(X_val_processed, y_val),
                            callbacks=[early_stopping],
                            verbose=1)

print("-" * 50)

# --- Step 3: Evaluate and Select the Best Model ---
print("\n--- Step 3: Evaluating and Selecting the Best Model ---")
_, acc1_d2 = model1_d2.evaluate(X_test_processed, y_test, verbose=0)
_, acc2_d2 = model2_d2.evaluate(X_test_processed, y_test, verbose=0)
_, acc3_d2 = model3_d2.evaluate(X_test_processed, y_test, verbose=0)

models = {
    "Model 1": {"model": model1_d2, "accuracy": acc1_d2},
    "Model 2": {"model": model2_d2, "accuracy": acc2_d2},
    "Model 3": {"model": model3_d2, "accuracy": acc3_d2}
}

best_model_name = max(models, key=lambda name: models[name]["accuracy"])
best_model = models[best_model_name]["model"]
best_accuracy = models[best_model_name]["accuracy"]

print(f"Model 1 Test Accuracy: {acc1_d2:.4f}")
print(f"Model 2 Test Accuracy: {acc2_d2:.4f}")
print(f"Model 3 Test Accuracy: {acc3_d2:.4f}")
print(f"\nConclusion: The best model is '{best_model_name}' with an accuracy of {best_accuracy:.4f} on the test set.")

# --- Step 4: Saving the Best Model and Preprocessor ---
print("\n--- Step 4: Saving the Best Model and Preprocessor ---")

# The Keras model is saved using its native method
best_model.save('best_ann_model.h5')
print("✅ Best Keras model saved as 'best_ann_model.h5'")

# The preprocessor is saved using the pickle library as requested
with open('preprocessor.pkl', 'wb') as f:
    pickle.dump(preprocessor, f)

print("✅ Preprocessor saved as 'preprocessor.pkl' using pickle")



--- Step 1: Data Loading and Preprocessing ---
Data preprocessing complete. Proceeding to Day 2 training...
--------------------------------------------------

--- Model 1: A new simple model ---


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.7266 - loss: 0.5376 - val_accuracy: 0.8000 - val_loss: 0.4299
Epoch 2/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7971 - loss: 0.4355 - val_accuracy: 0.8114 - val_loss: 0.4183
Epoch 3/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7996 - loss: 0.4280 - val_accuracy: 0.8123 - val_loss: 0.4149
Epoch 4/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7981 - loss: 0.4252 - val_accuracy: 0.8152 - val_loss: 0.4136
Epoch 5/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7989 - loss: 0.4235 - val_accuracy: 0.8123 - val_loss: 0.4130
Epoch 6/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7983 - loss: 0.4219 - val_accuracy: 0.8133 - val_loss: 0.4126
Epoch 7/100
[1m154/1



Model 1 Test Accuracy: 0.7867
Model 2 Test Accuracy: 0.7877
Model 3 Test Accuracy: 0.7810

Conclusion: The best model is 'Model 2' with an accuracy of 0.7877 on the test set.

--- Step 4: Saving the Best Model and Preprocessor ---
✅ Best Keras model saved as 'best_ann_model.h5'
✅ Preprocessor saved as 'preprocessor.pkl' using pickle
