In [4]:


import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


In [3]:
# Load the dataset
df = pd.read_csv('Churn.csv')

In [6]:
df.to_string

<bound method DataFrame.to_string of       Gender  Senior Citizen Partner Dependents  tenure Phone Service  \
0     Female               0     Yes         No       1            No   
1     Female               0     Yes         No       1            No   
2       Male               0      No         No      34           Yes   
3       Male               0      No         No       2           Yes   
4       Male               0      No         No      45            No   
...      ...             ...     ...        ...     ...           ...   
7039    Male               0     Yes        Yes      24           Yes   
7040  Female               0     Yes        Yes      72           Yes   
7041  Female               0     Yes        Yes      11            No   
7042    Male               1     Yes         No       4           Yes   
7043    Male               0      No         No      66           Yes   

        Multiple Lines Internet Service Online Security Online Backup  \
0     No phon

In [5]:
# Drop customer ID as it's not useful for prediction
df = df.drop('Customer ID', axis=1)

In [7]:
# Convert target variable to binary (0/1)
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})

In [10]:
# Convert categorical variables to numerical
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
	if df[col].nunique() == 2:
		# Example: map binary categorical columns to 0/1
		df[col] = df[col].map({df[col].unique()[0]: 0, df[col].unique()[1]: 1})

In [14]:
# df = df.drop('Customer ID', axis=1)  # Already dropped in a previous cell

# Convert target variable to binary (0/1)
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})

# Convert categorical variables to numerical
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    if df[col].nunique() == 2:
        # Binary encode columns with only 2 unique values
        df[col] = LabelEncoder().fit_transform(df[col])
    else:
        # One-hot encode columns with more than 2 unique values
        df = pd.get_dummies(df, columns=[col], prefix=col)

In [18]:
# Split into features and target
X = df.drop('Churn', axis=1)
y = df['Churn']

In [28]:

# Ensure there are valid (non-NaN) target values before splitting

valid_indices = y[~y.isna()].index  # Get indices of non-NaN y values
X_nonan = X.loc[valid_indices]      # Use loc for label-based indexing
y_nonan = y.loc[valid_indices]

if len(X_nonan) == 0 or len(y_nonan) == 0:
    print("No valid samples available for splitting. Please check your preprocessing steps and ensure the target variable contains valid values.")
else:
    X_train, X_test, y_train, y_test = train_test_split(
        X_nonan, 
        y_nonan, 
        test_size=0.2, 
        random_state=42, 
        stratify=y_nonan  # Only works if y has at least 2 classes
    )

    # Scaling
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)  # Returns numpy array
    X_test = scaler.transform(X_test)        # Must use same scaler


No valid samples available for splitting. Please check your preprocessing steps and ensure the target variable contains valid values.


In [33]:
from tensorflow.keras import Sequential, layers, metrics

def create_model(input_dim):
    return Sequential([
        layers.Dense(64, activation='relu', input_dim=input_dim),
        layers.Dropout(0.3),
        layers.Dense(32, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(1, activation='sigmoid')
    ])

# Check if X_train exists and is not empty
if 'X_train' in locals() and X_train is not None and hasattr(X_train, 'shape'):
    model = create_model(X_train.shape[1])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy', metrics.Precision(), metrics.Recall()])
    model.summary()
else:
    print("X_train is not defined or is empty. Please check your data preprocessing steps.")

X_train is not defined or is empty. Please check your data preprocessing steps.


In [35]:
# Define early stopping to prevent overfitting
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

# Check if model, X_train, and y_train are defined and not empty
if 'model' in locals() and model is not None and \
   'X_train' in locals() and X_train is not None and hasattr(X_train, 'shape') and X_train.shape[0] > 0 and \
   'y_train' in locals() and y_train is not None and hasattr(y_train, 'shape') and y_train.shape[0] > 0:
    # Train the model
    history = model.fit(
        X_train, y_train,
        validation_split=0.2,
        epochs=100,
        batch_size=32,
        callbacks=[early_stopping],
        verbose=1
    )
else:
    print("Model or training data is not defined or is empty. Please check your preprocessing steps and ensure you have valid data before training.")

Model or training data is not defined or is empty. Please check your preprocessing steps and ensure you have valid data before training.


In [38]:
# Evaluate on test set only if model and test data are available
if 'model' in locals() and model is not None and \
   'X_test' in locals() and X_test is not None and hasattr(X_test, 'shape') and X_test.shape[0] > 0 and \
   'y_test' in locals() and y_test is not None and hasattr(y_test, 'shape') and y_test.shape[0] > 0:
	test_loss, test_acc, test_precision, test_recall = model.evaluate(X_test, y_test, verbose=0)
	print(f"Test Accuracy: {test_acc:.4f}")
	print(f"Test Precision: {test_precision:.4f}")
	print(f"Test Recall: {test_recall:.4f}")

	# Make predictions
	y_pred = (model.predict(X_test) > 0.5).astype(int)

	# Classification report
	print("\nClassification Report:")
	print(classification_report(y_test, y_pred))

	# Confusion matrix
	print("\nConfusion Matrix:")
	print(confusion_matrix(y_test, y_pred))
else:
	print("Model or test data is not defined or is empty. Please check your preprocessing steps and ensure you have valid data before evaluation.")

Model or test data is not defined or is empty. Please check your preprocessing steps and ensure you have valid data before evaluation.


In [41]:
# Check if model and data are defined and not empty before training and evaluation
if 'model' in locals() and model is not None and \
   'X_train' in locals() and X_train is not None and hasattr(X_train, 'shape') and X_train.shape[0] > 0 and \
   'y_train' in locals() and y_train is not None and hasattr(y_train, 'shape') and y_train.shape[0] > 0 and \
   'X_test' in locals() and X_test is not None and hasattr(X_test, 'shape') and X_test.shape[0] > 0 and \
   'y_test' in locals() and y_test is not None and hasattr(y_test, 'shape') and y_test.shape[0] > 0:
    # First ensure the model is trained
    history = model.fit(X_train, y_train, epochs=10, validation_split=0.2)

    # Then evaluate
    print("\nEvaluation:")
    print(model.metrics_names)  # Debug output
    results = model.evaluate(X_test, y_test, verbose=0)

    if len(results) == 4:  # If you get 4 values as expected
        test_loss, test_acc, test_precision, test_recall = results
        print(f"Test Accuracy: {test_acc:.4f}")
        print(f"Test Precision: {test_precision:.4f}")
        print(f"Test Recall: {test_recall:.4f}")
    else:
        print("Unexpected number of metrics returned:", results)

    # Predictions (this part should work regardless)
    y_pred = (model.predict(X_test) > 0.5).astype(int)
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
else:
    print("Model or data is not defined or is empty. Please check your preprocessing steps and ensure you have valid data before training and evaluation.")

Model or data is not defined or is empty. Please check your preprocessing steps and ensure you have valid data before training and evaluation.


In [43]:
# List to collect error messages
errors = []

# Check each condition individually
if 'model' not in locals():
    errors.append("Model not defined")
elif model is None:
    errors.append("Model is None")

if 'X_train' not in locals():
    errors.append("X_train not defined")
elif X_train is None:
    errors.append("X_train is None")
elif not hasattr(X_train, 'shape'):
    errors.append("X_train has no shape attribute")
elif X_train.shape[0] == 0:
    errors.append("X_train is empty (0 rows)")

if 'y_train' not in locals():
    errors.append("y_train not defined")
elif y_train is None:
    errors.append("y_train is None")
elif not hasattr(y_train, 'shape'):
    errors.append("y_train has no shape attribute")
elif y_train.shape[0] == 0:
    errors.append("y_train is empty (0 rows)")

if 'X_test' not in locals():
    errors.append("X_test not defined")
elif X_test is None:
    errors.append("X_test is None")
elif not hasattr(X_test, 'shape'):
    errors.append("X_test has no shape attribute")
elif X_test.shape[0] == 0:
    errors.append("X_test is empty (0 rows)")

if 'y_test' not in locals():
    errors.append("y_test not defined")
elif y_test is None:
    errors.append("y_test is None")
elif not hasattr(y_test, 'shape'):
    errors.append("y_test has no shape attribute")
elif y_test.shape[0] == 0:
    errors.append("y_test is empty (0 rows)")

# If any errors found, print them and exit
if errors:
    print("Validation failed with the following errors:")
    for error in errors:
        print(f"- {error}")
    print("Please fix these issues before proceeding.")
else:
    print("All validations passed. Proceeding with model training and evaluation.")
    
    # Proceed with your training and evaluation code
    history = model.fit(X_train, y_train, epochs=10, validation_split=0.2)
    
    # Evaluation code
    print("\nEvaluation:")
    print("Metric names:", model.metrics_names)
    results = model.evaluate(X_test, y_test, verbose=0)
    test_loss, test_acc, test_precision, test_recall = results
    print(f"Test Accuracy: {test_acc:.4f}")
    print(f"Test Precision: {test_precision:.4f}")
    print(f"Test Recall: {test_recall:.4f}")
    
    # Predictions
    y_pred = (model.predict(X_test) > 0.5).astype(int)
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

Validation failed with the following errors:
- Model not defined
- X_train not defined
- y_train not defined
- X_test not defined
- y_test not defined
Please fix these issues before proceeding.


In [45]:
def prepare_data():
    # Your preprocessing code here
    return X_train, y_train, X_test, y_test

def build_model(input_shape):
    return create_model(input_shape)

def main():
    # 1. Prepare data
    X_train, y_train, X_test, y_test = prepare_data()
    
    # 2. Build model
    model = build_model(X_train.shape[1])
    
    # 3. Train and evaluate
    history = model.fit(X_train, y_train, epochs=10, validation_split=0.2)
    
    # Evaluation code
    print("\nEvaluation:")
    results = model.evaluate(X_test, y_test, verbose=0)
    test_loss, test_acc, test_precision, test_recall = results
    print(f"Test Accuracy: {test_acc:.4f}")

print(locals().keys())

dict_keys(['__name__', '__doc__', '__package__', '__loader__', '__spec__', '__builtin__', '__builtins__', '_ih', '_oh', '_dh', 'In', 'Out', 'get_ipython', 'exit', 'quit', 'open', '_', '__', '___', '__vsc_ipynb_file__', '_i', '_ii', '_iii', '_i1', '_i2', '_exit_code', 'pd', 'np', '_i3', 'df', '_i4', 'tf', 'train_test_split', 'StandardScaler', 'LabelEncoder', 'classification_report', 'confusion_matrix', 'accuracy_score', '_i5', '_i6', '_6', '_i7', '_i8', '_i9', '_i10', 'categorical_cols', 'col', '_i11', '_i12', '_i13', '_i14', '_i15', '_i16', 'X', 'y', '_i17', '_i18', '_i19', '_i20', 'X_clean', 'y_clean', '_i21', '_i22', 'df_clean', '_i23', '_i24', '_i25', 'X_nonan', 'y_nonan', '_i26', 'valid_indices', '_i27', '_i28', '_i29', '_i30', 'create_model', '_i31', '_i32', 'Sequential', 'layers', 'metrics', '_i33', '_i34', 'early_stopping', '_i35', '_i36', '_i37', '_i38', '_i39', '_i40', '_i41', '_i42', '_i43', 'errors', 'error', '_i44', 'prepare_data', 'build_model', 'main', '_i45'])


In [46]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix

# 1. Data Loading and Preprocessing
def load_and_preprocess_data(filepath):
    # Load data
    df = pd.read_csv(filepath)
    
    # Preprocessing
    df = df.drop('Customer ID', axis=1)
    df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})
    
    # Handle categorical variables
    categorical_cols = df.select_dtypes(include=['object']).columns
    for col in categorical_cols:
        if df[col].nunique() == 2:
            df[col] = LabelEncoder().fit_transform(df[col])
        else:
            df = pd.get_dummies(df, columns=[col], prefix=col)
    
    return df

# 2. Model Creation
def create_model(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(input_shape,)),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
    )
    return model

# 3. Main Execution
def main():
    try:
        # Step 1: Prepare data
        print("Loading and preprocessing data...")
        df_processed = load_and_preprocess_data('Churn.csv')
        X = df_processed.drop('Churn', axis=1)
        y = df_processed['Churn']
        
        # Step 2: Train-test split
        print("Splitting data into train/test sets...")
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y)
        
        # Step 3: Feature scaling
        print("Scaling features...")
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        
        # Step 4: Create model
        print("Creating model...")
        input_shape = X_train.shape[1]
        model = create_model(input_shape)
        model.summary()
        
        # Step 5: Train model
        print("Training model...")
        history = model.fit(
            X_train, y_train,
            epochs=10,
            batch_size=32,
            validation_split=0.2,
            verbose=1
        )
        
        # Step 6: Evaluate model
        print("\nEvaluating model...")
        print("Metric names:", model.metrics_names)
        test_loss, test_acc, test_precision, test_recall = model.evaluate(X_test, y_test, verbose=0)
        
        print(f"\nTest Accuracy: {test_acc:.4f}")
        print(f"Test Precision: {test_precision:.4f}")
        print(f"Test Recall: {test_recall:.4f}")
        
        # Step 7: Generate predictions
        y_pred = (model.predict(X_test) > 0.5).astype(int)
        print("\nClassification Report:")
        print(classification_report(y_test, y_pred))
        print("\nConfusion Matrix:")
        print(confusion_matrix(y_test, y_pred))
        
    except Exception as e:
        print(f"\nError encountered: {str(e)}")
        print("Please check:")
        print("- Data file exists and is in correct format")
        print("- All required columns are present")
        print("- No missing values in critical fields")

if __name__ == "__main__":
    main()

Loading and preprocessing data...
Splitting data into train/test sets...
Scaling features...
Creating model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training model...
Epoch 1/10
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.5433 - loss: 0.8386 - precision: 0.3032 - recall: 0.4816 - val_accuracy: 0.7524 - val_loss: 0.4613 - val_precision: 0.5870 - val_recall: 0.1831
Epoch 2/10
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8412 - loss: 0.3479 - precision: 0.8349 - recall: 0.5112 - val_accuracy: 0.7755 - val_loss: 0.4588 - val_precision: 0.5875 - val_recall: 0.4780
Epoch 3/10
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9426 - loss: 0.1671 - precision: 0.9289 - recall: 0.8534 - val_accuracy: 0.7595 - val_loss: 0.5129 - val_precision: 0.5405 - val_recall: 0.5424
Epoch 4/10
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.9790 - loss: 0.0739 - precision: 0.9635 - recall: 0.9582 - val_accuracy: 0.7533 - val_loss: 0.5831 - val_precision: 0.5272 - val_recall: 0.5593
Epoch

In [None]:
# Split into features and target
X = df.drop('Churn', axis=1)
y = df['Churn']



# Standardize numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

ValueError: Input y contains NaN.