In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.compose import ColumnTransformer

# 1. Load dataset
df = pd.read_csv("S:/Sameet Patil - SEM 7/LP3/codes/Churn_Modelling.csv")

# 2. Features (X) & Target (y)
X = df.drop(["RowNumber", "CustomerId", "Surname", "Exited"], axis=1)
y = df["Exited"]

# Encode categorical columns (Geography, Gender)
ct = ColumnTransformer(
    transformers=[("encoder", OneHotEncoder(drop="first"), ["Geography", "Gender"])],
    remainder="passthrough",
)
X = ct.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 3. Normalize (standardize numerical values)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# 4. Build Neural Network
model = Sequential()
model.add(Dense(16, activation="relu"))  # improvement: more neurons = better learning
model.add(Dense(16, activation="relu"))
model.add(Dense(1, activation="sigmoid"))  # binary output

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

# 5. Evaluation
y_pred = (model.predict(X_test) > 0.5).astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Accuracy: 0.8565
Confusion Matrix:
 [[1541   66]
 [ 221  172]]
