In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder


In [9]:

# Read the churn dataset and, in the same chained expression, discard the
# three columns that are not used as model inputs.
data = pd.read_csv("Churn_Modelling.csv").drop(
    columns=['RowNumber', 'CustomerId', 'Surname']
)


In [10]:

# One-hot encode the categorical columns. drop_first=True omits the first
# level of each category so the remaining dummy columns are not redundant.
data = pd.get_dummies(data, drop_first=True)

# 'Exited' is the label to predict; every other column is a feature.
y = data['Exited']
X = data.drop(columns=['Exited'])


In [11]:

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply that
# same fitted scaler to both the training and the held-out rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [12]:

# Logistic Regression Model
# Fitted on the standardized training features.
LR = LogisticRegression()
LR.fit(X_train_scaled, y_train)

# Random Forest Model
# A random forest draws bootstrap samples and random feature subsets, so an
# unseeded forest (and its reported accuracy) changes on every run. Fixing
# random_state makes the notebook reproducible under Restart & Run All; 42
# matches the seed already used for the train/test split.
# The forest is trained on the scaled matrix as well so that both models
# accept exactly the same inputs at prediction time.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train_scaled, y_train)


In [13]:

# Score each model on the held-out rows: predict, then compare the
# predictions with the true labels.

# Logistic regression
y_pred_lr = LR.predict(X_test_scaled)
accuracy_lr = accuracy_score(y_test, y_pred_lr)

# Random forest
y_pred_rf = rf.predict(X_test_scaled)
accuracy_rf = accuracy_score(y_test, y_pred_rf)

# Report both accuracies (fraction of test rows classified correctly)
print(f"Logistic Regression Accuracy: {accuracy_lr}")
print(f"Random Forest Accuracy: {accuracy_rf}")

Logistic Regression Accuracy: 0.809
Random Forest Accuracy: 0.868


In [14]:
# Describe a single hypothetical customer using the encoded feature names
# the models were trained on.
input_data = pd.DataFrame({
    'CreditScore': [750],
    'Age': [40],
    'Tenure': [5],
    'Balance': [50000],
    'NumOfProducts': [2],
    'HasCrCard': [1],
    'IsActiveMember': [1],
    'EstimatedSalary': [120000],
    'Geography_Germany': [0],
    'Geography_Spain': [1],  # Assuming customer is from Spain
    'Gender_Male': [0],  # Assuming customer is Female
})

# Put the columns in exactly the order of the training matrix instead of
# relying on the dict above happening to match it. The scaler and both
# models work on positional arrays, so a reordered column would otherwise
# be scaled with another feature's statistics. Selecting by X_train.columns
# also raises a KeyError if a training feature is missing from input_data.
input_data = input_data[X_train.columns]

# Standardize input data (apply the same transformation as the training data)
input_data_scaled = scaler.transform(input_data)

# Get predictions (one row in, so take the single element of each result)
lr_churn_prediction = LR.predict(input_data_scaled)[0]
rf_churn_prediction = rf.predict(input_data_scaled)[0]

# Output the predictions: a predicted label of 1 is reported as churn,
# anything else as stay.
churn_predictions = {
    "Logistic Regression": lr_churn_prediction,
    "Random Forest": rf_churn_prediction,
}
for model_name, predicted_label in churn_predictions.items():
    outcome = "churn" if predicted_label == 1 else "stay"
    print(f"{model_name}: This customer is likely to {outcome}.")

Logistic Regression: This customer is likely to stay.
Random Forest: This customer is likely to stay.
