<a href="https://colab.research.google.com/github/sushmitha6145/CodSoft/blob/main/Task3_CustomerChurn_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[DataSet](https://www.kaggle.com/datasets/shantanudhakadd/bank-customer-churn-prediction)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, roc_auc_score
import warnings

# Suppress warnings
warnings.filterwarnings("ignore")

# Load the dataset
df = pd.read_csv('/content/Churn_Modelling.csv')

# Check for a column that seems like the target variable (e.g., binary)
target_column = None
for column in df.columns:
    if df[column].nunique() == 2 and df[column].dtype == 'int64':
        target_column = column
        break

# Verify target column existence
if target_column is None:
    raise KeyError("No suitable target variable found in the dataset. Please check the column types and values.")

# Handle missing values
df.fillna(method='ffill', inplace=True)

# Encode categorical variables
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

# Split the dataset into features and target
X = df.drop(target_column, axis=1)
y = df[target_column]

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Logistic Regression
lr_model = LogisticRegression()
lr_model.fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)
print("Logistic Regression Performance:")
print(classification_report(y_test, y_pred_lr, zero_division=1))  # Suppress warning
print("AUC-ROC:", roc_auc_score(y_test, lr_model.predict_proba(X_test)[:, 1]))

# Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
print("Random Forest Performance:")
print(classification_report(y_test, y_pred_rf, zero_division=1))  # Suppress warning
print("AUC-ROC:", roc_auc_score(y_test, rf_model.predict_proba(X_test)[:, 1]))

# Gradient Boosting (XGBoost)
xgb_model = XGBClassifier(random_state=42)
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)
print("XGBoost Performance:")
print(classification_report(y_test, y_pred_xgb, zero_division=1))  # Suppress warning
print("AUC-ROC:", roc_auc_score(y_test, xgb_model.predict_proba(X_test)[:, 1]))

# Hyperparameter Tuning for Random Forest
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=5, scoring='roc_auc', n_jobs=-1)
grid_search.fit(X_train, y_train)
best_rf_model = grid_search.best_estimator_
print("Best parameters for Random Forest:", grid_search.best_params_)

# Evaluate the tuned model
y_pred_best_rf = best_rf_model.predict(X_test)
print("Tuned Random Forest Performance:")
print(classification_report(y_test, y_pred_best_rf, zero_division=1))  # Suppress warning
print("AUC-ROC:", roc_auc_score(y_test, best_rf_model.predict_proba(X_test)[:, 1]))


Logistic Regression Performance:
              precision    recall  f1-score   support

           0       1.00      0.00      0.00       573
           1       0.71      1.00      0.83      1427

    accuracy                           0.71      2000
   macro avg       0.86      0.50      0.42      2000
weighted avg       0.80      0.71      0.59      2000

AUC-ROC: 0.5124261469466325
Random Forest Performance:
              precision    recall  f1-score   support

           0       0.21      0.01      0.02       573
           1       0.71      0.98      0.83      1427

    accuracy                           0.71      2000
   macro avg       0.46      0.50      0.42      2000
weighted avg       0.57      0.71      0.60      2000

AUC-ROC: 0.5194876667999722
XGBoost Performance:
              precision    recall  f1-score   support

           0       0.26      0.10      0.14       573
           1       0.71      0.89      0.79      1427

    accuracy                           0.66  