In [1]:
!pip install pandas




In [6]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
# Path to the churn CSV.
# NOTE(review): absolute path that looks environment-specific (Colab-style
# /content directory) — adjust it when running elsewhere.
DATA_PATH = "/content/Churn_Modelling.csv"

# --- Load the data and separate predictors from the label --------------------
data = pd.read_csv(DATA_PATH)
features = data.drop("Exited", axis=1)
target = data["Exited"]

# --- Route columns to a transformer by dtype ---------------------------------
# Only int64/float64 and object columns are selected. ColumnTransformer's
# default remainder="drop" silently discards columns of any other dtype.
# NOTE(review): every non-target column is used as a predictor. If the CSV
# carries identifier-like columns they are modelled as well — confirm the schema.
numerical_cols = features.select_dtypes(include=['int64', 'float64']).columns
categorical_cols = features.select_dtypes(include=['object']).columns

# --- Preprocessing -----------------------------------------------------------
# Pipeline and ColumnTransformer come from the import block at the top of the
# cell. Importing Pipeline only after its first use raised NameError on a
# fresh kernel.

# Numeric columns: fill gaps with the column median, then standardise.
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode. handle_unknown='ignore' keeps categories that appear only at
# prediction time from raising an error.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

# --- Hold-out split ----------------------------------------------------------
# 80/20 split with a fixed seed so the reported metrics are reproducible.
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
def evaluate_model(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` behind the shared preprocessor and score it on the test split.

    The estimator is chained after the module-level ``preprocessor`` in a single
    Pipeline, which is fitted on the training data and then used to predict the
    test data.

    Parameters
    ----------
    model : estimator
        Classifier placed as the final ``'model'`` step of the pipeline.
    X_train, y_train :
        Training predictors and labels passed to ``fit``.
    X_test, y_test :
        Held-out predictors passed to ``predict`` and the labels the
        predictions are scored against.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)``, each computed from ``y_test``
        and the predictions with the metric's default arguments.
    """
    full_pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('model', model),
    ])
    # fit() returns the pipeline itself, so prediction can be chained onto it.
    predictions = full_pipeline.fit(X_train, y_train).predict(X_test)

    # Order matters: callers unpack the result positionally.
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    return tuple(score(y_test, predictions) for score in scorers)
def print_metrics(model_name, accuracy, precision, recall, f1):
    """Print one model's hold-out metrics under a header line.

    Parameters
    ----------
    model_name : str
        Label printed as ``"<model_name>:"`` after a leading blank line.
    accuracy, precision, recall, f1 :
        Metric values, printed one per line in that order.
    """
    print(f"\n{model_name}:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)


# Each candidate is evaluated on the same train/test split via evaluate_model.
# The per-model result names are kept at module level for later use.

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_accuracy, rf_precision, rf_recall, rf_f1 = evaluate_model(rf, X_train, y_train, X_test, y_test)
print_metrics("Random Forest", rf_accuracy, rf_precision, rf_recall, rf_f1)

# max_iter is raised to 1000 (explicitly set by the original author).
logreg = LogisticRegression(max_iter=1000, random_state=42)
logreg_accuracy, logreg_precision, logreg_recall, logreg_f1 = evaluate_model(logreg, X_train, y_train, X_test, y_test)
print_metrics("Logistic Regression", logreg_accuracy, logreg_precision, logreg_recall, logreg_f1)

gbc = GradientBoostingClassifier(n_estimators=100, random_state=42)
gbc_accuracy, gbc_precision, gbc_recall, gbc_f1 = evaluate_model(gbc, X_train, y_train, X_test, y_test)
print_metrics("Gradient Boosting", gbc_accuracy, gbc_precision, gbc_recall, gbc_f1)






Random Forest:
Accuracy: 0.8535
Precision: 0.8012048192771084
Recall: 0.3384223918575064
F1 Score: 0.47584973166368516

Logistic Regression:
Accuracy: 0.809
Precision: 0.5364238410596026
Recall: 0.20610687022900764
F1 Score: 0.2977941176470588

Gradient Boosting:
Accuracy: 0.86
Precision: 0.728744939271255
Recall: 0.4580152671755725
F1 Score: 0.5625
