In [None]:
# Mount Google Drive at /content/drive so files stored there can be read
# from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from google.colab import drive
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


# Location of the churn CSV inside the mounted Google Drive
path_to_dataset = '/content/drive/MyDrive/Churn_Modelling.csv'

# Read the whole CSV into a DataFrame
df = pd.read_csv(path_to_dataset)

# Quick sanity check: show the leading rows and the available columns.
# sep="\n" places each heading and its value on separate lines.
print("Dataset preview:", df.head(), sep="\n")
print("\nColumn names:", df.columns, sep="\n")

# Name of the label column the models will predict
target_column = 'Exited'

# Fail early with a clear message when the label column is absent
if target_column not in df.columns:
    raise KeyError(f"'{target_column}' column not found in the dataset. Please check the column names.")

# Label vector
y = df[target_column]

# Feature matrix: remove the label and the identifier-like columns
# (RowNumber, CustomerId, Surname), then one-hot encode whatever categorical
# columns remain. drop_first=True omits one dummy level per category.
X = pd.get_dummies(
    df.drop(columns=[target_column, 'RowNumber', 'CustomerId', 'Surname']),
    drop_first=True,
)

# Hold out 20% of the rows for testing, with a fixed seed for reproducibility.
# stratify=y makes both partitions keep the same class proportions as the
# full target column, so the evaluation is not skewed by an unlucky split
# (this matters most when one class is much rarer than the other).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize the features. The scaler's statistics are learned from the
# training rows only and then applied unchanged to the test rows, so no
# information from the test set leaks into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create the three classifiers, each with a fixed seed
log_reg = LogisticRegression(random_state=42)
rand_forest = RandomForestClassifier(random_state=42)
grad_boost = GradientBoostingClassifier(random_state=42)

# Fit every classifier on the same training data, in declaration order
models = (log_reg, rand_forest, grad_boost)
for model in models:
    model.fit(X_train, y_train)

# Predict test-set labels with each fitted classifier
log_reg_preds, rand_forest_preds, grad_boost_preds = [
    model.predict(X_test) for model in models
]

# Report accuracy, confusion matrix and per-class metrics for each model.
# Each entry pairs the section heading with that model's test-set predictions.
evaluations = (
    ("Logistic Regression:", log_reg_preds),
    ("Random Forest:", rand_forest_preds),
    ("Gradient Boosting:", grad_boost_preds),
)
for heading, preds in evaluations:
    print(heading)
    print("Accuracy:", accuracy_score(y_test, preds))
    print("Confusion Matrix:\n", confusion_matrix(y_test, preds))
    print("Classification Report:\n", classification_report(y_test, preds))
