<a href="https://colab.research.google.com/github/snehagandla30/codsoft/blob/main/ML%20Projects/Customer_Churn_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-only cell: prompts for a file upload inside the notebook session.
# NOTE(review): presumably used to supply 'Churn_Modelling.csv', which the
# next cell reads from the working directory — confirm. Outside Colab this
# import is unavailable and the cell should be skipped.
from google.colab import files
uploaded = files.upload()

In [None]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Step 2: Read the churn dataset from the working directory
data = pd.read_csv('Churn_Modelling.csv')

# Step 3: Report the number of missing values found in each column
missing_counts = data.isnull().sum()
print(missing_counts)

# Step 4: Preprocessing the data

# Drop the columns that are not used as model features.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])

# Encode categorical variables (Geography, Gender) as integer codes.
# Each column gets its own encoder: refitting a single shared encoder on
# Gender would discard the fitted Geography mapping, so that column could no
# longer be decoded (inverse_transform) or encoded consistently for new rows.
# NOTE(review): integer codes impose an arbitrary order on Geography; consider
# one-hot encoding if the linear model should treat it as unordered.
geography_encoder = LabelEncoder()
data['Geography'] = geography_encoder.fit_transform(data['Geography'])

gender_encoder = LabelEncoder()
data['Gender'] = gender_encoder.fit_transform(data['Gender'])

# Backward-compatible alias: previously `labelencoder` ended up fitted on
# Gender, so keep that name pointing at the Gender encoder.
labelencoder = gender_encoder

# Step 5: Separate the target column ('Exited') from the feature columns
y = data['Exited']
X = data.drop(columns=['Exited'])

# Step 6: Hold out 20% of the rows for testing; the fixed seed keeps the
# split reproducible between runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Step 7: Standardize the features
# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Step 8: Train models
# Both estimators use the same seed and the same standardized training data;
# `fit` returns the estimator itself, so construction and fitting are chained.

# Logistic regression model
log_reg_model = LogisticRegression(random_state=42).fit(X_train, y_train)

# Random forest classifier model
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Step 9: Predict the test-set labels with each fitted model
# (logistic regression first, then random forest).
log_reg_predictions, rf_predictions = (
    model.predict(X_test) for model in (log_reg_model, rf_model)
)

# Step 10: Evaluate the models
# Print accuracy, confusion matrix and classification report for each model
# against the held-out labels: logistic regression first, then random forest.
for heading, predictions in (
    ("Logistics Regression Evaluation:", log_reg_predictions),
    ("\nRandom Forest Evaluation:", rf_predictions),
):
    print(heading)
    print("Accuracy:", accuracy_score(y_test, predictions))
    print("Confusion Matrix:\n", confusion_matrix(y_test, predictions))
    print("Classification Report:\n", classification_report(y_test, predictions))

# Step 11: Output Predictions
# Print a bounded sample rather than every test row: the heading promises a
# sample, and dumping the whole test set floods the notebook output.
# Each line shows the original dataframe index of the row, its true label and
# what each model predicted for it.
SAMPLE_SIZE = 10

print("\nSample of Predictions:")
for row_id, true_label, log_reg_pred, rf_pred in zip(
    y_test.index[:SAMPLE_SIZE],
    y_test.iloc[:SAMPLE_SIZE],
    log_reg_predictions[:SAMPLE_SIZE],
    rf_predictions[:SAMPLE_SIZE],
):
    print(f"ID: {row_id}, True Exited: {true_label}, Logistic Regression Prediction: {log_reg_pred}, Random Forest Prediction: {rf_pred}")