<a href="https://colab.research.google.com/github/Raiyan-DEA/Dataframe_content/blob/main/2_Model_Evaluation_Feature_Engineering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# STEP 1: Install and Import Required Libraries
!pip install -q imbalanced-learn

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE

In [16]:
# STEP 2: Load Dataset (Telco Churn)
# url = "https://raw.githubusercontent.com/blastchar/telco-churn/master/WA_Fn-UseC_-Telco-Customer-Churn.csv"
df = pd.read_csv('/content/WA_Fn-UseC_-Telco-Customer-Churn.csv')
df.dropna(inplace=True)


In [17]:
# STEP 3: Encode Target Column
le = LabelEncoder()
df['Churn'] = le.fit_transform(df['Churn'])

In [19]:
print(df['Churn'])

0       0
1       0
2       1
3       0
4       1
       ..
7038    0
7039    0
7040    0
7041    1
7042    0
Name: Churn, Length: 7043, dtype: int64


In [20]:
# STEP 4: Convert Non-Numeric Columns
cat_cols = df.select_dtypes(include='object').columns.drop('customerID')
df = pd.get_dummies(df, columns=cat_cols, drop_first=True)


In [21]:
# STEP 5: Train/Test Split
X = df.drop(['customerID', 'Churn'], axis=1)
y = df['Churn']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=42)



In [22]:
# STEP 6: Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [24]:
# STEP 7: Train Logistic Regression
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)


In [25]:
# STEP 8: Evaluate Model (Before Balancing)
print("Before Handling Imbalance")
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


Before Handling Imbalance
[[1612  185]
 [ 348  321]]
              precision    recall  f1-score   support

           0       0.82      0.90      0.86      1797
           1       0.63      0.48      0.55       669

    accuracy                           0.78      2466
   macro avg       0.73      0.69      0.70      2466
weighted avg       0.77      0.78      0.77      2466



In [26]:
# STEP 9: Handle Imbalanced Classes with SMOTE
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train_scaled, y_train)

model_smote = LogisticRegression(max_iter=1000)
model_smote.fit(X_res, y_res)
y_pred_smote = model_smote.predict(X_test_scaled)

In [27]:
# STEP 10: Evaluate Model (After SMOTE)
print("After Handling Imbalance (SMOTE)")
print(confusion_matrix(y_test, y_pred_smote))
print(classification_report(y_test, y_pred_smote))



After Handling Imbalance (SMOTE)
[[1550  247]
 [ 308  361]]
              precision    recall  f1-score   support

           0       0.83      0.86      0.85      1797
           1       0.59      0.54      0.57       669

    accuracy                           0.77      2466
   macro avg       0.71      0.70      0.71      2466
weighted avg       0.77      0.77      0.77      2466



In [None]:
# STEP 11: Feature Importance
importance = pd.Series(model_smote.coef_[0], index=X.columns).sort_values(ascending=False)
plt.figure(figsize=(10, 6))
importance.head(15).plot(kind='barh')
plt.title("Top 15 Important Features")
plt.gca().invert_yaxis()
plt.show()