<a href="https://colab.research.google.com/github/peeka-boo0/ml-learning-journey/blob/main/noteboooks/Day_16_class_wight_%26_SMOTE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import SMOTE

# Load the breast-cancer dataset bundled with scikit-learn and separate the
# feature matrix from the target labels.
data = load_breast_cancer()
X = data.data
y = data.target

# Hold out 30% of the samples as a test set so every model below is scored on
# data it never saw during training (avoids data leakage).
# NOTE: no class is artificially reduced here -- the dataset's own class ratio
# is kept, and stratify=y preserves that ratio in both the train and test split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# ---- 1️⃣ Normal Logistic Regression ----
# Baseline model with no imbalance handling; the two variants below are
# compared against this report.
# max_iter is raised to 5000, presumably so the solver converges on the
# unscaled features -- TODO confirm (scaling the inputs would be the usual fix).
clf = LogisticRegression(max_iter=5000).fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Header line followed by the per-class precision / recall / f1 table.
print(
    "=== Normal Logistic Regression ===",
    classification_report(y_test, y_pred),
    sep="\n",
)

# ---- 2️⃣ Handle imbalance with class_weight ----
# class_weight="balanced" re-weights the loss so each class's weight is
# inversely proportional to its frequency in y_train: mistakes on the minority
# class cost more, without adding or removing any samples.
clf_weighted = LogisticRegression(class_weight="balanced", max_iter=5000).fit(
    X_train, y_train
)
y_pred_weighted = clf_weighted.predict(X_test)

# Same report layout as the baseline so the tables can be compared directly.
print(
    "=== Class Weight Balanced Logistic Regression ===",
    classification_report(y_test, y_pred_weighted),
    sep="\n",
)

# ---- 3️⃣ Handle imbalance with SMOTE ----
# SMOTE oversamples the minority class by generating synthetic samples.
# It is fitted on the training split only; the test set is left untouched so
# the evaluation still reflects the original class distribution.
smote = SMOTE(random_state=42)  # fixed seed -> reproducible synthetic samples
X_res, y_res = smote.fit_resample(X_train, y_train)

# Plain (unweighted) logistic regression trained on the resampled data.
clf_smote = LogisticRegression(max_iter=5000).fit(X_res, y_res)
y_pred_smote = clf_smote.predict(X_test)

print(
    "=== Logistic Regression with SMOTE ===",
    classification_report(y_test, y_pred_smote),
    sep="\n",
)


=== Normal Logistic Regression ===
              precision    recall  f1-score   support

           0       0.97      0.89      0.93        64
           1       0.94      0.98      0.96       107

    accuracy                           0.95       171
   macro avg       0.95      0.94      0.94       171
weighted avg       0.95      0.95      0.95       171

=== Class Weight Balanced Logistic Regression ===
              precision    recall  f1-score   support

           0       0.97      0.91      0.94        64
           1       0.95      0.98      0.96       107

    accuracy                           0.95       171
   macro avg       0.96      0.94      0.95       171
weighted avg       0.95      0.95      0.95       171

=== Logistic Regression with SMOTE ===
              precision    recall  f1-score   support

           0       0.97      0.91      0.94        64
           1       0.95      0.98      0.96       107

    accuracy                           0.95       171
   m