In [1]:
#1. Import the necessary libraries and load the Breast Cancer dataset:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [8]:
# Load the dataset bundle once, then pull the feature matrix (X) and the
# label vector (y) out of it as separate names.
data = load_breast_cancer()
X = data.data
y = data.target


In [3]:
#2. Split the dataset into training and testing sets:

# Hold out 20% of the samples for evaluation; the fixed random_state makes
# the split reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [4]:
#3. Train a logistic regression model:

# Build the estimator with a generous iteration cap, then fit it on the
# training split. The fit call is kept as the cell's last expression so the
# fitted estimator is still displayed as the cell output.
classifier = LogisticRegression(max_iter=10_000)
classifier.fit(X=X_train, y=y_train)


LogisticRegression(max_iter=10000)

In [5]:
#Import numpy for further analysis
import numpy as np

In [6]:
#4. Experiment with different threshold values:

# Candidate decision thresholds 0.1, 0.2, ..., 0.9. They are built from an
# integer range and then scaled, because np.arange with a fractional step
# accumulates rounding error (e.g. 0.30000000000000004) and does not
# guarantee how many elements it returns.
thresholds = np.arange(1, 10) / 10

# Column 1 of predict_proba is the predicted probability of the class
# labelled 1 (columns follow classifier.classes_). It is computed once, since
# it does not depend on the threshold.
y_probs = classifier.predict_proba(X_test)[:, 1]

for threshold in thresholds:
    # Predict class 1 whenever its probability reaches the threshold, else 0.
    y_pred = (y_probs >= threshold).astype(int)
    print(f"Threshold: {threshold:.1f}")
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))


Threshold: 0.1
[[38  5]
 [ 1 70]]
              precision    recall  f1-score   support

           0       0.97      0.88      0.93        43
           1       0.93      0.99      0.96        71

    accuracy                           0.95       114
   macro avg       0.95      0.93      0.94       114
weighted avg       0.95      0.95      0.95       114

Threshold: 0.2
[[39  4]
 [ 1 70]]
              precision    recall  f1-score   support

           0       0.97      0.91      0.94        43
           1       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

Threshold: 0.3
[[39  4]
 [ 1 70]]
              precision    recall  f1-score   support

           0       0.97      0.91      0.94        43
           1       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96   

In [None]:
#5. Analyze the results to identify the optimal threshold:

# From the results above, compare the trade-off between false positives and false negatives at each
# threshold value. Choose the threshold that best balances this trade-off, or the one that maximizes the
# chosen performance metric, such as F1-score, precision, or recall.

# Note: The optimal threshold value depends on the specific problem context and on the relative cost of
# false positives vs. false negatives in that context.
