In [1]:
# Binary Classification Model Evaluation with Logistic Regression and Probability Thresholding in Python

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report

In [3]:
# Build a reproducible toy problem for a two-class classifier:
# 1000 samples with 10 features each. The fixed random_state makes
# every run produce the same data.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=42,
)

In [4]:
# Preview the first 5 feature rows and their labels.
# A single print with a newline separator emits each item on its own line;
# the leading "\n" in the labels heading leaves a blank line between sections.
print(
    "Dataset:",
    "X (features):",
    X[:5],
    "\ny (labels):",
    y[:5],
    sep="\n",
)

Dataset:
X (features):
[[ 0.96479937 -0.06644898  0.98676805 -0.35807945  0.99726557  1.18189004
  -1.61567885 -1.2101605  -0.62807677  1.22727382]
 [-0.91651053 -0.56639459 -1.00861409  0.83161679 -1.17696211  1.82054391
   1.75237485 -0.98453405  0.36389642  0.20947008]
 [-0.10948373 -0.43277388 -0.4576493   0.79381847 -0.26864575 -1.83635978
   1.23908594 -0.2463834  -1.05814521 -0.29737608]
 [ 1.75041163  2.02360622  1.68815935  0.00679984 -1.60766103  0.18474058
  -2.61942676 -0.35744542 -1.47312719 -0.19003904]
 [-0.22472606 -0.71130323 -0.22077758  0.11712422  1.53606118  0.59753771
   0.34864462 -0.93915557  0.17591477  0.23622365]]

y (labels):
[0 1 1 0 1]


In [6]:
# Hold out 20% of the samples for evaluation and train on the remaining 80%.
# The fixed random_state keeps the partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Fit a logistic regression classifier (library default hyper-parameters)
# on the training portion only; the test portion stays unseen until evaluation.
model = LogisticRegression()
model.fit(X=X_train, y=y_train)

In [8]:
# Score the held-out samples. predict_proba yields one column per class;
# column index 1 is kept as the positive-class probability.
proba_by_class = model.predict_proba(X_test)
y_pred_prob = proba_by_class[:, 1]

In [10]:
# Sanity check: show the first five predicted positive-class probabilities.
print(y_pred_prob[:5])

[0.2213977  0.96808874 0.35784756 0.9252606  0.02459104]


In [11]:
# Decision threshold for the positive class, named so it can be tuned in one
# place instead of being buried in the comparison below. Lowering it generally
# trades precision for recall; raising it does the opposite.
DECISION_THRESHOLD = 0.5

# A sample is labelled 1 only when its positive-class probability is strictly
# greater than the threshold; otherwise it is labelled 0.
y_pred = (y_pred_prob > DECISION_THRESHOLD).astype(int)

In [12]:
# Sanity check: show the first five binary labels obtained by thresholding y_pred_prob.
print(y_pred[:5])

[0 1 0 1 0]


In [13]:
# Compare the thresholded predictions against the held-out ground truth.
# Arguments are passed by keyword so true and predicted labels cannot be
# swapped by accident (the confusion matrix is not symmetric in them:
# rows are true classes, columns are predicted classes).
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)

In [14]:
# Report both evaluation artefacts. Each item goes on its own line; the
# leading "\n" in the second heading leaves a blank line between the sections.
print(
    "Confusion Matrix:",
    conf_matrix,
    "\nClassification Report:",
    classification_rep,
    sep="\n",
)

Confusion Matrix:
[[75 14]
 [20 91]]

Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.84      0.82        89
           1       0.87      0.82      0.84       111

    accuracy                           0.83       200
   macro avg       0.83      0.83      0.83       200
weighted avg       0.83      0.83      0.83       200

