In [2]:
# Scenario 1: Class Imbalance
# Scenario:
# Class imbalance occurs when the number of instances in different classes is significantly different.
# For example, in a binary classification problem, one class has many more instances than the other.
# Issue with Accuracy:
#               Accuracy can be misleading in the presence of class imbalance.
#               A classifier that predicts the majority class for all instances can have high accuracy but provides little value.
# Advantage of F1-score:
#               F1-score considers both precision and recall, making it a more informative metric in the presence of imbalanced classes.
#               Because it ignores true negatives, it is not inflated by correctly predicting a large majority (negative) class.

import numpy as np
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# True labels (binary classification: 0 and 1): 7 negatives and 3 positives
y_true = np.array([0, 0, 1, 0, 0, 1, 0, 0, 1, 0])

# Predictions: a trivial baseline that always outputs the majority class (0)
y_pred_majority = np.zeros_like(y_true) # array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

# Metrics
accuracy_majority = accuracy_score(y_true, y_pred_majority)
# No sample is predicted positive, so precision is 0/0. zero_division=0 states
# explicitly that the score is 0 in that case instead of relying on the
# "warn" default (which emits UndefinedMetricWarning on scikit-learn versions
# that warn for this situation).
f1_majority = f1_score(y_true, y_pred_majority, zero_division=0)

print("Scenario 1: Class Imbalance")
print("Confusion Matrix:")
# Rows are true classes, columns are predicted classes.
print(confusion_matrix(y_true, y_pred_majority))
print("Accuracy:", accuracy_majority)
print("F1 Score:", f1_majority)

Scenario 1: Class Imbalance
Confusion Matrix:
[[7 0]
 [3 0]]
Accuracy: 0.7
F1 Score: 0.0


In [4]:
# 2. Asymmetric Costs:
# Scenario:
# In some applications, the cost of false positives (Type I errors) may be different from the cost of false negatives (Type II errors).
# For example, in medical diagnosis, a false negative (missing a disease) might have more severe consequences than a false positive.
# Issue with Accuracy:
#               Accuracy treats false positives and false negatives equally, which may not reflect the true impact of different types of errors.
# Advantage of F1-score:
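#               F1-score ignores true negatives and combines precision and recall, so it exposes missed positives that accuracy can hide.
#               Note that F1 weights precision and recall equally; when the two error types have different costs, prefer F-beta
#               (beta > 1 favours recall, beta < 1 favours precision) or report precision and recall separately.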

# Balanced labels: five negatives and five positives. Every positive that the
# classifier misses is a false negative, the costly error in this scenario.
y_true_asymmetric = np.array([0, 0, 1, 1, 0, 1, 0, 1, 1, 0])

# Predictions: always predict the negative class (0). The classes are balanced
# here, so this is not a majority-class baseline; it simply misses every positive.
y_pred_majority_asymmetric = np.zeros_like(y_true_asymmetric)

# Metrics
accuracy_majority_asymmetric = accuracy_score(y_true_asymmetric, y_pred_majority_asymmetric)
# No sample is predicted positive, so precision is 0/0. zero_division=0 states
# explicitly that the score is 0 in that case instead of relying on the
# "warn" default (which emits UndefinedMetricWarning on scikit-learn versions
# that warn for this situation).
f1_majority_asymmetric = f1_score(y_true_asymmetric, y_pred_majority_asymmetric, zero_division=0)

print("\nScenario 2: Asymmetric Costs")
print("Confusion Matrix:")
# Rows are true classes, columns are predicted classes.
print(confusion_matrix(y_true_asymmetric, y_pred_majority_asymmetric))
print("Accuracy:", accuracy_majority_asymmetric)
print("F1 Score:", f1_majority_asymmetric)



Scenario 2: Asymmetric Costs
Confusion Matrix:
[[5 0]
 [5 0]]
Accuracy: 0.5
F1 Score: 0.0


In [12]:
# 3. Threshold Sensitivity:
# Scenario:
# In some cases, you might need to adjust the classification threshold to prioritize precision or recall based on the application's requirements.
# Issue with Accuracy:
#               Accuracy doesn't provide insights into the classifier's behavior at different thresholds.
# Advantage of F1-score:
#               F1-score is computed from precision and recall at one chosen threshold; recomputing it as the threshold moves
#               shows how the precision/recall trade-off shifts, providing a more nuanced evaluation of model performance.

# Decision threshold for binary classification (0.3 for demonstration purposes).
# Named once so the cut-off can be changed in a single place.
DECISION_THRESHOLD = 0.3

# Predicted scores (not hard labels). The last sample scores exactly 0.3,
# i.e. it sits right on the decision threshold.
y_pred_threshold = np.array([0, 0, 1, 0, 0, 1, 0, 0, 1, 0.3])

# Binarize predictions based on the threshold. The comparison is inclusive,
# so the borderline last sample is labelled positive.
y_pred_binary = (y_pred_threshold >= DECISION_THRESHOLD).astype(int)

# Metrics (y_true is the label array defined in the Scenario 1 cell; its last
# label is 0, so the borderline sample counts as a false positive)
accuracy_threshold = accuracy_score(y_true, y_pred_binary)
f1_threshold = f1_score(y_true, y_pred_binary)

print("\nScenario 3: Threshold Sensitivity")
print("Confusion Matrix:")
# Rows are true classes, columns are predicted classes.
print(confusion_matrix(y_true, y_pred_binary))
print("Accuracy:", accuracy_threshold)
print("F1 Score:", f1_threshold)



Scenario 3: Threshold Sensitivity
Confusion Matrix:
[[6 1]
 [0 3]]
Accuracy: 0.9
F1 Score: 0.8571428571428571
