In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

# Simulate a fraud detector on a heavily imbalanced data set and evaluate it.
# Purpose of the cell: show that high accuracy and high recall can coexist with
# poor precision when the positive class (fraud) is rare.

np.random.seed(42)  # fixed seed so the simulated labels and the report are reproducible

n = 8000
fraud_rate = 0.01  # 1% of transactions are fraud

# Simulated model behaviour, expressed as outcome probabilities per true class.
fraud_outcome_probs = [0.98, 0.02]  # actual fraud: P(predict 1), P(predict 0)
legit_outcome_probs = [0.97, 0.03]  # actual legitimate: P(predict 0), P(predict 1)

# Generate actual labels: 1 = Fraud, 0 = Legitimate
actual_fraud = np.random.choice([1, 0], size=n, p=[fraud_rate, 1 - fraud_rate])

# Generate predictions: one random draw per transaction, in row order.
predicted_fraud = []
for actual in actual_fraud:
    if actual == 1:
        # Fraud: 98% chance model detects it
        predicted_fraud.append(np.random.choice([1, 0], p=fraud_outcome_probs))
    else:
        # Legitimate: 97% chance model correctly ignores it
        predicted_fraud.append(np.random.choice([0, 1], p=legit_outcome_probs))

# Create DataFrame
df = pd.DataFrame({
    'ActualFraud': actual_fraud,
    'PredictedFraud': predicted_fraud
})

# Evaluate. Labels are passed explicitly so that row/column 0 is always
# Legitimate and row/column 1 is always Fraud, and so the matrix is 2x2 even
# if one class happens to be absent from the sample.
class_labels = [0, 1]
cm = confusion_matrix(df['ActualFraud'], df['PredictedFraud'], labels=class_labels)
report = classification_report(
    df['ActualFraud'],
    df['PredictedFraud'],
    labels=class_labels,
    target_names=['Legitimate', 'Fraud'],
)

# The report below shows high recall but low precision for Fraud, so the
# takeaway is about false alarms, not about missed fraud.
print("This model has a high accuracy overall and catches almost all of the fraud (high recall), but because fraud accounts for only a small percentage of the data set, most of its fraud alerts are false positives (low precision). Judging it by accuracy alone is the base rate fallacy pitfall.")
print("\nClassification Report:")
print(report)

# Positive Predictive Value (Precision for Fraud)
tp = cm[1, 1]  # actual fraud, predicted fraud
fp = cm[0, 1]  # actual legitimate, predicted fraud
flagged = tp + fp
# Guard against 0/0 when nothing was flagged as fraud (would warn and give NaN).
ppv = tp / flagged if flagged > 0 else float('nan')
print(f"\nPositive Predictive Value (Precision for Fraud): {ppv:.4f}")


This model has a high accuracy overall, but is not picking up the minority class because this class only accounts for a small percentage of the data set soliciting a fallacy pitfall.

Classification Report:
              precision    recall  f1-score   support

  Legitimate       1.00      0.97      0.99      7926
       Fraud       0.24      0.99      0.39        74

    accuracy                           0.97      8000
   macro avg       0.62      0.98      0.69      8000
weighted avg       0.99      0.97      0.98      8000


Positive Predictive Value (Precision for Fraud): 0.2425


In [None]:
# Display the ActualFraud / PredictedFraud frame held in `df` as this cell's output.
df

Unnamed: 0,ActualFraud,PredictedFraud
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
...,...,...
7995,0,0
7996,0,0
7997,0,0
7998,0,0


In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

# Re-run the fraud simulation with a lower false positive rate on legitimate
# transactions (0.5% instead of 3%) and evaluate it the same way, to show how
# much fraud precision depends on the false positive rate when fraud is rare.
# NOTE: no class weighting or undersampling is performed in this cell; the
# improvement is simulated purely by changing the legitimate-class probabilities.

np.random.seed(42)  # same seed as the baseline cell, so the actual labels match

n = 8000
fraud_rate = 0.01  # 1% of transactions are fraud

# Simulated model behaviour, expressed as outcome probabilities per true class.
fraud_outcome_probs = [0.98, 0.02]    # actual fraud: P(predict 1), P(predict 0)
legit_outcome_probs = [0.995, 0.005]  # actual legitimate: P(predict 0), P(predict 1)

# Generate actual labels: 1 = Fraud, 0 = Legitimate
actual_fraud = np.random.choice([1, 0], size=n, p=[fraud_rate, 1 - fraud_rate])

# Generate predictions: one random draw per transaction, in row order.
predicted_fraud = []
for actual in actual_fraud:
    if actual == 1:
        # Fraud: 98% chance model detects it
        predicted_fraud.append(np.random.choice([1, 0], p=fraud_outcome_probs))
    else:
        # Improved (99.5% correct legit → 0.5% FP)
        predicted_fraud.append(np.random.choice([0, 1], p=legit_outcome_probs))

# Create DataFrame
df = pd.DataFrame({
    'ActualFraud': actual_fraud,
    'PredictedFraud': predicted_fraud
})

# Evaluate. Labels are passed explicitly so that row/column 0 is always
# Legitimate and row/column 1 is always Fraud, and so the matrix is 2x2 even
# if one class happens to be absent from the sample.
class_labels = [0, 1]
cm = confusion_matrix(df['ActualFraud'], df['PredictedFraud'], labels=class_labels)
report = classification_report(
    df['ActualFraud'],
    df['PredictedFraud'],
    labels=class_labels,
    target_names=['Legitimate', 'Fraud'],
)

print("This model is more accurate because its false positive rate on legitimate transactions drops from 3% to 0.5%, so far fewer fraud alerts are false alarms and precision for fraud rises.")
print("\nClassification Report:")
print(report)

# Positive Predictive Value (Precision for Fraud)
tp = cm[1, 1]  # actual fraud, predicted fraud
fp = cm[0, 1]  # actual legitimate, predicted fraud
flagged = tp + fp
# Guard against 0/0 when nothing was flagged as fraud (would warn and give NaN).
ppv = tp / flagged if flagged > 0 else float('nan')
print(f"\nPositive Predictive Value (Precision for Fraud): {ppv:.4f}")


This model is more accurate because of added weights and added undersampling.

Classification Report:
              precision    recall  f1-score   support

  Legitimate       1.00      0.99      1.00      7926
       Fraud       0.65      0.99      0.78        74

    accuracy                           0.99      8000
   macro avg       0.82      0.99      0.89      8000
weighted avg       1.00      0.99      1.00      8000


Positive Predictive Value (Precision for Fraud): 0.6460
