
Metric Engineering Beyond Accuracy

In [31]:
import numpy as np
import pandas as pd

# Seed NumPy's global random state so the np.random draws in the following
# cells produce the same values on every fresh run of the notebook.
np.random.seed(42)
print("Libraries imported and random seed set.")

Libraries imported and random seed set.


In [32]:
n_samples = 1000

# Synthetic feature columns. Keyword arguments are evaluated left to right,
# so the random draws happen in the order age -> premium -> claims score.
data = dict(
    user_id=np.arange(n_samples),
    age=np.random.randint(18, 80, n_samples),
    premium=np.random.uniform(500, 3000, n_samples),
    claims_history_score=np.random.uniform(0, 1, n_samples),
)

df = pd.DataFrame(data)

# Imbalanced binary target 'is_fraud': start from a small base rate ...
base_fraud_probability = 0.05

# ... raise it for the youngest (< 30) and oldest (> 60) policy holders.
# The two groups cannot overlap, so at most one uplift applies per row.
is_young = df['age'] < 30
is_senior = df['age'] > 60
age_uplift = np.where(is_young, 0.02, 0.0) + np.where(is_senior, 0.01, 0.0)

# ... and lower it as the claims history score grows (lower score -> higher risk).
raw_probability = base_fraud_probability + age_uplift - df['claims_history_score'] * 0.05

# Keep every per-row probability inside [0.01, 0.15].
fraud_probability = np.clip(raw_probability, 0.01, 0.15)

# One Bernoulli draw per row; the probability itself is never stored in df.
df['is_fraud'] = (np.random.rand(n_samples) < fraud_probability).astype(int)

print(f"Synthetic Insurance Dataset created with {n_samples} samples.")
print("Dataset head:")
print(df.head())
print("\nFraud distribution:")
print(df['is_fraud'].value_counts(normalize=True))



Synthetic Insurance Dataset created with 1000 samples.
Dataset head:
   user_id  age      premium  claims_history_score  is_fraud
0        0   56  1441.847425              0.380075         0
1        1   69  2373.945750              0.197154         0
2        2   46  1482.473622              0.479183         0
3        3   32  2572.910552              0.194558         0
4        4   60  1922.703673              0.166332         0

Fraud distribution:
is_fraud
0    0.966
1    0.034
Name: proportion, dtype: float64


In [33]:
def f_beta_score(y_true, y_pred, beta):
    """
    Calculates the F_beta score for binary classification.

    The score is the weighted harmonic mean of precision (P) and recall (R):
    F_beta = (1 + beta^2) * P * R / (beta^2 * P + R).

    Args:
        y_true (list or np.array): True labels (1 = positive class, 0 = negative class).
        y_pred (list or np.array): Predicted labels, same shape as y_true.
        beta (float): The beta parameter that determines the weight of recall in the combined score.
                      beta < 1 lends more weight to precision, while beta > 1 favors recall.
                      beta = 1 is the F1 score. Must not be negative.

    Returns:
        float: The calculated F_beta score. 0.0 is returned when the score is
               undefined (no true positives, so precision and recall are both 0).

    Raises:
        ValueError: If y_true and y_pred have different shapes, or beta is negative.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Without this check NumPy would silently broadcast e.g. 3 labels against
    # 1 prediction and return a meaningless score.
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}"
        )
    if beta < 0:
        raise ValueError(f"beta must be non-negative, got {beta}")

    # Calculate True Positives (TP), False Positives (FP), False Negatives (FN)
    TP = np.sum((y_true == 1) & (y_pred == 1))
    FP = np.sum((y_true == 0) & (y_pred == 1))
    FN = np.sum((y_true == 1) & (y_pred == 0))

    # Precision is defined as 0 when nothing was predicted positive
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0

    # Recall is defined as 0 when there are no actual positives
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

    # Guard the harmonic mean against a zero denominator (precision == recall == 0)
    denominator = (beta**2 * precision) + recall
    if not denominator > 0:
        return 0.0

    # Always hand back a plain Python float, as documented
    return float((1 + beta**2) * (precision * recall) / denominator)

print("f_beta_score function defined.")

f_beta_score function defined.


In [34]:
def mcme_score(y_true, y_pred_proba, y_pred):
    """
    Calculates the Mean Confident Misclassification Error (MCME).

    For every misclassified sample the model's confidence in its (wrong)
    predicted label is taken, and the mean of those confidences is returned.
    The confidence is y_pred_proba when the predicted label is 1 and
    1 - y_pred_proba otherwise.

    Args:
        y_true (list or np.array): True labels.
        y_pred_proba (list or np.array): Predicted probabilities for the positive class.
        y_pred (list or np.array): Predicted labels.

    Returns:
        float: The calculated MCME score; 0.0 when nothing is misclassified.

    Raises:
        ValueError: If the three inputs do not all have the same shape.
    """
    y_true = np.asarray(y_true)
    y_pred_proba = np.asarray(y_pred_proba, dtype=float)
    y_pred = np.asarray(y_pred)

    # Reject mismatched inputs instead of letting NumPy broadcast them silently
    if not (y_true.shape == y_pred_proba.shape == y_pred.shape):
        raise ValueError(
            "y_true, y_pred_proba and y_pred must have the same shape, got "
            f"{y_true.shape}, {y_pred_proba.shape} and {y_pred.shape}"
        )

    # Identify misclassified instances
    misclassified = y_true != y_pred
    if not misclassified.any():
        return 0.0  # No misclassifications, MCME is 0

    # Confidence the model placed in the label it actually predicted:
    # p for a predicted 1 (false positives), 1 - p for a predicted 0 (false negatives)
    confidence_in_prediction = np.where(y_pred == 1, y_pred_proba, 1 - y_pred_proba)

    # Mean confidence over the misclassified samples only
    return float(confidence_in_prediction[misclassified].mean())

print("mcme_score function defined.")

mcme_score function defined.


In [35]:
def brapl_score(y_true, y_pred, costs_benefits):
    """
    Calculates the Business Risk-Adjusted Profit/Loss (BRAPL).

    Each of the four confusion-matrix outcomes is counted and weighted by its
    financial impact: benefits (TP, TN) are added, costs (FP, FN) subtracted.

    Args:
        y_true (list or np.array): True labels.
        y_pred (list or np.array): Predicted labels.
        costs_benefits (dict): Financial impact per outcome, with the keys
                               'TP_benefit', 'TN_benefit', 'FP_cost', 'FN_cost'.

    Returns:
        float: The calculated total profit/loss.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)

    # Boolean masks for each side of the confusion matrix
    actually_positive = actual == 1
    actually_negative = actual == 0
    flagged = predicted == 1
    cleared = predicted == 0

    # Monetary value of each outcome = number of cases * unit benefit/cost
    caught_fraud_value = (actually_positive & flagged).sum() * costs_benefits['TP_benefit']
    correct_clear_value = (actually_negative & cleared).sum() * costs_benefits['TN_benefit']
    false_alarm_cost = (actually_negative & flagged).sum() * costs_benefits['FP_cost']
    missed_fraud_cost = (actually_positive & cleared).sum() * costs_benefits['FN_cost']

    # Net result: benefits minus costs
    return caught_fraud_value + correct_clear_value - false_alarm_cost - missed_fraud_cost

print("brapl_score function defined.")

brapl_score function defined.


## Generate Mock Predictions and Calculate Custom Metrics

### Subtask:
Generate mock predictions (predicted labels and predicted probabilities) for a binary classification model on the previously created synthetic 'Insurance dataset'. Then, use the implemented `F_beta score`, `Mean Confident Misclassification Error (MCME)`, and `Business Risk-Adjusted Profit/Loss (BRAPL)` functions to calculate their values based on these mock predictions and the true labels.

#### Instructions
1.  **Generate Mock Predictions:**
    *   Create an array `y_true` from the `is_fraud` column of the `df` DataFrame.
    *   Generate `y_pred_proba` (predicted probabilities for the positive class) as an array of random floats between 0 and 1, ensuring some correlation with `y_true` to make the predictions somewhat realistic (e.g., higher probabilities for true fraud cases, but with noise).
    *   Derive `y_pred` (predicted labels) from `y_pred_proba` using a threshold (e.g., 0.5). Varying the threshold can simulate different model behaviors.
2.  **Calculate F_beta Score:**
    *   Choose a `beta` value (e.g., 2, to emphasize recall for fraud detection) and call the `f_beta_score` function with `y_true`, `y_pred`, and the chosen `beta`.
3.  **Calculate MCME:**
    *   Call the `mcme_score` function with `y_true`, `y_pred_proba`, and `y_pred`.
4.  **Calculate BRAPL:**
    *   Define a `costs_benefits` dictionary with realistic (mock) financial values for `TP_benefit`, `TN_benefit`, `FP_cost`, and `FN_cost` relevant to an insurance fraud scenario.
    *   Call the `brapl_score` function with `y_true`, `y_pred`, and the `costs_benefits` dictionary.
5.  **Display Results:** Print the calculated values for each of the three custom metrics.

**Reasoning**:
I will generate mock predictions by taking the 'is_fraud' column as true labels, creating predicted probabilities that are somewhat correlated with the true labels, and then deriving predicted labels using a threshold. This setup will simulate a classification model's output.



In [36]:
y_true = df['is_fraud'].values

# Generate mock predicted probabilities (y_pred_proba) that are correlated
# with y_true: fraud rows are drawn from a high range, non-fraud rows from a
# low range.
np.random.seed(42) # Ensure reproducibility for predictions as well

is_fraud_case = y_true == 1
proba_low = np.where(is_fraud_case, 0.6, 0.05)   # lower bound per row
proba_high = np.where(is_fraud_case, 0.95, 0.4)  # upper bound per row

# A single vectorized call with array bounds takes one uniform draw per row,
# in row order, so the random stream is consumed the same way a per-row
# Python loop would consume it -- without the interpreter overhead.
y_pred_proba = np.random.uniform(proba_low, proba_high)

# Add some noise/randomness to make it less perfect, then keep values in [0, 1]
noise = np.random.normal(0, 0.1, len(y_true))
y_pred_proba = np.clip(y_pred_proba + noise, 0, 1)

# Derive predicted labels (y_pred) using a threshold
threshold = 0.5
y_pred = (y_pred_proba >= threshold).astype(int)

print("Mock predictions generated.")
print(f"\nSample y_true (first 10): {y_true[:10]}")
print(f"Sample y_pred_proba (first 10): {np.round(y_pred_proba[:10], 2)}")
print(f"Sample y_pred (first 10): {y_pred[:10]}")

Mock predictions generated.

Sample y_true (first 10): [0 0 0 0 0 0 0 0 0 0]
Sample y_pred_proba (first 10): [0.2  0.25 0.34 0.32 0.16 0.21 0.15 0.4  0.25 0.13]
Sample y_pred (first 10): [0 0 0 0 0 0 0 0 0 0]


**Reasoning**:
I have successfully generated mock predictions. Now, I will calculate the F_beta score using the previously defined `f_beta_score` function, providing a beta value of 2 to emphasize recall for fraud detection, as specified in the instructions.



In [37]:
# beta > 1 favors recall, which is what matters most when fraud must not be missed.
beta_value = 2
f_beta = f_beta_score(y_true, y_pred, beta=beta_value)
print(f"Calculated F-beta (beta={beta_value}) score: {f_beta:.4f}")

Calculated F-beta (beta=2) score: 0.8947


**Reasoning**:
The previous step calculated the F-beta score. Now, I will calculate the Mean Confident Misclassification Error (MCME) using the `mcme_score` function, as outlined in the instructions, providing `y_true`, `y_pred_proba`, and `y_pred`.



In [38]:
# Mean confidence the model had in its wrong predictions.
mcme = mcme_score(y_true=y_true, y_pred_proba=y_pred_proba, y_pred=y_pred)
print(f"Calculated MCME score: {mcme:.4f}")

Calculated MCME score: 0.5430


**Reasoning**:
The F-beta and MCME scores have been calculated. Now, I will calculate the Business Risk-Adjusted Profit/Loss (BRAPL) score. This involves defining a `costs_benefits` dictionary with mock financial values and then calling the `brapl_score` function with `y_true`, `y_pred`, and the `costs_benefits` dictionary, as specified in the instructions.



In [39]:
# Mock financial impact of each prediction outcome.
costs_benefits = dict(
    TP_benefit=1000,  # Benefit for correctly identifying fraud
    TN_benefit=50,    # Benefit for correctly identifying non-fraud (e.g., lower processing cost)
    FP_cost=200,      # Cost for incorrectly flagging non-fraud (e.g., investigation cost, customer dissatisfaction)
    FN_cost=5000,     # Cost for missing actual fraud (e.g., loss from undetected fraud)
)

brapl = brapl_score(y_true=y_true, y_pred=y_pred, costs_benefits=costs_benefits)
print(f"Calculated BRAPL score: ${brapl:.2f}")

Calculated BRAPL score: $77300.00
