In [27]:
import pandas as pd
import numpy as np
from sklearn.metrics import precision_score, recall_score  # ✅ THIS LINE

In [11]:
# Location of the medical-aid claims CSV.
# NOTE(review): this is an absolute path on one machine; the notebook will not
# run elsewhere until it is pointed at a local copy of the file.
claims_csv_path = "/home/mubasshir/Desktop/research/Our_Work/Based_On_Dataset_1/dataset/medical_aid_claims.csv"
df = pd.read_csv(claims_csv_path)

In [12]:
# Partition the claims on the "label" column: rows labelled 1 form the fraud
# subset, rows labelled 0 form the legitimate subset.
fraud_df = df[df["label"].eq(1)]
legit_df = df[df["label"].eq(0)]

In [13]:
# Columns that the threshold analysis iterates over.
numeric_cols = [
    "Fee Charged",
    "membership_period",
    "number_of_claims",
    "number_of_dependants",
]

In [14]:
# Accumulator for candidate threshold rules. The loop over numeric_cols appends
# one (column, operator, threshold) tuple per column whose fraud mean differs
# from its legitimate mean.
rule_candidates = []

In [15]:
# Start from an empty list every time this cell runs. The list used to be
# created only in an earlier cell, so re-running this loop appended a second
# copy of every rule.
rule_candidates = []

for col in numeric_cols:
    fraud_mean = fraud_df[col].mean()
    legit_mean = legit_df[col].mean()
    if fraud_mean > legit_mean:
        # Fraud rows run higher on average: propose flagging values above the
        # 90th percentile of the legitimate rows.
        threshold = legit_df[col].quantile(0.9)
        rule_candidates.append((col, ">", threshold))
    elif fraud_mean < legit_mean:
        # Fraud rows run lower on average: propose flagging values below the
        # 10th percentile of the legitimate rows.
        threshold = legit_df[col].quantile(0.1)
        rule_candidates.append((col, "<", threshold))
    # If neither comparison holds (equal means, or a NaN mean from an empty
    # subset) there is no direction to test, so no rule is proposed.

In [17]:
def evaluate_rule(df, col, op, threshold):
    """Score a single-column threshold rule against ``df["label"]``.

    A row is predicted positive (1) when ``df[col] <op> threshold`` holds and
    negative (0) otherwise. Those predictions are then compared with the
    ``"label"`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``col`` and a ``"label"`` column.
        NOTE(review): labels are presumably binary 0/1 -- confirm against the
        dataset.
    col : str
        Column the rule tests.
    op : str
        Comparison to apply: ``">"``, ``"<"``, ``">="`` or ``"<="``.
    threshold : scalar
        Value each entry of ``df[col]`` is compared against.

    Returns
    -------
    tuple
        ``(precision, recall)`` as returned by ``precision_score`` and
        ``recall_score``, both called with ``zero_division=0``.

    Raises
    ------
    ValueError
        If ``op`` is not one of the supported operators. Previously every
        operator other than ``">"`` was silently evaluated as ``"<"``, so a
        typo produced metrics for the wrong rule.
    """
    # Map each operator string to the matching Series comparison method.
    comparisons = {">": "gt", "<": "lt", ">=": "ge", "<=": "le"}
    if op not in comparisons:
        raise ValueError(
            f"Unsupported operator {op!r}; expected one of {sorted(comparisons)}"
        )

    predictions = getattr(df[col], comparisons[op])(threshold).astype(int)

    # zero_division=0 is forwarded to both scorers, as in the original code.
    precision = precision_score(df["label"], predictions, zero_division=0)
    recall = recall_score(df["label"], predictions, zero_division=0)
    return precision, recall

In [25]:
# Minimum scores a candidate rule must reach on the full dataset to be kept.
min_precision = 0.2
min_recall = 0.05

accepted_rules = []
for col, op, threshold in rule_candidates:
    precision, recall = evaluate_rule(df, col, op, threshold)
    # Keep the test in its positive form so that a NaN score is rejected.
    passes_both_bars = precision >= min_precision and recall >= min_recall
    if not passes_both_bars:
        continue
    accepted_rules.append((col, op, threshold, precision, recall))

print(accepted_rules)

[('Fee Charged', '>', 45109.0, 0.2131837307152875, 0.11022480058013052), ('membership_period', '<', 999.0, 0.21508379888268156, 0.1116751269035533)]


In [26]:
# Try a few combined rules: membership period below a cut-off AND fee above a
# cut-off. Each pair is (membership_period cut-off, fee cut-off).
thresholds = [(900, 15000), (1000, 20000), (1200, 25000)]
for mp, fee in thresholds:
    short_membership = df["membership_period"] < mp
    high_fee = df["Fee Charged"] > fee
    matched = df[short_membership & high_fee]
    if matched.empty:
        # No rows satisfy both conditions, so there is nothing to score.
        continue
    # Mean label of the matched rows, and their share of the label total.
    # NOTE(review): reading these as precision/recall assumes labels are 0/1.
    matched_precision = matched["label"].mean()
    matched_recall = matched["label"].sum() / df["label"].sum()
    print(f"Rule: mp<{mp} and fee>{fee} → Precision: {matched_precision:.2f}, Recall: {matched_recall:.2f}, Count: {len(matched)}")


Rule: mp<900 and fee>15000 → Precision: 0.21, Recall: 0.07, Count: 465
Rule: mp<1000 and fee>20000 → Precision: 0.21, Recall: 0.07, Count: 438
Rule: mp<1200 and fee>25000 → Precision: 0.20, Recall: 0.07, Count: 450
