In [43]:
import pandas as pd

In [44]:
# Directory that holds the claim CSV files. Kept in one place so the notebook
# can be pointed at another location by editing a single line.
DATA_DIR = "/home/mubasshir/Desktop/Research/Insurance/Dataset/archive"

# Load the datasets into DataFrames.
# NOTE: all four files loaded here are the "Test-..." CSVs.
main_claim_df = pd.read_csv(f"{DATA_DIR}/Test-1542969243754.csv")
beneficiary_df = pd.read_csv(f"{DATA_DIR}/Test_Beneficiarydata-1542969243754.csv")
inpatient_df = pd.read_csv(f"{DATA_DIR}/Test_Inpatientdata-1542969243754.csv")
outpatient_df = pd.read_csv(f"{DATA_DIR}/Test_Outpatientdata-1542969243754.csv")

In [45]:
# Merge datasets
# Start from the inpatient claims and left-join beneficiary attributes (by
# BeneID) and then the provider-level table (by Provider). Left joins keep
# every inpatient row even when no match is found on the right-hand side.
# NOTE(review): the row count stays equal to inpatient_df only if BeneID and
# Provider are unique in their respective right-hand tables -- confirm.
merged_df = (
    inpatient_df
    .merge(beneficiary_df, how='left', on='BeneID')
    .merge(main_claim_df, how='left', on='Provider')
)

In [46]:
# Prepare required inputs
# Build one {"Patient_ID": ..., "Service_Token": ...} dict per merged row
# directly from the two ID columns; this avoids the per-row Series that
# iterrows() constructs and yields the same list of records.
patient_records = (
    merged_df[["BeneID", "ClaimID"]]
    .rename(columns={"BeneID": "Patient_ID", "ClaimID": "Service_Token"})
    .to_dict("records")
)
# Shallow copy: a distinct list object whose elements are the same dicts as
# in patient_records.
service_records = patient_records.copy()

In [47]:
# Extract billing codes
# Every column whose name contains "ClmDiagnosisCode" contributes codes.
diagnosis_cols = [name for name in merged_df.columns if "ClmDiagnosisCode" in name]

# Flatten those columns row by row, de-duplicate, and drop missing values so
# the master list holds only real codes.
billing_codes_master = [
    code
    for code in pd.unique(merged_df[diagnosis_cols].to_numpy().ravel())
    if not pd.isna(code)
]


In [48]:
# Set fixed insurance policy
# A single policy is applied to every claim; Max_Coverage is the ceiling the
# fraud check compares the billed amount against.
insurance_policy = dict(Max_Coverage=50_000)

In [49]:
# Define the rule-based fraud detection function
def strict_algorithm_fraud_check(
    patient_id, service_token, claim_record,
    patient_records, service_records,
    billing_codes_master, insurance_policy
):
    """Run a claim through a fixed sequence of fraud rules.

    The rules are evaluated in order and the first one that fires decides the
    result; later rules are not evaluated.

    Args:
        patient_id: Identifier of the patient making the claim.
        service_token: Identifier of the service/claim being checked.
        claim_record: Mapping read via ``.get`` for the keys
            ``"Insurance_ID"``, ``"Billing_Codes"``, ``"Billed_Amount"`` and
            ``"Paid_Amount"``. Missing or ``None`` billing codes are treated
            as an empty list, and a missing or ``None`` billed amount as 0.
        patient_records: Iterable of mappings with ``"Patient_ID"`` and
            ``"Service_Token"`` keys listing known patient/token pairs.
        service_records: Iterable of mappings with the same two keys listing
            known service records.
        billing_codes_master: Container of recognised billing codes; tested
            with ``in``.
        insurance_policy: Mapping; ``"Max_Coverage"`` is read with a default
            of infinity.

    Returns:
        A ``(reason, label)`` tuple where ``label`` is 1 when a rule fired and
        0 when the claim passed every rule.
    """
    # Compare (patient, token) pairs instead of a joined "patient_token"
    # string: with a joined string, ("A_B", "C") and ("A", "B_C") both become
    # "A_B_C" and would wrongly match each other. Values are still compared
    # by their string form, as the joined key did.
    claim_key = (str(patient_id), str(service_token))

    def _is_listed(records):
        # True when some record carries exactly this patient/token pair.
        return any(
            (str(rec["Patient_ID"]), str(rec["Service_Token"])) == claim_key
            for rec in records
        )

    # Rule 1: False Identity
    if not _is_listed(patient_records):
        return "Fraud: False Identity or Misrepresentation of Identity", 1

    # Rule 2: Invalid Service Record
    if not _is_listed(service_records):
        return "Fraud: Invalid Service Record (Billing for Unprovided Services)", 1

    # Rule 3: Multiple Claims
    # NOTE(review): any non-None Insurance_ID is treated as a duplicate claim
    # -- confirm this is the intended signal.
    if claim_record.get("Insurance_ID") is not None:
        return "Fraud: Multiple Claims Detected", 1

    # A key that is present but holds None would otherwise reach len()/">"
    # and raise TypeError, so normalise both fields up front.
    claim_codes = claim_record.get("Billing_Codes")
    if claim_codes is None:
        claim_codes = []
    billed_amount = claim_record.get("Billed_Amount")
    if billed_amount is None:
        billed_amount = 0

    # Rule 4: Fake Billing Code
    if not all(code in billing_codes_master for code in claim_codes):
        return "Fraud: Fake Billing Code", 1

    # Rule 5: Inflated Claims
    # Expected bill is a flat 1000 per billing code; more than 1.5x that is
    # flagged.
    expected_bill = 1000 * len(claim_codes)
    if billed_amount > expected_bill * 1.5:
        return "Fraud: Inflated Claims Detected", 1

    # Rule 6: Misrepresentation of Coverage
    if billed_amount > insurance_policy.get("Max_Coverage", float('inf')):
        return "Fraud: Misrepresentation of Coverage", 1

    # Rule 7: Underpayment Check
    # NOTE(review): any positive Paid_Amount is reported as underpayment --
    # confirm this is the intended condition.
    paid_amount = claim_record.get("Paid_Amount")
    if paid_amount is not None and paid_amount > 0:
        return "Fraud: Underpayment Detected", 1

    # All checks passed — Valid claim
    return "Insurance Claim is Valid", 0

In [None]:
# Score every merged row with the rule-based check and collect one result
# record per row.
# NOTE: the check walks patient_records/service_records on every call, so the
# total cost of this loop grows with (rows x records).
results = []
for _, row in merged_df.iterrows():
    patient_id = row["BeneID"]
    service_token = row["ClaimID"]
    provider = row["Provider"]
    billed_amount = row["InscClaimAmtReimbursed"]
    # Keep only the diagnosis codes actually present on this row.
    billing_codes = [row[col] for col in diagnosis_cols if pd.notna(row[col])]

    claim_record = {
        "Insurance_ID": None,  # Simulating no duplicate
        "Billing_Codes": billing_codes,
        "Billed_Amount": billed_amount,
        "Paid_Amount": None,
        "Provider": provider
    }

    reason, label = strict_algorithm_fraud_check(
        patient_id, service_token, claim_record,
        patient_records, service_records,
        billing_codes_master, insurance_policy
    )

    # "Provider" is listed once; the original dict literal repeated the key,
    # which left a single entry in this same (first) position.
    # NOTE(review): "Actual_PotentialFraud" stores the label returned by the
    # rule check above, not a label read from the data -- confirm the name.
    results.append({
        "Provider": provider,
        "BeneID": patient_id,
        "ClaimID": service_token,
        "Billed_Amount": billed_amount,
        "Actual_PotentialFraud": label,
        "Detected_Fraud_Reason": reason
    })

# Save or display results
results_df = pd.DataFrame(results)
results_df.to_csv("Our_Algo.csv", index=False)
print(results_df.head())