In [1]:
import pandas as pd
import hashlib
import os

In [5]:
# Hash an identifier (BeneID or ClaimID) into its anonymized code using SHA-256
def generate_ac(beneficiary_id, salt=""):
    """Return the SHA-256 hex digest that stands in for an identifier.

    Args:
        beneficiary_id: Identifier to pseudonymize. Values that are not
            already strings (e.g. integers) are converted to text first, so
            ``123`` and ``"123"`` produce the same code instead of raising
            ``AttributeError`` on ``.encode``.
        salt: Optional secret text prepended to the identifier before
            hashing. The default ``""`` yields the plain, unsalted digest.
            Identifiers drawn from a small, guessable space can be recovered
            from unsalted digests by hashing every candidate, so pass a
            private salt when the output will be shared.

    Returns:
        The digest as a 64-character lowercase hexadecimal string.
    """
    # Formatting coerces non-string inputs and applies the salt prefix in one step.
    text = f"{salt}{beneficiary_id}"
    return hashlib.sha256(text.encode("utf-8")).hexdigest()

# Directory that holds the original (non-anonymized) CSV files.
# Edit this one constant to point the notebook at a different location.
DATA_DIR = "/home/mubasshir/Desktop/Research/Insurance/Dataset/archive"

# Paths to your original CSV files
input_files = {
    "beneficiary": os.path.join(DATA_DIR, "Test_Beneficiarydata-1542969243754.csv"),
    "inpatient": os.path.join(DATA_DIR, "Test_Inpatientdata-1542969243754.csv"),
    "outpatient": os.path.join(DATA_DIR, "Test_Outpatientdata-1542969243754.csv"),
}

# Columns to anonymize
bene_col = "BeneID"
claim_col = "ClaimID"

# Load datasets.
# The ID columns are read as text so that every file represents the same ID
# identically. With inferred dtypes, a numeric-looking ID column containing a
# blank would be parsed as float in one file and int in another, and the same
# ID would then stringify (and hash) differently across files.
df_beneficiary = pd.read_csv(input_files["beneficiary"], dtype={bene_col: str})
df_inpatient = pd.read_csv(input_files["inpatient"], dtype={bene_col: str, claim_col: str})
df_outpatient = pd.read_csv(input_files["outpatient"], dtype={bene_col: str, claim_col: str})

# Step 1: Create BeneID mapping (one code per distinct, non-missing ID)
all_bene_ids = (
    pd.concat([
        df_beneficiary[bene_col],
        df_inpatient[bene_col],
        df_outpatient[bene_col],
    ])
    .dropna()  # a missing ID must stay missing, not be hashed as the text "nan"
    .drop_duplicates()
    .astype(str)
)
beneid_to_ac = {bid: generate_ac(bid) for bid in all_bene_ids}

# Step 2: Create ClaimID mapping (claims exist only in the in/outpatient files)
all_claim_ids = (
    pd.concat([
        df_inpatient[claim_col],
        df_outpatient[claim_col],
    ])
    .dropna()
    .drop_duplicates()
    .astype(str)
)
claimid_to_ac = {cid: generate_ac(cid) for cid in all_claim_ids}

# Step 3: Apply BeneID mapping.
# Series.map leaves values that are absent from the dict (i.e. missing IDs) as NaN.
for frame in (df_beneficiary, df_inpatient, df_outpatient):
    frame[bene_col] = frame[bene_col].map(beneid_to_ac)

# Step 4: Apply ClaimID mapping
for frame in (df_inpatient, df_outpatient):
    frame[claim_col] = frame[claim_col].map(claimid_to_ac)

# Step 5: Save anonymized files (written to the current working directory).
# NOTE(review): the inputs are the "Test_" files, yet the output names contain
# both "Test" and "Train" -- confirm the intended names before relying on them.
df_beneficiary.to_csv("Test_Anonymized_Train_Beneficiarydata.csv", index=False)
df_inpatient.to_csv("Test_Anonymized_Train_Inpatientdata.csv", index=False)
df_outpatient.to_csv("Test_Anonymized_Train_Outpatientdata.csv", index=False)