# Preparing the Dataset


## Mounting Google Drive

In [3]:
# Attach Google Drive to this Colab runtime at /content/drive.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


## Function for preparing the dataset for the model


In [11]:
import random
import pandas as pd
import numpy as np

def generate_kidney_compatibility_dataset(num_samples=500, seed=None):
    """Generate a synthetic donor/recipient kidney-matching dataset.

    Each row describes one randomly drawn donor-recipient pair together with
    derived scores (``kdpi_score``, ``epts_score``, ``match_score``) and a
    binary ``compatible`` label. Every value is simulated; the score formulas
    below are simplified stand-ins defined in this function.

    Parameters
    ----------
    num_samples : int, default 500
        Number of donor-recipient pairs (rows) to generate.
    seed : int or None, default None
        When given, a private ``random.Random(seed)`` is used, so the same
        seed always yields the same dataset. When ``None``, the module-level
        ``random`` generator is used.

    Returns
    -------
    pandas.DataFrame
        One row per pair with donor features, recipient features,
        ``abo_compatible``, ``total_hla_mismatch`` (0..5), ``match_score``
        (clipped to 0..100) and the binary ``compatible`` label.
    """
    # The `random` module exposes the same randint/choice/uniform/choices API
    # as a Random instance, so either can serve as the source of randomness.
    rng = random if seed is None else random.Random(seed)

    blood_types = ["A", "B", "AB", "O"]
    genders = ["Male", "Female"]
    # Candidate alleles per HLA locus; the keys double as column-name suffixes.
    hla_alleles = {
        "a": ["A1", "A2", "A3"],
        "b": ["B7", "B8", "B27"],
        "c": ["Cw3", "Cw4", "Cw5"],
        "drb1": ["DR1", "DR4", "DR7"],
        "dqb1": ["DQ2", "DQ3", "DQ4"],
    }
    # Recipient blood type -> donor blood types that recipient can accept.
    # O is the universal donor and AB the universal recipient.
    acceptable_donors = {
        "O": {"O"},
        "A": {"A", "O"},
        "B": {"B", "O"},
        "AB": {"A", "B", "AB", "O"},
    }
    organ_type = "kidney"

    data = []
    for _ in range(num_samples):
        # Donor fields (IDs are drawn independently, so they may repeat).
        donor_id = rng.randint(1000, 9999)
        donor_age = rng.randint(18, 65)
        donor_gender = rng.choice(genders)
        donor_height = round(rng.uniform(150, 190), 1)
        donor_weight = round(rng.uniform(48, 110), 1)
        donor_blood_type = rng.choice(blood_types)
        donor_has_diabetes = rng.choice([0, 1])
        donor_has_hypertension = rng.choice([0, 1])
        donor_serum_creatinine = round(rng.uniform(0.6, 2.0), 2)
        donor_hla = {locus: rng.choice(alleles) for locus, alleles in hla_alleles.items()}
        # Simulated donor-quality score: age-driven, plus comorbidities,
        # creatinine and uniform noise, limited to 0..100.
        kdpi_score = round(np.clip(
            0.9 * donor_age
            + 5 * donor_has_diabetes
            + 4 * donor_has_hypertension
            + 5 * (donor_serum_creatinine - 1)
            + rng.uniform(-10, 10),
            0, 100), 1)

        # Recipient fields
        patient_id = rng.randint(10000, 99999)
        recipient_age = rng.randint(8, 75)
        recipient_gender = rng.choice(genders)
        recipient_height = round(rng.uniform(140, 195), 1)
        recipient_weight = round(rng.uniform(35, 120), 1)
        recipient_blood_type = rng.choice(blood_types)
        # 60% of recipients fall in the low band (0-50), 40% in the high band (50-99).
        recipient_cpra_score = rng.choices(
            [rng.uniform(0, 50), rng.uniform(50, 99)], weights=[0.6, 0.4])[0]
        recipient_years_dialysis = round(rng.uniform(0, 8), 1)
        recipient_previous_transplant = rng.choice([0, 0, 0, 1])  # 1 is rarer
        recipient_has_diabetes = rng.choice([0, 1])
        recipient_hla = {locus: rng.choice(alleles) for locus, alleles in hla_alleles.items()}
        # Simulated recipient score: clipped to 0..10, then scaled by 10.
        epts_score = np.clip(
            4.7 * max(recipient_age - 25, 0) / 100
            + 1.26 * recipient_has_diabetes
            + 3.98 * recipient_previous_transplant
            + 3.15 * np.log(recipient_years_dialysis + 1) / 10
            + rng.uniform(-0.3, 0.2),
            0, 10) * 10

        # Basic compatibility calculations
        abo_compatible = int(donor_blood_type in acceptable_donors[recipient_blood_type])
        hla_matches = sum(
            donor_hla[locus] == recipient_hla[locus] for locus in hla_alleles
        )
        # Mismatches are counted over the loci actually compared, so a
        # perfect match yields 0 and a complete mismatch yields 5.
        total_hla_mismatch = len(hla_alleles) - hla_matches

        # Simulate clinical match "score"
        match_score = (
            30 * abo_compatible
            + 5 * hla_matches
            + 10 * (recipient_cpra_score >= 80)
            + 5 * (abs(recipient_age - donor_age) <= 10)
            + 8 * (kdpi_score <= 35)
            + 8 * (epts_score <= 35)
            # Penalty grows with distance from 2 years on dialysis (either direction).
            - 3 * abs(recipient_years_dialysis - 2)
        )

        # Binary target: "is highly compatible match" (simulated from the above).
        compatible = int(
            abo_compatible
            and hla_matches >= 2  # at least two of the five loci must match
            and kdpi_score < 70
            and epts_score < 70
            and match_score > 50
        )

        data.append({
            # Donor features
            "organ_type": organ_type,
            "donor_id": donor_id,
            "donor_age": donor_age,
            "donor_gender": donor_gender,
            "donor_height": donor_height,
            "donor_weight": donor_weight,
            "donor_blood_type": donor_blood_type,
            "donor_has_diabetes": donor_has_diabetes,
            "donor_has_hypertension": donor_has_hypertension,
            "donor_serum_creatinine": donor_serum_creatinine,
            **{f"donor_hla_{locus}": allele for locus, allele in donor_hla.items()},
            "kdpi_score": kdpi_score,
            # Recipient features
            "patient_id": patient_id,
            "recipient_age": recipient_age,
            "recipient_gender": recipient_gender,
            "recipient_height": recipient_height,
            "recipient_weight": recipient_weight,
            "recipient_blood_type": recipient_blood_type,
            "recipient_cpra_score": recipient_cpra_score,
            "recipient_years_dialysis": recipient_years_dialysis,
            "recipient_previous_transplant": recipient_previous_transplant,
            "recipient_has_diabetes": recipient_has_diabetes,
            **{f"recipient_hla_{locus}": allele for locus, allele in recipient_hla.items()},
            "epts_score": epts_score,
            # Compatibility features
            "abo_compatible": abo_compatible,
            "total_hla_mismatch": total_hla_mismatch,
            # Label/Score
            "match_score": np.clip(match_score, 0, 100),
            "compatible": compatible,  # binary label
        })

    return pd.DataFrame(data)

# Generate the dummy dataset in memory (uncomment to try it; the CSV file is written in the saving cell below)
# df = generate_kidney_compatibility_dataset(1000)


## Saving the dataset to Google Drive


In [12]:
import os
if __name__ == "__main__":
    # Drive must be mounted before anything can be written under /content/drive.
    from google.colab import drive
    drive.mount("/content/drive")

    # Project folder on Drive and the CSV file that will hold the dataset.
    save_path = "/content/drive/MyDrive/organ_matching_project"
    csv_path = os.path.join(save_path, "dummy_organ_data_kidney.csv")
    os.makedirs(save_path, exist_ok=True)  # no error if the folder already exists

    # Build 500 synthetic donor-recipient pairs and write them without the index column.
    df = generate_kidney_compatibility_dataset(500)
    df.to_csv(
        csv_path,
        index=False,
    )
    print(f"Dataset saved to {csv_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset saved to /content/drive/MyDrive/organ_matching_project/dummy_organ_data_kidney.csv
