# Preparing a Dataset with Google Colab and Google Drive


## Mounting Google Drive

In [6]:
# Colab-only step: the `google.colab` package must be importable in the running
# kernel; where it is not installed this import raises ModuleNotFoundError.
from google.colab import drive
# Mount the user's Google Drive under /content/drive.
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

## A function to prepare a data set for our model


In [None]:
import random
import pandas as pd
import os

def _abo_compatible(donor_blood, patient_blood):
    """Return True if the donor's ABO group is acceptable for the patient.

    Only the ABO group is considered; the dataset carries no Rh factor.
    """
    return (
        donor_blood == patient_blood
        or donor_blood == "O"  # group O can donate to every ABO group
        or patient_blood == "AB"  # group AB can receive from every ABO group
    )


def create_dataset(num_samples=500, seed=None):
    """Generate a synthetic donor/patient dataset with a compatibility score.

    Each row pairs one randomly generated donor with one randomly generated
    patient and labels the pair with a heuristic score built from:

    * +30 if the donor's ABO blood group is compatible with the patient's
    * +20 if the donated organ is the organ the patient needs
    * +5  for every matching HLA locus (5 loci, so at most +25)
    * +10 per patient priority level (priority is 1-5, so +10 to +50)
    * +5  if donor and patient have the same gender
    * +10 if the age gap is <= 10 years, +5 if it is <= 20 years

    The raw total can reach 140; it is capped at 100.

    Args:
        num_samples: Number of donor/patient rows to generate. Values <= 0
            produce an empty DataFrame that still has every column.
        seed: Optional seed for reproducible output. When ``None`` (default)
            the module-level ``random`` generator is used, so callers that
            seed ``random`` globally keep getting the same draws as before.

    Returns:
        pandas.DataFrame with donor columns (``organ_type``,
        ``donor_blood_type``, ``donor_age``, ``donor_gender``, ``hla_*``),
        patient columns (``organ_needed``, ``blood_type``, ``age``,
        ``gender``, ``priority_status``, ``hla_*_p``) and ``score``.
    """
    # A private generator keeps seeded runs isolated from global state; the
    # module itself exposes the same choice/randint/random functions.
    rng = random.Random(seed) if seed is not None else random

    organ_types = ["Kidney", "Liver", "Heart", "Lung"]
    blood_types = ["A", "B", "AB", "O"]
    genders = ["Male", "Female"]
    # Donor column name -> possible alleles; patient columns add a "_p" suffix.
    hla_loci = {
        "hla_a": ["A1", "A2", "A3"],
        "hla_b": ["B7", "B8", "B27"],
        "hla_c": ["Cw3", "Cw4", "Cw5"],
        "hla_drb1": ["DR1", "DR4", "DR7"],
        "hla_dqb1": ["DQ2", "DQ3", "DQ4"],
    }

    # Explicit schema so an empty result still exposes every column.
    columns = [
        "organ_type",
        "donor_blood_type",
        "donor_age",
        "donor_gender",
        *hla_loci,
        "organ_needed",
        "blood_type",
        "age",
        "gender",
        "priority_status",
        *(f"{locus}_p" for locus in hla_loci),
        "score",
    ]

    rows = []
    for _ in range(num_samples):
        # NOTE: the order of the random draws below is deliberate; it matches
        # the historical order so globally seeded runs generate the same rows.
        donor_organ = rng.choice(organ_types)
        donor_blood = rng.choice(blood_types)
        donor_age = rng.randint(18, 60)
        donor_gender = rng.choice(genders)
        donor_hla = {
            locus: rng.choice(alleles) for locus, alleles in hla_loci.items()
        }

        # The patient needs the donated organ unless the draw is <= 0.3, in
        # which case the needed organ is picked at random (may still coincide).
        patient_organ = donor_organ if rng.random() > 0.3 else rng.choice(organ_types)
        patient_blood = rng.choice(blood_types)
        patient_age = rng.randint(5, 70)
        patient_gender = rng.choice(genders)
        patient_priority = rng.randint(1, 5)
        patient_hla = {
            f"{locus}_p": rng.choice(alleles) for locus, alleles in hla_loci.items()
        }

        score = 0
        if _abo_compatible(donor_blood, patient_blood):
            score += 30
        if donor_organ == patient_organ:
            score += 20
        hla_matches = sum(
            donor_hla[locus] == patient_hla[f"{locus}_p"] for locus in hla_loci
        )
        score += hla_matches * 5
        score += patient_priority * 10
        if donor_gender == patient_gender:
            score += 5
        age_diff = abs(donor_age - patient_age)
        score += 10 if age_diff <= 10 else (5 if age_diff <= 20 else 0)

        rows.append({
            "organ_type": donor_organ,
            "donor_blood_type": donor_blood,
            "donor_age": donor_age,
            "donor_gender": donor_gender,
            **donor_hla,
            "organ_needed": patient_organ,
            "blood_type": patient_blood,
            "age": patient_age,
            "gender": patient_gender,
            "priority_status": patient_priority,
            **patient_hla,
            "score": min(score, 100),
        })

    return pd.DataFrame(rows, columns=columns)



## Calling the function and saving the dataset to Google Drive


In [7]:
# Entry point: build the synthetic dataset and write it as a CSV file into a
# project folder on the mounted Google Drive.
if __name__ == "__main__":
    # Mount Google Drive (imported again here, so this cell does not rely on
    # the separate mounting cell having been executed first)
    from google.colab import drive
    drive.mount('/content/drive')

    # Create a folder for the project in Drive; exist_ok=True makes re-runs safe
    save_path = "/content/drive/MyDrive/organ_matching_project"
    os.makedirs(save_path, exist_ok=True)

    # Generate 400 rows (create_dataset's default would be 500)
    df = create_dataset(400)
    csv_path = os.path.join(save_path, "dummy_organ_data.csv")
    # index=False keeps the DataFrame's row index out of the CSV
    df.to_csv(csv_path, index=False)
    print(f"Dataset saved to {csv_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset saved to /content/drive/MyDrive/organ_matching_project/dummy_organ_data.csv
