In [None]:
from pathlib import Path
import os
# import re
# import subprocess
import yaml

from linkml.validator.loaders import TsvLoader
# from linkml.utils.schema_builder import SchemaBuilder

# from linkml_runtime.linkml_model import SlotDefinition
from linkml_runtime import SchemaView

# from linkml_map.session import Session
from linkml_map.transformer.object_transformer import ObjectTransformer

In [None]:
# Directory containing the FHS-ingest transform-spec YAML files. Later cells append
# file names by plain string concatenation, so the trailing slash is significant.
var_dir = "/sbgenomics/workspace/NHLBI-BDC-DMC-HV/priority_variables_transform/FHS-ingest/"

In [None]:
# Build views over the source (FHS) and target (BDC-HM) schemas, then keep the
# underlying schema objects so later cells can wrap them in fresh SchemaViews.
source_sv = SchemaView(
    "/sbgenomics/workspace/output/Schema_FHS_v31_c1/schema-automator-data/Schema_FHS_v31_c1.yaml"
)
target_sv = SchemaView(
    "/sbgenomics/workspace/NHLBI-BDC-DMC-HM/src/bdchm/schema/bdchm.yaml"
)
source_schema, target_schema = source_sv.schema, target_sv.schema

# Base directory of the FHS-ingest transform specs; callers concatenate file names
# onto it, so the trailing slash must stay.
var_dir = "/sbgenomics/workspace/NHLBI-BDC-DMC-HV/priority_variables_transform/FHS-ingest/"

In [None]:
# Load the cause_of_death transform spec and show its first derivation block.
# The loaded document is indexed with [0] below, so it must be a sequence.
# NOTE(review): the names person_yaml / first_person are kept because later cells
# read first_person, even though the file loaded here is cause_of_death.yaml.
with open(var_dir + "cause_of_death" + ".yaml") as spec_file:
    # The context manager closes the handle; the previous bare open() leaked it.
    person_yaml = yaml.safe_load(spec_file)

first_person = person_yaml[0]
print(yaml.dump(first_person))

In [None]:
# Smoke test: map the first row of pht000094 with the derivation block loaded above.
data_loader = TsvLoader("/sbgenomics/workspace/output/FHS_v31_c1/pht000094.tsv")
data_rows = data_loader.iter_instances()
cur_row = first_row = next(data_rows)  # only the first data row is used here
transform_spec = first_person

# A fresh transformer, wired to new views built from the two loaded schemas.
transformer = ObjectTransformer(unrestricted_eval=True)
transformer.source_schemaview = SchemaView(source_schema)
transformer.target_schemaview = SchemaView(target_schema)
transformer.create_transformer_specification(transform_spec)

result = transformer.map_object(cur_row, source_type="pht000094")

# Reaching this line means map_object did not raise.
print(result, "Transformation Successful!", sep="\n")

In [None]:
class LazyTsvLoader:
    """
    Mapping-like accessor over a directory of ``<pht_id>.tsv`` files.

    ``loader[pht_id]`` reads the whole table through ``TsvLoader`` the first time it
    is requested and returns the memoised list of rows afterwards.
    ``pht_id in loader`` only checks that the file exists; it never reads it.
    """

    def __init__(self, base_path):
        self.base_path = base_path
        self.cache = {}

    def _tsv_path(self, pht_id):
        # Single place that defines how a pht id maps to a file on disk.
        return os.path.join(self.base_path, f"{pht_id}.tsv")

    def __getitem__(self, pht_id):
        """Return all rows of ``<pht_id>.tsv``; raise FileNotFoundError if it is absent."""
        try:
            return self.cache[pht_id]
        except KeyError:
            pass  # not loaded yet; fall through to the disk read

        tsv_path = self._tsv_path(pht_id)
        if not os.path.exists(tsv_path):
            raise FileNotFoundError(f"No TSV file found for {pht_id} at {tsv_path}")

        rows = self.cache[pht_id] = list(TsvLoader(tsv_path).iter_instances())
        return rows

    def __contains__(self, pht_id):
        return os.path.exists(self._tsv_path(pht_id))

lazy_loader = LazyTsvLoader("/sbgenomics/workspace/output/FHS_v31_c1")

In [None]:
# Apply every derivation block in qrs_ekg.yaml to all rows of the table named by its
# populated_from key, then write the collected results to output.yaml.
with open(var_dir + "qrs_ekg" + ".yaml") as spec_file:
    # Context manager closes the spec file; the previous bare open() leaked the handle.
    transform_yaml = yaml.safe_load(spec_file)

all_results = []

for block in transform_yaml:
    derivation = block["class_derivations"]
    for class_name, class_spec in derivation.items():
        pht_id = class_spec["populated_from"]
        print(f"Processing {pht_id} for class {class_name}")

        # Whole table, read once per pht id and cached by lazy_loader.
        rows = lazy_loader[pht_id]

        # The transformer is built from the whole block, not just this class.
        transformer = ObjectTransformer(unrestricted_eval=True)
        transformer.source_schemaview = SchemaView(source_schema)
        transformer.target_schemaview = SchemaView(target_schema)
        transformer.create_transformer_specification(block)

        for row in rows:
            mapped = transformer.map_object(row, source_type=pht_id)
            all_results.append(mapped)

# Written relative to the kernel's current working directory.
with open("output.yaml", "w") as f:
    yaml.dump(all_results, f)

In [None]:
# Shows the pht_id left bound by the loop in the batch-transform cell above;
# on a fresh kernel this raises NameError unless that cell has run first.
print(pht_id)

In [None]:
import csv
from pathlib import Path

class LazySubjectDict(dict):
    """
    Lazily loads per-pht data for a single subject on demand.

    ``obj[pht_id]`` scans ``<data_dir>/<pht_id>.tsv`` for the first row whose
    ``dbGaP_Subject_ID`` column equals the subject id and memoises that row.

    Loaded rows live in ``self._cache``, not in the underlying ``dict`` storage, so
    only subscript access triggers loading; ``in``, ``.get()`` and ``len()`` do not
    reflect loaded rows.
    """
    def __init__(self, subject_id, data_dir):
        super().__init__()
        self.subject_id = subject_id
        self.data_dir = Path(data_dir)
        self._cache = {}

    def __getitem__(self, pht_id):
        """
        Return this subject's row from ``<pht_id>.tsv``.

        Raises:
            KeyError: if the file does not exist or holds no row for the subject.
        """
        if pht_id in self._cache:
            return self._cache[pht_id]

        file_path = self.data_dir / f"{pht_id}.tsv"
        if not file_path.exists():
            raise KeyError(f"No such file: {file_path}")

        # csv yields every cell as text, so a non-string subject id (e.g. an int)
        # could never compare equal; normalise it once before scanning.
        wanted_id = self.subject_id
        if wanted_id is not None and not isinstance(wanted_id, str):
            wanted_id = str(wanted_id)

        with open(file_path, newline='') as f:
            reader = csv.DictReader(f, delimiter="\t")
            for row in reader:
                if row.get("dbGaP_Subject_ID") == wanted_id:
                    self._cache[pht_id] = row
                    return row

        raise KeyError(f"Subject {self.subject_id} not found in {pht_id}")

In [None]:
# Participant - top level class for study data.
# The spec is parsed from the inline YAML below: rows are populated from pht000395,
# identity is copied from dbGaP_Subject_ID, and member_of_research_study is the
# constant string 'FHS'. Later cells mutate this dict in place to attach nested slots.
participant_yaml = yaml.safe_load("""
class_derivations:
  Participant:
    populated_from: pht000395
    slot_derivations:
      # associated_participant:
      #   populated_from: phv00007675
      identity:
        populated_from: dbGaP_Subject_ID
      member_of_research_study:
        expr: "'FHS'"
""")

# # Dump to YAML
# with open(var_dir + "participant" + ".yaml", "w") as f:
#     yaml.dump(participant_yaml, f, sort_keys=False, allow_unicode=True)

In [None]:
# Load MeasurementObservation class derivations
# NOTE(review): refactored_docs is not defined in any cell visible in this notebook,
# so this line raises NameError on a fresh kernel - confirm where it should come from
# (the commented-out load just below may be the intended source).
bdy_hgt = refactored_docs
# bdy_hgt = yaml.safe_load(open(str(var_dir + "measurement_observation/" + "bdy_hgt" + ".yaml")))
# bdy_wgt = yaml.safe_load(open(str(var_dir + "measurement_observation/" + "bdy_wgt" + ".yaml")))
# bmi = yaml.safe_load(open(str(var_dir + "measurement_observation/" + "bmi" + ".yaml")))
# bp_diastolic = yaml.safe_load(open(str(var_dir + "measurement_observation/" + "bp_diastolic" + ".yaml")))
# bp_systolic = yaml.safe_load(open(str(var_dir + "measurement_observation/" + "bp_systolic" + ".yaml")))
# fev1 = yaml.safe_load(open(str(var_dir + "measurement_observation/" + "fev1" + ".yaml")))
# fev1_fvc = yaml.safe_load(open(str(var_dir + "measurement_observation/" + "fev1_fvc" + ".yaml")))
# fvc = yaml.safe_load(open(str(var_dir + "measurement_observation/" + "fvc" + ".yaml")))
# hrt_rt = yaml.safe_load(open(str(var_dir + "measurement_observation/" + "hrt_rt" + ".yaml")))
# spo2 = yaml.safe_load(open(str(var_dir + "measurement_observation/" + "spo2" + ".yaml")))

# Get (creating if absent) the "exposures" slot on the Participant class derivation.
participant_cls = participant_yaml.setdefault("class_derivations", {}).setdefault("Participant", {})
participant_exposures_slot = participant_cls.setdefault("slot_derivations", {}).setdefault("exposures", {})

# Attach bdy_hgt as the slot's object_derivations. setdefault only writes when the key
# is absent, so re-running this cell does not replace an existing value.
# NOTE(review): bdy_hgt is passed bare here, whereas other cells pass a list - confirm.
participant_exposures_slot.setdefault("object_derivations", bdy_hgt)
# participant_exposures_slot.setdefault("object_derivations", [
#     bdy_hgt,
#     tak_betablk,
#     tak_adrenergics,
#     tak_cort_steroid_resp,
#     tak_cort_steroid_oral,
#     tak_anabolic_steroid,
# ])

In [None]:
# Map whatever row is currently bound to cur_row with the Participant spec.
# NOTE(review): source_type is "pht000009", but the inline Participant specs in this
# notebook are populated_from pht000395 / pht000030, and cur_row comes from whichever
# load cell ran last - confirm the intended table before relying on this cell.
transform_spec = participant_yaml

input_data = cur_row

# Create ObjectTransformer and apply transformation
transformer = ObjectTransformer(unrestricted_eval=True)
transformer.source_schemaview = SchemaView(source_schema)
transformer.target_schemaview = SchemaView(target_schema)
transformer.create_transformer_specification(transform_spec)

result = transformer.map_object(input_data, source_type="pht000009")

print(result)
# Printed unconditionally once map_object returns without raising.
print("Transformation Successful!")

In [None]:
# Let's start with one simple condition
with open(var_dir + "angina" + ".yaml") as spec_file:
    # Context manager closes the spec file; the previous bare open() leaked the handle.
    angina = yaml.safe_load(spec_file)

# Get (creating if absent) the "conditions" slot on the Participant class derivation.
# The variable keeps its historical *_exposures_slot name, which other cells reuse.
participant_cls = participant_yaml.setdefault("class_derivations", {}).setdefault("Participant", {})
participant_exposures_slot = participant_cls.setdefault("slot_derivations", {}).setdefault("conditions", {})

# Attach the loaded angina document as the slot's object_derivations. setdefault only
# writes when the key is absent, so re-running this cell keeps an existing value.
participant_exposures_slot.setdefault("object_derivations", angina)

In [None]:
# Run the Participant spec against the first row of pht000395.
data_loader = TsvLoader("/sbgenomics/workspace/output/FHS_v31_c1/pht000395.tsv")
data_rows = data_loader.iter_instances()
cur_row = first_row = next(data_rows)  # only the first data row is used here
input_data = cur_row
transform_spec = participant_yaml

# A fresh transformer, wired to new views built from the two loaded schemas.
transformer = ObjectTransformer(unrestricted_eval=True)
transformer.source_schemaview = SchemaView(source_schema)
transformer.target_schemaview = SchemaView(target_schema)
transformer.create_transformer_specification(transform_spec)

result = transformer.map_object(input_data, source_type="pht000395")

# Reaching this line means map_object did not raise.
print(result, "Transformation Successful!", sep="\n")

In [None]:
# Map the first row of pht000009 with the spec held in refactored_docs.
# NOTE(review): refactored_docs is not defined in any cell visible in this notebook,
# so this cell raises NameError on a fresh kernel - confirm where it is produced.
transform_spec = refactored_docs


data_loader = TsvLoader("/sbgenomics/workspace/output/FHS_v31_c1/pht000009.tsv")
data_rows = data_loader.iter_instances()

# Only the first data row is transformed.
first_row = next(data_rows)
cur_row = first_row

input_data = cur_row

# Create ObjectTransformer and apply transformation
transformer = ObjectTransformer(unrestricted_eval=True)
transformer.source_schemaview = SchemaView(source_schema)
transformer.target_schemaview = SchemaView(target_schema)
transformer.create_transformer_specification(transform_spec)

result = transformer.map_object(input_data, source_type="pht000009")

print(result)
# Printed unconditionally once map_object returns without raising.
print("Transformation Successful!")

In [None]:
import csv
from pathlib import Path

class LazySubjectDict(dict):
    """
    Lazily loads per-pht data for a single subject on demand.

    ``obj[pht_id]`` scans ``<data_dir>/<pht_id>.tsv`` for the first row whose
    ``dbGaP_Subject_ID`` column equals the subject id and memoises that row.

    Loaded rows live in ``self._cache``, not in the underlying ``dict`` storage, so
    only subscript access triggers loading; ``in``, ``.get()`` and ``len()`` do not
    reflect loaded rows.
    """
    def __init__(self, subject_id, data_dir):
        super().__init__()
        self.subject_id = subject_id
        self.data_dir = Path(data_dir)
        self._cache = {}

    def __getitem__(self, pht_id):
        """
        Return this subject's row from ``<pht_id>.tsv``.

        Raises:
            KeyError: if the file does not exist or holds no row for the subject.
        """
        if pht_id in self._cache:
            return self._cache[pht_id]

        file_path = self.data_dir / f"{pht_id}.tsv"
        if not file_path.exists():
            raise KeyError(f"No such file: {file_path}")

        # csv yields every cell as text, so a non-string subject id (e.g. an int)
        # could never compare equal; normalise it once before scanning.
        wanted_id = self.subject_id
        if wanted_id is not None and not isinstance(wanted_id, str):
            wanted_id = str(wanted_id)

        with open(file_path, newline='') as f:
            reader = csv.DictReader(f, delimiter="\t")
            for row in reader:
                if row.get("dbGaP_Subject_ID") == wanted_id:
                    self._cache[pht_id] = row
                    return row

        raise KeyError(f"Subject {self.subject_id} not found in {pht_id}")

In [None]:
import csv
from pathlib import Path

class LazySubjectDict(dict):
    """
    Lazily loads per-pht data for a single subject on demand.

    ``obj[pht_id]`` scans ``<data_dir>/<pht_id>.tsv`` for the first row whose
    ``dbGaP_Subject_ID`` column equals the subject id and memoises that row.

    Loaded rows live in ``self._cache``, not in the underlying ``dict`` storage, so
    only subscript access triggers loading; ``in``, ``.get()`` and ``len()`` do not
    reflect loaded rows.
    """
    def __init__(self, subject_id, data_dir):
        super().__init__()
        self.subject_id = subject_id
        self.data_dir = Path(data_dir)
        self._cache = {}

    def __getitem__(self, pht_id):
        """
        Return this subject's row from ``<pht_id>.tsv``.

        Raises:
            KeyError: if the file does not exist or holds no row for the subject.
        """
        if pht_id in self._cache:
            return self._cache[pht_id]

        file_path = self.data_dir / f"{pht_id}.tsv"
        if not file_path.exists():
            raise KeyError(f"No such file: {file_path}")

        # csv yields every cell as text, so a non-string subject id (e.g. an int)
        # could never compare equal; normalise it once before scanning.
        wanted_id = self.subject_id
        if wanted_id is not None and not isinstance(wanted_id, str):
            wanted_id = str(wanted_id)

        with open(file_path, newline='') as f:
            reader = csv.DictReader(f, delimiter="\t")
            for row in reader:
                if row.get("dbGaP_Subject_ID") == wanted_id:
                    self._cache[pht_id] = row
                    return row

        raise KeyError(f"Subject {self.subject_id} not found in {pht_id}")

In [None]:
# Reload both schemas; the target schema path here is relative to the kernel's
# current working directory.
source_sv = SchemaView(
    "/sbgenomics/workspace/output/Schema_FHS_v31_c1/schema-automator-data/Schema_FHS_v31_c1.yaml"
)
source_schema = source_sv.schema
target_sv = SchemaView("NHLBI-BDC-DMC-HM/src/bdchm/schema/bdchm.yaml")
target_schema = target_sv.schema

# Pull the first data row of pht000030 as the working sample row.
data_loader = TsvLoader("/sbgenomics/workspace/output/FHS_v31_c1/pht000030.tsv")
data_rows = data_loader.iter_instances()
cur_row = first_row = next(data_rows)

# Spec directory, also relative to the working directory; the trailing slash is
# required because later cells concatenate file names onto it.
var_dir = "NHLBI-BDC-DMC-HV/priority_variables_transform/FHS/"
print(cur_row)

In [None]:
# Person - top level class.
# The spec is parsed from the inline YAML below: rows are populated from pht000030,
# species is the constant string 'Homo Sapiens', and identity is copied from
# dbGaP_Subject_ID. A later cell nests the Participant spec under this dict.
person_yaml = yaml.safe_load("""
class_derivations:
  Person:
    populated_from: pht000030
    slot_derivations:
      species:
        expr: "'Homo Sapiens'"
      identity:
        populated_from: dbGaP_Subject_ID
""")

# # Dump to YAML
# with open(var_dir + "person" + ".yaml", "w") as f:
#     yaml.dump(person_yaml, f, sort_keys=False, allow_unicode=True)

In [None]:
# Participant - top level class for study data.
# Rebinds participant_yaml: rows are populated from pht000030, identity is copied
# from dbGaP_Subject_ID, and member_of_research_study is the constant string 'FHS'.
# Later cells mutate this dict in place to attach nested slots.
participant_yaml = yaml.safe_load("""
class_derivations:
  Participant:
    populated_from: pht000030
    slot_derivations:
      # associated_participant: 
      #   populated_from: phv00007675
      identity:
        populated_from: dbGaP_Subject_ID
      member_of_research_study:
        expr: "'FHS'"
""")

# # Dump to YAML
# with open(var_dir + "participant" + ".yaml", "w") as f:
#     yaml.dump(participant_yaml, f, sort_keys=False, allow_unicode=True)

In [None]:
# Run the Participant spec against the current sample row.
input_data = cur_row
transform_spec = participant_yaml

# A fresh transformer, wired to new views built from the two loaded schemas.
transformer = ObjectTransformer(unrestricted_eval=True)
transformer.source_schemaview = SchemaView(source_schema)
transformer.target_schemaview = SchemaView(target_schema)
transformer.create_transformer_specification(transform_spec)

result = transformer.map_object(input_data, source_type="pht000030")

# Reaching this line means map_object did not raise.
print(result, "Transformation Successful!", sep="\n")

In [None]:
# Locate (creating on first use) the Person derivation and its "participants" slot.
person_class = (
    person_yaml
    .setdefault("class_derivations", {})
    .setdefault("Person", {})
)
person_participants_slot = (
    person_class
    .setdefault("slot_derivations", {})
    .setdefault("participants", {})
)

# Nest the Participant spec under that slot. The dict is stored by reference, and an
# object_derivations entry that already exists is left untouched.
person_participants_slot.setdefault("object_derivations", [participant_yaml])

In [None]:
# Run the Person spec against the current sample row.
input_data = cur_row
transform_spec = person_yaml

# A fresh transformer, wired to new views built from the two loaded schemas.
transformer = ObjectTransformer(unrestricted_eval=True)
transformer.source_schemaview = SchemaView(source_schema)
transformer.target_schemaview = SchemaView(target_schema)
transformer.create_transformer_specification(transform_spec)

result = transformer.map_object(input_data, source_type="pht000030")

# Reaching this line means map_object did not raise.
print(result, "Transformation Successful!", sep="\n")

In [None]:
# Let's start with one simple condition
with open(var_dir + "angina" + ".yaml") as spec_file:
    # Context manager closes the spec file; the previous bare open() leaked the handle.
    angina = yaml.safe_load(spec_file)

# Get (creating if absent) the "conditions" slot on the Participant class derivation.
# The variable keeps its historical *_exposures_slot name, which other cells reuse.
participant_cls = participant_yaml.setdefault("class_derivations", {}).setdefault("Participant", {})
participant_exposures_slot = participant_cls.setdefault("slot_derivations", {}).setdefault("conditions", {})

# Attach angina as the only condition derivation for now. setdefault only writes when
# the key is absent, so re-running this cell keeps an existing value.
participant_exposures_slot.setdefault("object_derivations", [angina])

In [None]:
# Show the Person spec as YAML, including whatever has been nested into it so far.
print(yaml.dump(person_yaml))

In [None]:
# Run the Person spec against one sample row from each of two tables at once.
transform_spec = person_yaml

# First data row of pht000030.
data_loader = TsvLoader("/sbgenomics/workspace/output/FHS_v31_c1/pht000030.tsv")
data_rows = data_loader.iter_instances()
cur_row = first_row = next(data_rows)

# First data row of pht000395.
other_data_loader = TsvLoader("/sbgenomics/workspace/output/FHS_v31_c1/pht000395.tsv")
other_data_rows = other_data_loader.iter_instances()
other_cur_row = other_first_row = next(other_data_rows)

# The input is keyed by table id rather than being a single flat row.
input_data = {"pht000030": cur_row, "pht000395": other_cur_row}

# A fresh transformer, wired to new views built from the two loaded schemas.
transformer = ObjectTransformer(unrestricted_eval=True)
transformer.source_schemaview = SchemaView(source_schema)
transformer.target_schemaview = SchemaView(target_schema)
transformer.create_transformer_specification(transform_spec)

result = transformer.map_object(input_data, source_type="FHS")

# Reaching this line means map_object did not raise.
print(result, "Transformation Successful!", sep="\n")

In [None]:
# Load Condition class derivations.
# Path.read_text() opens and closes each file; the previous bare open() calls leaked
# one file handle per spec.
condition_dir = Path(var_dir + "condition/")
angina = yaml.safe_load((condition_dir / "angina.yaml").read_text())
asthma = yaml.safe_load((condition_dir / "asthma.yaml").read_text())
copd = yaml.safe_load((condition_dir / "copd.yaml").read_text())
diabetes = yaml.safe_load((condition_dir / "diabetes.yaml").read_text())
hist_hrt_failure = yaml.safe_load((condition_dir / "hist_hrt_failure.yaml").read_text())
hist_my_inf = yaml.safe_load((condition_dir / "hist_my_inf.yaml").read_text())
hyperten = yaml.safe_load((condition_dir / "hyperten.yaml").read_text())
pad = yaml.safe_load((condition_dir / "pad.yaml").read_text())
slp_ap = yaml.safe_load((condition_dir / "slp_ap.yaml").read_text())
stroke = yaml.safe_load((condition_dir / "stroke.yaml").read_text())
stroke_isch_atk = yaml.safe_load((condition_dir / "stroke_isch_atk.yaml").read_text())

# Get (creating if absent) the "conditions" slot on the Participant class derivation.
# The variable keeps its historical *_exposures_slot name, which other cells reuse.
participant_cls = participant_yaml.setdefault("class_derivations", {}).setdefault("Participant", {})
participant_exposures_slot = participant_cls.setdefault("slot_derivations", {}).setdefault("conditions", {})

# Assign rather than setdefault: an earlier cell may already have populated this key
# with a single condition, in which case setdefault would silently keep that value
# and none of the specs loaded above would be attached. Assignment also makes the
# cell idempotent on re-run.
participant_exposures_slot["object_derivations"] = [
    angina,
    asthma,
    copd,
    diabetes,
    hist_hrt_failure,
    hist_my_inf,
    hyperten,
    pad,
    slp_ap,
    stroke,
    stroke_isch_atk,
]

In [None]:
# Demography class derivation, parsed from the inline YAML below. sex, ethnicity and
# race are each copied from a phv column and recoded through value_mappings.
# NOTE(review): populated_from is "COPDGene" while var_dir in this notebook points at
# an FHS spec directory - confirm this block is not a leftover from another study.
demography_yaml = yaml.safe_load("""
class_derivations:
  Demography:
    populated_from: COPDGene
    slot_derivations:
      associated_participant:
        populated_from: phv00159568
      sex:
        populated_from: phv00159571
        value_mappings:
          '1': OMOP:8507  # MALE
          '2': OMOP:8532  # FEMALE
      ethnicity:
        populated_from: phv00159573
        value_mappings:
          '1': HISPANIC_OR_LATINO
          '2': NOT_HISPANIC_OR_LATINO
      race:
        populated_from: phv00159572
        value_mappings:
          '1': OMOP:8527
          '2': OMOP:8516
          '3': OMOP:8515
          '4': OMOP:8557
          '5': OMOP:8657
          '6': OMOP:45880900
          '7': OMOP:8552
""")

# Dump to YAML (overwrites <var_dir>demography.yaml; key order is preserved).
with open(var_dir + "demography" + ".yaml", "w") as f:
    yaml.dump(demography_yaml, f, sort_keys=False, allow_unicode=True)

In [None]:
# Locate (creating on first use) the Participant derivation and its "demography" slot.
participant_cls = (
    participant_yaml
    .setdefault("class_derivations", {})
    .setdefault("Participant", {})
)
participant_demography_slot = (
    participant_cls
    .setdefault("slot_derivations", {})
    .setdefault("demography", {})
)

# Nest the Demography spec under that slot; an existing entry is left untouched.
participant_demography_slot.setdefault("object_derivations", [demography_yaml])

In [None]:
# Load DrugExposure class derivations.
# Path.read_text() opens and closes each file; the previous bare open() calls leaked
# one file handle per spec.
exposure_dir = Path(var_dir + "exposure/")
tak_betablk_resp = yaml.safe_load((exposure_dir / "tak_betablk_resp.yaml").read_text())
tak_betablk = yaml.safe_load((exposure_dir / "tak_betablk.yaml").read_text())
tak_adrenergics = yaml.safe_load((exposure_dir / "tak_adrenergics.yaml").read_text())
tak_cort_steroid_resp = yaml.safe_load((exposure_dir / "tak_cort_steroid_resp.yaml").read_text())
tak_cort_steroid_oral = yaml.safe_load((exposure_dir / "tak_cort_steroid_oral.yaml").read_text())
tak_anabolic_steroid = yaml.safe_load((exposure_dir / "tak_anabolic_steroid.yaml").read_text())

# Get (creating if absent) the "exposures" slot on the Participant class derivation.
participant_cls = participant_yaml.setdefault("class_derivations", {}).setdefault("Participant", {})
participant_exposures_slot = participant_cls.setdefault("slot_derivations", {}).setdefault("exposures", {})

# Attach the exposure derivations. setdefault only writes when the key is absent, so
# re-running this cell keeps an existing value.
participant_exposures_slot.setdefault("object_derivations", [
    tak_betablk_resp,
    tak_betablk,
    tak_adrenergics,
    tak_cort_steroid_resp,
    tak_cort_steroid_oral,
    tak_anabolic_steroid,
])

In [None]:
# Load MeasurementObservation class derivations.
# Path.read_text() opens and closes each file; the previous bare open() calls leaked
# one file handle per spec.
measurement_dir = Path(var_dir + "measurement_observation/")
bdy_hgt = yaml.safe_load((measurement_dir / "bdy_hgt.yaml").read_text())
bdy_wgt = yaml.safe_load((measurement_dir / "bdy_wgt.yaml").read_text())
bmi = yaml.safe_load((measurement_dir / "bmi.yaml").read_text())
bp_diastolic = yaml.safe_load((measurement_dir / "bp_diastolic.yaml").read_text())
bp_systolic = yaml.safe_load((measurement_dir / "bp_systolic.yaml").read_text())
fev1 = yaml.safe_load((measurement_dir / "fev1.yaml").read_text())
fev1_fvc = yaml.safe_load((measurement_dir / "fev1_fvc.yaml").read_text())
fvc = yaml.safe_load((measurement_dir / "fvc.yaml").read_text())
hrt_rt = yaml.safe_load((measurement_dir / "hrt_rt.yaml").read_text())
spo2 = yaml.safe_load((measurement_dir / "spo2.yaml").read_text())

# Get (creating if absent) the "exposures" slot on the Participant class derivation.
participant_cls = participant_yaml.setdefault("class_derivations", {}).setdefault("Participant", {})
participant_exposures_slot = participant_cls.setdefault("slot_derivations", {}).setdefault("exposures", {})

# NOTE(review): this writes to the "exposures" slot, not a measurement slot, and the
# list mixes bdy_hgt with tak_* exposure specs while the other measurement specs
# loaded above are unused. setdefault is also a no-op if "exposures" already has
# object_derivations (the DrugExposure cell sets it). Confirm the intended slot and
# contents; behavior is deliberately left unchanged here.
participant_exposures_slot.setdefault("object_derivations", [
    bdy_hgt,
    tak_betablk,
    tak_adrenergics,
    tak_cort_steroid_resp,
    tak_cort_steroid_oral,
    tak_anabolic_steroid,
])

In [None]:
# Observation class derivation for smoking status, parsed from the inline YAML below.
# value_enum is a chained conditional expression over two phv columns.
# NOTE(review): populated_from is "COPDGene" while var_dir in this notebook points at
# an FHS spec directory - confirm this block is not a leftover from another study.
observation_yaml = yaml.safe_load("""
class_derivations:
  Observation:
    populated_from: COPDGene
    slot_derivations:
      associated_participant:
        populated_from: phv00159568
      observation_type:
        expr: "'OMOP:4282779'"  # Cigarette smoking status
      value_enum:
        expr: "'OMOP:40766945' if {phv00159749} == 1 else 'OMOP:45883458' if {phv00159747} == 1 else 'OMOP:45883537'"
""")

# Dump to YAML (overwrites <var_dir>observation.yaml; key order is preserved).
with open(var_dir + "observation" + ".yaml", "w") as f:
    yaml.dump(observation_yaml, f, sort_keys=False, allow_unicode=True)

In [None]:
# Locate (creating on first use) the Participant derivation and its "observations" slot.
participant_cls = (
    participant_yaml
    .setdefault("class_derivations", {})
    .setdefault("Participant", {})
)
participant_observations_slot = (
    participant_cls
    .setdefault("slot_derivations", {})
    .setdefault("observations", {})
)

# Nest the Observation spec under that slot; an existing entry is left untouched.
participant_observations_slot.setdefault("object_derivations", [observation_yaml])

# print(yaml.dump(person_yaml))

In [None]:
# hist_cor_angio
# Procedure class derivation parsed from the inline YAML below: procedure_concept is a
# constant OMOP code and procedure_status recodes phv00159632 ('0'/'1') to
# ABSENT/PRESENT.
# NOTE(review): populated_from is "COPDGene" while var_dir in this notebook points at
# an FHS spec directory - confirm this block is not a leftover from another study.
hist_cor_angio = yaml.safe_load("""
class_derivations:
  Procedure:
    populated_from: COPDGene
    slot_derivations:
      associated_participant:
        populated_from: phv00159568
      procedure_concept:
        expr: "'OMOP:4184832'"  # Coronary angioplasty
      procedure_status:
        populated_from: phv00159632
        value_mappings:
          '0': ABSENT
          '1': PRESENT
""")

# Dump to YAML (overwrites <var_dir>procedure/hist_cor_angio.yaml; the procedure/
# directory must already exist, since open() does not create it).
with open(var_dir + "procedure/" + "hist_cor_angio" + ".yaml", "w") as f:
    yaml.dump(hist_cor_angio, f, sort_keys=False, allow_unicode=True)

In [None]:
# Map the current sample row with the hist_cor_angio Procedure spec.
# NOTE(review): source_type is "COPDGene", matching the spec's populated_from, but
# cur_row in this notebook is read from FHS pht*.tsv tables - confirm the row actually
# carries the phv columns this spec reads.
transform_spec = hist_cor_angio

input_data = cur_row

# Create ObjectTransformer and apply transformation
transformer = ObjectTransformer(unrestricted_eval=True)
transformer.source_schemaview = SchemaView(source_schema)
transformer.target_schemaview = SchemaView(target_schema)
transformer.create_transformer_specification(transform_spec)

result = transformer.map_object(input_data, source_type="COPDGene")

print(result)
# Printed unconditionally once map_object returns without raising.
print("Transformation Successful!")

In [None]:
# hist_cor_bypg
# Procedure class derivation parsed from the inline YAML below: procedure_concept is a
# constant OMOP code and procedure_status recodes phv00159631 ('0'/'1') to
# ABSENT/PRESENT.
# NOTE(review): populated_from is "COPDGene" while var_dir in this notebook points at
# an FHS spec directory - confirm this block is not a leftover from another study.
hist_cor_bypg = yaml.safe_load("""
class_derivations:
  Procedure:
    populated_from: COPDGene
    slot_derivations:
      associated_participant:
        populated_from: phv00159568
      procedure_concept:
        expr: "'OMOP:4336464'"  #coronary bypass graft
      procedure_status:
        populated_from: phv00159631
        value_mappings:
          '0': ABSENT
          '1': PRESENT
""")

# Dump to YAML (overwrites <var_dir>procedure/hist_cor_bypg.yaml; the procedure/
# directory must already exist, since open() does not create it).
with open(var_dir + "procedure/" + "hist_cor_bypg" + ".yaml", "w") as f:
    yaml.dump(hist_cor_bypg, f, sort_keys=False, allow_unicode=True)

In [None]:
# Locate (creating on first use) the Participant derivation and its "procedures" slot.
participant_cls = (
    participant_yaml
    .setdefault("class_derivations", {})
    .setdefault("Participant", {})
)
participant_procedures_slot = (
    participant_cls
    .setdefault("slot_derivations", {})
    .setdefault("procedures", {})
)

# Nest both Procedure specs under that slot; an existing entry is left untouched.
participant_procedures_slot.setdefault("object_derivations", [hist_cor_angio, hist_cor_bypg])

In [None]:
# edu_lvl
# SdohObservation class derivation parsed from the inline YAML below: category is the
# constant 'EDUCATIONAL_ATTAINMENT' and value_enum recodes phv00159773 ('1'..'6').
# NOTE(review): populated_from is "COPDGene" while var_dir in this notebook points at
# an FHS spec directory - confirm this block is not a leftover from another study.
edu_lvl = yaml.safe_load("""
class_derivations:
  SdohObservation:
    populated_from: COPDGene
    slot_derivations:
      associated_participant:
        populated_from: phv00159568
      category:
        expr: "'EDUCATIONAL_ATTAINMENT'"
      value_enum:
        populated_from: phv00159773
        value_mappings:
          '1': 8TH_GRADE_OR_LESS
          '2': HIGH_SCHOOL_NO_DIPLOMA
          '3': HIGH_SCHOOL_GRADUATE_GED
          '4': SOME_COLLEGE_OR_TECH_NO_DEGREE
          '5': COLLEGE_OR_TECH_WITH_DEGREE
          '6': MASTERS_OR_DOCTORAL_DEGREE
""")

# Dump to YAML (overwrites <var_dir>sdoh_observation/edu_lvl.yaml; the
# sdoh_observation/ directory must already exist, since open() does not create it).
with open(var_dir + "sdoh_observation/" + "edu_lvl" + ".yaml", "w") as f:
    yaml.dump(edu_lvl, f, sort_keys=False, allow_unicode=True)

In [None]:
# Locate (creating on first use) the Participant derivation and its
# "sdoh_observations" slot.
participant_cls = (
    participant_yaml
    .setdefault("class_derivations", {})
    .setdefault("Participant", {})
)
participant_sdoh_observations_slot = (
    participant_cls
    .setdefault("slot_derivations", {})
    .setdefault("sdoh_observations", {})
)

# Nest the edu_lvl spec under that slot; an existing entry is left untouched.
participant_sdoh_observations_slot.setdefault("object_derivations", [edu_lvl])

In [None]:
# Transform every row of the currently loaded table with the full Person spec and
# write the results to transformed_person_data_DS_CS.yaml.
transform_spec = person_yaml

input_data = cur_row  # not used below; kept because other cells read this name

# Create ObjectTransformer and apply transformation
transformer = ObjectTransformer(unrestricted_eval=True)
transformer.source_schemaview = SchemaView(source_schema)
transformer.target_schemaview = SchemaView(target_schema)
transformer.create_transformer_specification(transform_spec)

# Start from a fresh iterator. The shared data_rows iterator has already had its first
# row consumed by next() in the load cell, and is fully exhausted after any earlier
# run of this loop, which silently produced a short or empty output file.
data_rows = data_loader.iter_instances()

# Transform all rows
# NOTE(review): source_type is "COPDGene", but person_yaml in this notebook is
# populated_from pht000030 - confirm which source type is intended.
output_data = []
for row in data_rows:
    result = transformer.map_object(row, source_type="COPDGene")
    if result:  # Avoid None or empty dicts
        output_data.append(result)

# Final wrapped structure (key should match the collection slot, or be schema-compatible)
wrapped_output = {
    "persons": output_data
}

# Dump to YAML
with open("transformed_person_data_DS_CS.yaml", "w") as f:
    yaml.dump(wrapped_output, f, sort_keys=False, allow_unicode=True)

print("Transformation Successful!")

In [None]:
# Repeat of the batch transform: map every row of the currently loaded table with the
# full Person spec and rewrite transformed_person_data_DS_CS.yaml.
transform_spec = person_yaml

input_data = cur_row  # not used below; kept because other cells read this name

# Create ObjectTransformer and apply transformation
transformer = ObjectTransformer(unrestricted_eval=True)
transformer.source_schemaview = SchemaView(source_schema)
transformer.target_schemaview = SchemaView(target_schema)
transformer.create_transformer_specification(transform_spec)

# Start from a fresh iterator. Reusing the shared data_rows iterator after an earlier
# batch loop has drained it yields no rows, which overwrote the output file with an
# empty "persons" list.
data_rows = data_loader.iter_instances()

# Transform all rows
# NOTE(review): source_type is "COPDGene", but person_yaml in this notebook is
# populated_from pht000030 - confirm which source type is intended.
output_data = []
for row in data_rows:
    result = transformer.map_object(row, source_type="COPDGene")
    if result:  # Avoid None or empty dicts
        output_data.append(result)

# Final wrapped structure (key should match the collection slot, or be schema-compatible)
wrapped_output = {
    "persons": output_data
}

# Dump to YAML
with open("transformed_person_data_DS_CS.yaml", "w") as f:
    yaml.dump(wrapped_output, f, sort_keys=False, allow_unicode=True)

print("Transformation Successful!")

In [None]:
# Dump final Person class to YAML.
# Overwrites <var_dir>person.yaml with person_yaml as it stands now, including every
# spec nested into it by earlier cells; sort_keys=False preserves insertion order.
with open(var_dir + "person" + ".yaml", "w") as f:
    yaml.dump(person_yaml, f, sort_keys=False, allow_unicode=True)