# Map-INCLUDE Brain Power

This notebook maps the Brain Power study data to the INCLUDE LinkML model.


In [1]:
import os

import pandas as pd
import yaml
from linkml.validator.loaders import TsvLoader
from linkml_map.transformer.object_transformer import ObjectTransformer
from linkml_runtime import SchemaView

In [2]:
class DataLoader:
    """Dictionary-style access to ``<pht_id>.tsv`` files under a base directory.

    ``loader[pht_id]`` returns the instances read from
    ``<base_path>/<pht_id>.tsv`` via ``TsvLoader``; ``pht_id in loader``
    reports whether that file exists.
    """

    def __init__(self, base_path):
        """Remember the directory that holds the ``<pht_id>.tsv`` files."""
        self.base_path = base_path

    def _tsv_path(self, pht_id):
        """Return the path of the TSV file expected for *pht_id*."""
        return os.path.join(self.base_path, f"{pht_id}.tsv")

    def __getitem__(self, pht_id):
        """Return an iterator over the instances in the TSV file for *pht_id*.

        Raises:
            FileNotFoundError: if ``<pht_id>.tsv`` does not exist under
                ``base_path``.
        """
        file_path = self._tsv_path(pht_id)
        # Fail early with a descriptive message instead of letting the
        # loader hit the missing file later.
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"No TSV file found for {pht_id} at {file_path}")
        return TsvLoader(file_path).iter_instances()

    def __contains__(self, pht_id):
        """Return True when a TSV file for *pht_id* exists under ``base_path``."""
        return os.path.exists(self._tsv_path(pht_id))

In [3]:
def single_spec_transform(tsv_file, spec_file, source_schema, target_schema, target_class="Participant"):
    """Map every row of a TSV file to ``target_class`` using one mapping block.

    The spec file is read as YAML. It may contain a list of mapping blocks
    or a single mapping block; the first block whose ``class_derivations``
    contains ``target_class`` is applied to every TSV row, and any later
    matching blocks are ignored.

    Args:
        tsv_file: Path to the tab-separated source data.
        spec_file: Path to the YAML mapping specification.
        source_schema: Source schema, passed to ``SchemaView``.
        target_schema: Target schema, passed to ``SchemaView``.
        target_class: Key to look up in each block's ``class_derivations``.

    Returns:
        A list with one mapped object per TSV row, or an empty list when the
        spec is empty or no block derives ``target_class``.
    """
    # Load the mapping spec.
    with open(spec_file, encoding="utf-8") as f:
        spec = yaml.safe_load(f)

    # Normalise the spec to a list of blocks: an empty file loads as None and
    # a single-block spec loads as a dict, neither of which can be iterated
    # block-by-block directly.
    if spec is None:
        blocks = []
    elif isinstance(spec, dict):
        blocks = [spec]
    else:
        blocks = spec

    # Load the TSV data; missing cells become empty strings.
    # TODO: Consider removing use of pandas and use TSVLoader from LinkML
    df = pd.read_csv(tsv_file, sep="\t")
    df = df.fillna('')
    data = df.to_dict(orient="records")

    # Set up the transformer once and reuse it for every row (one option for
    # fixing value_mappings).
    # NOTE(review): unrestricted_eval=True presumably lets expressions in the
    # spec be evaluated without restrictions -- only run trusted spec files;
    # confirm against the linkml-map documentation.
    transformer = ObjectTransformer(unrestricted_eval=True)
    transformer.source_schemaview = SchemaView(source_schema)
    transformer.target_schemaview = SchemaView(target_schema)

    results = []
    for block in blocks:
        if 'class_derivations' in block and target_class in block['class_derivations']:
            class_spec = block['class_derivations'][target_class]
            # Load this class mapping spec
            transformer.create_transformer_specification(block)
            # The source type is the same for every row, so look it up once.
            source_type = class_spec["populated_from"]
            results = [
                transformer.map_object(row, source_type=source_type)
                for row in data
            ]
            break  # Only process the first matching block

    return results


# Example usage: transform the demographics TSV into Participant objects.
# The commented-out alternatives switch the run to the Condition target class.
results = list(single_spec_transform(
    tsv_file="../data/BrainPower-STUDY/raw_data/TSV_Transformed/demographics_with_age_timepoints.tsv",
    # tsv_file="../data/BrainPower-STUDY/raw_data/TSV_Transformed/healthconditions_all_cols.tsv", # Use with Condition as target class
    spec_file="../data/BrainPower-STUDY/model_transformation/brain_power_transformation_PARTICIPANT-ONLY.yaml",
    # spec_file="../data/BrainPower-STUDY/model_transformation/brain_power_transformation_CONDITION-ONLY.yaml", # Use with Condition as target class
    source_schema="../data/BrainPower-STUDY/study_specific_model/BrainPower_INCLUDE_SCHEMA_v4.yaml",
    target_schema="../data/BrainPower-STUDY/include_schema/include_schema.yaml",
    target_class="Participant"
    # target_class="Condition"
))

# FOR target_class="Participant"
# allow_unicode=True writes non-ASCII characters unescaped, so the file is
# opened as UTF-8 explicitly instead of relying on the platform default.
with open("BP-demographics_transformed_FINAL.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(results, f, sort_keys=False, allow_unicode=True)


# # FOR target_class="Condition"
# with open("BP-conditions_transformed_FINAL.yaml", "w", encoding="utf-8") as f:
#     yaml.safe_dump(results, f, sort_keys=False, allow_unicode=True)
