# Map INCLUDE DS-Connect Data



The mapping uses the following input files, listed as `argument="path"`:
- `tsv_file="../data/DS-Connect-STUDY/raw_data/TSV_preprocessed/dsc_demographics_preprocessed.tsv"`
- `spec_file="../data/DS-Connect-STUDY/model_transformation/dsc_transformation_PARTICIPANT-ONLY.yaml"`
- `source_schema="../data/DS-Connect-STUDY/study_specific_model/DS-Connect_INCLUDE_SCHEMA_v5.yaml"`
- `target_schema="../data/DS-Connect-STUDY/include_schema/include_schema.yaml"`


Map the study data to the INCLUDE LinkML model

In [None]:
import pandas as pd
import yaml
from linkml_map.transformer.object_transformer import ObjectTransformer
from linkml_runtime import SchemaView

In [None]:
def single_spec_transform(tsv_file, spec_file, source_schema, target_schema, target_class="Participant"):
    """Map every row of a TSV file to ``target_class`` using one mapping block.

    The spec file is searched for the first block whose ``class_derivations``
    contains ``target_class``; only that block is applied, and every TSV row
    is passed through it.

    Args:
        tsv_file: Path to the tab-separated source data.
        spec_file: Path to the YAML transformation spec. It may contain a
            list of blocks or a single mapping with ``class_derivations``.
        source_schema: Path (or other value accepted by ``SchemaView``) of
            the schema describing the source data.
        target_schema: Path (or other value accepted by ``SchemaView``) of
            the schema describing the target model.
        target_class: Name of the class derivation to apply.

    Returns:
        A list with one ``map_object`` result per TSV row, in file order.
        The list is empty when no block derives ``target_class``.
    """
    # Read the spec as UTF-8 explicitly so parsing does not depend on the
    # platform's default encoding.
    with open(spec_file, encoding="utf-8") as f:
        spec = yaml.safe_load(f)

    # A spec file may hold a single mapping rather than a list of blocks.
    # Iterating a dict would yield its keys, so wrap it; an empty file
    # (None) is treated as "no blocks".
    if isinstance(spec, dict):
        blocks = [spec]
    else:
        blocks = spec or []

    # Load the TSV data; missing cells become empty strings instead of NaN.
    # TODO: Consider removing use of pandas and use TsvLoader from LinkML
    df = pd.read_csv(tsv_file, sep="\t")
    df = df.fillna("")
    data = df.to_dict(orient="records")

    # Set up the transformer once and reuse it for every row.
    # NOTE(review): unrestricted_eval=True presumably allows expressions in
    # the spec to be evaluated without restriction -- only use trusted spec
    # files; confirm against the linkml-map documentation.
    transformer = ObjectTransformer(unrestricted_eval=True)
    transformer.source_schemaview = SchemaView(source_schema)
    transformer.target_schemaview = SchemaView(target_schema)

    for block in blocks:
        if not isinstance(block, dict):
            continue
        derivations = block.get("class_derivations") or {}
        if target_class not in derivations:
            continue

        class_spec = derivations[target_class]
        # Load this block as the active mapping spec.
        transformer.create_transformer_specification(block)
        # Only the first matching block is processed.
        return [
            transformer.map_object(row, source_type=class_spec["populated_from"])
            for row in data
        ]

    return []


# Example usage: map the preprocessed DS-Connect demographics TSV to the
# default target class ("Participant"). single_spec_transform already returns
# a list, so no extra list() wrapper is needed.
results = single_spec_transform(
    tsv_file="../data/DS-Connect-STUDY/raw_data/TSV_preprocessed/dsc_demographics_preprocessed.tsv",
    spec_file="../data/DS-Connect-STUDY/model_transformation/dsc_transformation_PARTICIPANT-ONLY.yaml",
    source_schema="../data/DS-Connect-STUDY/study_specific_model/DS-Connect_INCLUDE_SCHEMA_v5.yaml",
    target_schema="../data/DS-Connect-STUDY/include_schema/include_schema.yaml",
)

# FOR target_class="Participant"
# allow_unicode=True writes non-ASCII characters unescaped, so the file is
# opened as UTF-8 explicitly rather than with the platform default encoding.
with open("DSC-demographics_transformed_FINAL.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(results, f, sort_keys=False, allow_unicode=True)


# FOR target_class="Condition"
# with open("conditions_transformed_FINAL.yaml", "w", encoding="utf-8") as f:
#     yaml.safe_dump(results, f, sort_keys=False, allow_unicode=True)