In [78]:
import pandas as pd
import pyphetools
from IPython.display import display, HTML
from pyphetools.creation import *
from pyphetools.visualization import *
from pyphetools.validation import *

# Show entire column contents (important: long cell values are otherwise truncated).
pd.set_option('display.max_colwidth', None)
print(f"Using pyphetools version {pyphetools.__version__}")

# Parse the HPO JSON once; the concept recognizer, version string and ontology
# object used by later cells all come from this single parser.
parser = HpoParser("phenopackets/hp.json")
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
hpo_ontology = parser.get_ontology()
metadata = MetaData(created_by="ORCID:0000-0002-1526-4557")
metadata.default_versions_with_hpo(version=hpo_version)
print(f"HPO version {hpo_version}")

df = pd.read_excel("phenopackets/TBX1_phenotype_genotype_trim.xlsx")
# Only the last expression of a cell is rendered automatically; a bare
# `df.head(2)` here would be evaluated and discarded, so display it explicitly.
display(df.head(2))

# Try to map the spreadsheet columns to HPO terms ("1" = observed, "0" = excluded)
# and show which columns were mapped and which were not.
generator = SimpleColumnMapperGenerator(df=df, hpo_cr=hpo_cr, observed="1", excluded="0")
column_mapper_list = generator.try_mapping_columns()
display(HTML(generator.to_html()))


Using pyphetools version 0.9.49
HPO version 2024-02-08


Result,Columns
Mapped,HP:0000316; HP:0000581; HP:0000369; HP:0000377; HP:0000460; HP:0000175; HP:0000347; HP:0001999; HP:0000220; HP:0001636; HP:0010882; HP:0001631; HP:0031834; HP:0011613; HP:0012020; HP:0001629; HP:0001626; HP:0000829; HP:0010515; HP:0005403; HP:0001263; HP:0000407; HP:0010442; HP:0001159; HP:0004322; HP:0100647
Unmapped,Authors; Individual; PMID; Title; Ref. Genome; gene; transcript; allele; variant.comment; Age; Sex; Miscelaneous


In [79]:
# Transcript and genome build handed to the VariantValidator wrapper.
tbx1_transcript = "NM_080647.1"
varValidator = VariantValidator(genome_build="hg38", transcript=tbx1_transcript)

# One entry per distinct string in the "allele" column, keyed by that string
# and holding whatever encode_hgvs returns for it.
var_d = {
    hgvs: varValidator.encode_hgvs(hgvs)
    for hgvs in df["allele"].unique()
}
print(f"Extracted {len(var_d)} variants with Variant Validator")

https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_080647.1%3Ac.1253del/NM_080647.1?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_080647.1%3Ac.1274_1281del/NM_080647.1?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_080647.1%3Ac.1299_1321del/NM_080647.1?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_080647.1%3Ac.1293_1315del/NM_080647.1?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_080647.1%3Ac.1399_1428dup/NM_080647.1?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_080647.1%3Ac.146_202del/NM_080647.1?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_080647.1%3Ac.443T>A/NM_080647.1?content-type=applicatio

In [80]:
# Variant mapper built from the validated variants in var_d, reading the
# "allele" column and using heterozygous as the default genotype.
varMapper = VariantColumnMapper(
    variant_d=var_d,
    variant_column_name="allele",
    default_genotype="heterozygous",
)

In [81]:
# Mappers for the "Sex" column (symbols "M" / "F") and the "Age" column
# (constructed with AgeColumnMapper.by_year).
sexMapper = SexColumnMapper(
    column_name="Sex",
    male_symbol="M",
    female_symbol="F",
)
ageMapper = AgeColumnMapper.by_year(column_name="Age")

In [82]:
# Disease lookup for the "gene" column: the key "TBX1" maps to OMIM:188400.
TBX1 = Disease(
    disease_id="OMIM:188400",
    disease_label="DiGeorge syndrome",
)
disease_d = {"TBX1": TBX1}
diseaseMapper = DiseaseIdColumnMapper(
    column_name="gene",
    disease_id_map=disease_d,
)

In [83]:
# Combine the table, the HPO column mappers and the per-column mappers built
# in the earlier cells into a single cohort encoder.
encoder = MixedCohortEncoder(
    # input table and the columns that identify each row / publication
    df=df,
    individual_column_name="Individual",
    pmid_column="PMID",
    title_column="Title",
    # phenotype mapping
    hpo_cr=hpo_cr,
    column_mapper_list=column_mapper_list,
    # disease, variant, age and sex mappers
    disease_id_mapper=diseaseMapper,
    variant_mapper=varMapper,
    agemapper=ageMapper,
    sexmapper=sexMapper,
    metadata=metadata,
)

In [84]:
# Ask the encoder for the list of individuals used by the cells below.
# NOTE(review): the saved output of this cell is
#   TypeError: Individual.__init__() got an unexpected keyword argument 'age'
# and the first cell reports pyphetools 0.9.49. The error is raised from library
# code, not from this notebook — presumably a version mismatch between
# MixedCohortEncoder and Individual; confirm and re-run with a compatible
# pyphetools release so that the following cells can execute.
individuals = encoder.get_individuals()

TypeError: Individual.__init__() got an unexpected keyword argument 'age'

In [None]:
# Validate the encoded individuals (min_hpo=1, mono-allelic requirement) and
# render the quality-control summary as HTML.
cvalidator = CohortValidator(
    cohort=individuals,
    ontology=hpo_ontology,
    min_hpo=1,
    allelic_requirement=AllelicRequirement.MONO_ALLELIC,
)
qc = QcVisualizer(cohort_validator=cvalidator)
display(HTML(qc.to_summary_html()))

In [None]:
# Take the validator's error-free list and tabulate that list. Previously
# `cohort` was computed but never used, and the table was built from the
# unvalidated `individuals` instead.
cohort = cvalidator.get_error_free_individual_list()
table = PhenopacketTable(individual_list=cohort, metadata=metadata)
display(HTML(table.to_html()))

In [None]:
# Write phenopackets for the error-free `cohort` returned by the validator in
# the previous cell, rather than for the unvalidated `individuals` list, so
# entries that failed QC are not exported.
Individual.output_individuals_as_phenopackets(individual_list=cohort, metadata=metadata)