# HMGCR, Morales-Rosado (2023)

Data derived from [Morales-Rosado JA, et al. (2023) Bi-allelic variants in HMGCR cause an autosomal-recessive progressive limb-girdle muscular dystrophy. Am J Hum Genet;110(6):989-997. PMID:37167966](https://pubmed.ncbi.nlm.nih.gov/37167966/)



In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from IPython.display import HTML, display
from pyphetools.creation import *
from pyphetools.visualization import *
from pyphetools.validation import *
import importlib.metadata
__version__ = importlib.metadata.version("pyphetools")
print(f"Using pyphetools version {__version__}")

Using pyphetools version 0.9.64


In [2]:
PMID="PMID:37167966"
title = "Bi-allelic variants in HMGCR cause an autosomal-recessive progressive limb-girdle muscular dystrophy"
cite = Citation(pmid=PMID, title=title)
parser = HpoParser(hpo_json_file="../hp.json")
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
hpo_ontology = parser.get_ontology()
metadata = MetaData(created_by="ORCID:0000-0002-0736-9199", citation=cite)
metadata.default_versions_with_hpo(version=hpo_version)
print(f"HPO version {hpo_version}")

HPO version 2024-02-27


In [3]:
df = pd.read_excel("input/HMGCR_Morales_2023.xlsx")

In [4]:
dft = df.transpose()
dft.columns = dft.iloc[0]
dft.drop(dft.index[0], inplace=True)
dft['individual_id'] = dft.index  # Set the new column 'patient_id' to be identical to the contents of the index
dft.head() # check the transposed table

individual_id,Age (yr),Sex,cDNA (NM_000859.2),Protein (NP_00850.1),Age of first elevated CK,CK values (HP:0003236),Proximal weakness (HP:0003701),Axial weakness (HP:0003327),Muscle atrophy (HP:0003202),Calf hypertrophy (HP:0008981),...,SDH,Porin+CompII,Porin+Comp II,Porin+CompIII,Porin+CompIV-1,Porin+CompIV-1I,Porin+PDH,Porin,ETC deficiency,individual_id.1
F1-II:1,35,M,"c.1328G>A, c.1867 G>A","p.Arg443Gln, p.Asp623Asn",P6Y,"1,378–4,325",+,+,"+, proximal",+,...,,,,,,,,,,F1-II:1
F1-II:2,37,M,"c.1328G>A, c.1867 G>A","p.Arg443Gln, p.Asp623Asn",P8Y,,+,+,,−,...,+/-,n.r.,n.r.,n.r.,n.r.,n.r.,---,n.r.,n.r.,F1-II:2
F1-II:3,39,M,"c.1328G>A, c.1867 G>A","p.Arg443Gln, p.Asp623Asn",P10Y,,+,,,−,...,,,,,,,,,,F1-II:3
F2-II:1,19,F,"c.365+4A>G, c.2375A>G","?, p.Tyr792Cys",P7Y,"8,500–12,600",+,−,−,−,...,n.r.,---,---,---,---,---,n.r.,+/-,+/-,F2-II:1
F2-II:2,22,M,"c.365+4A>G, c.2375A>G","?, p.Tyr792Cys",P13Y,"2,000–3,000",+,+,"+, diffuse",−,...,+/-,n.r.,n.r.,n.r.,n.r.,n.r.,n.r.,n.r.,+/-,F2-II:2


In [5]:
# add each allele to its own column
def get_allele(cell_contents, n):
    fields = cell_contents.split(",")
    if len(fields) == 1:
        return fields[0].strip()
    return fields[n].strip()
dft["allele_1"] = dft["cDNA (NM_000859.2)"].apply(lambda x: get_allele(x,0))
dft["allele_2"] = dft["cDNA (NM_000859.2)"].apply(lambda x: get_allele(x,1))
#dft.head()

In [6]:
generator = SimpleColumnMapperGenerator(df=dft, observed="+", excluded="-", hpo_cr=hpo_cr)
column_mapper_list = generator.try_mapping_columns()
display(HTML(generator.to_html()))

Result,Columns
Mapped,CK values (HP:0003236); Proximal weakness (HP:0003701); Axial weakness (HP:0003327); Muscle atrophy (HP:0003202); Calf hypertrophy (HP:0008981); Myalgias (HP:0003326); Reduced deep tendon reflexes (HP:0001315); Gait disturbance (HP:0001288); Loss of ambulation (HP:0002505); Reduced respiratory function (HP:0002747); Cardiac abnormalities (HP:0001627)
Unmapped,"Age (yr); Sex; cDNA (NM_000859.2); Protein (NP_00850.1); Age of first elevated CK; Disease progression:; Myofiber granular inclusions (1 per fascile); Degenerating fibers; Rare split fibers; Myofibers with centralized/internalized nuclei; Inflammation; Myonecrosis; Endomysial connective tissue; Fiber size variation; Hypertrophy; Fiber atrophy; Target or targetoid fibers; Ragged red fibers; Rimmed vaculoes; Central core; Whorled fibers; Myelin Bodies ; Membrane turnover; Tubulofilamentous bodies; Mitochondrial features; Glycogen granules; Lipid droplets; Additional Information ; Myosin ATPase (ph 9.4, 4.6 and 4.3); Type 1:2 Abundance of ratio; PAS; NSE (non-specific esterase); Myophosphorylase; Acid phosphatase; Collagen VI; Desmin; Dystrophin; Dysferlin; Calpain 3; Caveolin 3; Merosin (laminin alpha 2); Alpha-dystroglycan; Beta dystroglycan; Beta sarcoglycan; Delta sarcoglycan; Gamma-sarcoglycan; Adhalin; Emerin; Utrophin; NADH; COX; SDH; Porin+CompII; Porin+Comp II; Porin+CompIII; Porin+CompIV-1; Porin+CompIV-1I; Porin+PDH; Porin; ETC deficiency; individual_id; allele_1; allele_2"


In [7]:
ageMapper = AgeColumnMapper.by_year(column_name="Age (yr)")
sexMapper = SexColumnMapper(column_name="Sex", male_symbol="M", female_symbol="F")

In [8]:
centralized_internalized_nuclei_dMapper = SimpleColumnMapper(column_name='Myofibers with centralized/internalized nuclei',
                                                            hpo_id="HP:0003687", hpo_label="Centrally nucleated skeletal muscle fibers",
                                                            observed="+++", excluded="-")
column_mapper_list.append(centralized_internalized_nuclei_dMapper)
centralized_internalized_nuclei_dMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,"original value: ""nan"" -> HP: Centrally nucleated skeletal muscle fibers (HP:0003687) (not measured)",4
1,"original value: ""+++"" -> HP: Centrally nucleated skeletal muscle fibers (HP:0003687) (observed)",3
2,"original value: ""n.r."" -> HP: Centrally nucleated skeletal muscle fibers (HP:0003687) (not measured)",1
3,"original value: ""+/-"" -> HP: Centrally nucleated skeletal muscle fibers (HP:0003687) (not measured)",1


In [9]:
endomysialMapper = SimpleColumnMapper(column_name='Endomysial connective tissue',
                                    hpo_id="HP:0100297", hpo_label="Increased endomysial connective tissue", observed="+++", excluded="-")
column_mapper_list.append(endomysialMapper)
endomysialMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,"original value: ""nan"" -> HP: Increased endomysial connective tissue (HP:0100297) (not measured)",4
1,"original value: ""+/-"" -> HP: Increased endomysial connective tissue (HP:0100297) (not measured)",2
2,"original value: ""+++"" -> HP: Increased endomysial connective tissue (HP:0100297) (observed)",2
3,"original value: ""-"" -> HP: Increased endomysial connective tissue (HP:0100297) (excluded)",1


In [10]:
dropletMapper = SimpleColumnMapper(column_name='Lipid droplets',
                                hpo_id="HP:0012240", hpo_label="Increased intramyocellular lipid droplets", observed="+++", excluded="-")
column_mapper_list.append(dropletMapper)
dropletMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,"original value: ""nan"" -> HP: Increased intramyocellular lipid droplets (HP:0012240) (not measured)",4
1,"original value: ""n.r."" -> HP: Increased intramyocellular lipid droplets (HP:0012240) (not measured)",1
2,"original value: ""+++"" -> HP: Increased intramyocellular lipid droplets (HP:0012240) (observed)",1
3,"original value: ""+/-"" -> HP: Increased intramyocellular lipid droplets (HP:0012240) (not measured)",1
4,"original value: ""-"" -> HP: Increased intramyocellular lipid droplets (HP:0012240) (excluded)",2


In [11]:
type_d = {
 '1<2': 'Type 2 muscle fiber predominance',
 '1>2': 'Type 1 muscle fiber predominance'}
type_ratioMapper = OptionColumnMapper(column_name='Type 1:2 Abundance of ratio',concept_recognizer=hpo_cr, option_d=type_d)
column_mapper_list.append(type_ratioMapper)
type_ratioMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,Type 2 muscle fiber predominance (HP:0010602) (observed),1
1,Type 1 muscle fiber predominance (HP:0003803) (observed),4


In [12]:
dystroglycanMapper = SimpleColumnMapper(column_name='Alpha-dystroglycan',
                                        hpo_id="HP:0030099", hpo_label="Reduced muscle fiber alpha dystroglycan",
                                        observed="---", excluded="+")
column_mapper_list.append(dystroglycanMapper)
dystroglycanMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,"original value: ""nan"" -> HP: Reduced muscle fiber alpha dystroglycan (HP:0030099) (not measured)",4
1,"original value: ""+/-"" -> HP: Reduced muscle fiber alpha dystroglycan (HP:0030099) (not measured)",3
2,"original value: ""---"" -> HP: Reduced muscle fiber alpha dystroglycan (HP:0030099) (observed)",1
3,"original value: ""n.r."" -> HP: Reduced muscle fiber alpha dystroglycan (HP:0030099) (not measured)",1


In [13]:
elevatedCKmapper = SimpleColumnMapper(column_name="Age of first elevated CK", hpo_id="HP:0003236",
                                    hpo_label="Elevated circulating creatine kinase concentration",
                                    observed="+", excluded="-")
column_mapper_list.append(elevatedCKmapper)
elevatedCKmapper.preview_column(dft)

ValueError: onset argument must be PyPheToolsAge or subclass but was <class 'str'>

In [None]:
disease = Disease(disease_id="OMIM:620375", disease_label="Muscular dystrophy, limb-girdle, autosomal recessive 28")
encoder = CohortEncoder(df=dft,
                        hpo_cr=hpo_cr,
                        column_mapper_list=column_mapper_list,
                        individual_column_name="individual_id",
                        metadata=metadata,
                        agemapper=ageMapper,
                        sexmapper=sexMapper)
encoder.set_disease(disease=disease)

In [None]:
vmanager = VariantManager(df=dft, individual_column_name="individual_id", gene_symbol="HMGCR", 
                          allele_1_column_name="allele_1", allele_2_column_name="allele_2", transcript="NM_000859.2")
individuals = encoder.get_individuals()
var_d = vmanager.get_variant_d()
for k, v in var_d.items():
    print(k,v)

In [None]:
dft["individual_id"]

In [None]:
cvalidator = CohortValidator(cohort=individuals, ontology=hpo_ontology, min_hpo=1, allelic_requirement=AllelicRequirement.BI_ALLELIC)
qc = QcVisualizer(cohort_validator=cvalidator)
display(HTML(qc.to_summary_html()))

In [None]:
table = IndividualTable(individual_list=individuals, metadata=metadata)
display(HTML(table.to_html()))

In [None]:
Individual.output_individuals_as_phenopackets(individual_list=individuals, metadata=metadata)