In [None]:
import phenopackets
import os
from os.path import isfile
from google.protobuf.json_format import MessageToDict, MessageToJson
from google.protobuf.json_format import Parse, ParseDict

In [None]:
from phenopackets import Phenopacket
import varcode as vc
import myvariant as mv
import numpy as np
import pyensembl
import glob
import pandas as pd
from collections import defaultdict

In [None]:
# Paths to the two example phenopackets used throughout this notebook,
# relative to the notebook's working directory.
retinoblastoma = '../phenopackets/retinoblastoma.json'
nemalineMyopathy = '../phenopackets/nemalineMyopathy.json'

# Fail fast if either input is missing: both files are opened by later cells,
# so checking only one of them would defer the error to an unrelated cell.
for phenopacket_path in (retinoblastoma, nemalineMyopathy):
    if not isfile(phenopacket_path):
        raise FileNotFoundError("Could not find phenopacket: " + phenopacket_path)

In [None]:
import json

# Read the nemaline myopathy phenopacket. JSON is UTF-8 by specification, so
# the encoding is pinned instead of relying on the platform default.
with open(nemalineMyopathy, encoding="utf-8") as f:
    data = f.read()
jsondata = json.loads(data)

# ParseDict consumes the already-decoded dict directly, avoiding a
# json.dumps -> Parse round trip over the same data.
NMphenopacket = ParseDict(jsondata, Phenopacket())

In [None]:
# Read the retinoblastoma phenopacket. JSON is UTF-8 by specification, so the
# encoding is pinned instead of relying on the platform default.
with open(retinoblastoma, encoding="utf-8") as f:
    data = f.read()
jsondata = json.loads(data)

# ParseDict consumes the already-decoded dict directly, avoiding a
# json.dumps -> Parse round trip over the same data.
RBphenopacket = ParseDict(jsondata, Phenopacket())

In [None]:
# Display the `id` field of the parsed nemaline myopathy phenopacket.
NMphenopacket.id

In [None]:
# Display the whole parsed retinoblastoma phenopacket message.
RBphenopacket

In [None]:
class Patient:
    """A single patient loaded from a phenopacket JSON file.

    On construction the phenopacket is parsed, its non-excluded phenotypic
    features are collected, and every interpretation's VCF records are turned
    into ``varcode.Variant`` objects (GRCh38).

    Attributes are private; use the read-only properties and methods below.
    ``_genotype`` is ``None`` when the phenopacket has no interpretations,
    otherwise a list (one entry per interpretation) of lists of variants.
    """

    def __init__(self, phenopackJson):
        """Parse the phenopacket at ``phenopackJson``.

        Args:
            phenopackJson: Path to a phenopacket JSON file.

        Raises:
            FileNotFoundError: If ``phenopackJson`` is not an existing file.
        """
        if not isfile(phenopackJson):
            raise FileNotFoundError("Could not find phenopacket")

        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(phenopackJson, encoding="utf-8") as f:
            jsondata = json.load(f)
        # ParseDict takes the decoded dict directly (no dumps/Parse round trip).
        phenopack = ParseDict(jsondata, Phenopacket())

        self._phenopack = phenopack
        self._phenotype = self.__get_hpids()
        if len(phenopack.interpretations) != 0:
            self._genotype = [
                self.__get_variants(n)
                for n in range(len(phenopack.interpretations))
            ]
        else:
            print('No interpretations found')
            self._genotype = None

    def __get_hpids(self):
        """Return the ``type`` of every phenotypic feature not marked excluded."""
        return [x.type for x in self._phenopack.phenotypic_features if not x.excluded]

    def __get_variants(self, n):
        """Build ``varcode.Variant`` objects for interpretation number ``n``.

        Records whose fields cannot be read or whose contig cannot be parsed
        are reported on stdout and skipped (best effort).

        Args:
            n: Index into the phenopacket's ``interpretations``.

        Returns:
            A list of ``varcode.Variant`` objects, possibly empty.
        """
        interp = self._phenopack.interpretations[n]

        # Collect each record as one tuple so that a failure part-way through a
        # record can never leave contig/pos/ref/alt misaligned.
        records = []
        for gi in interp.diagnosis.genomic_interpretations:
            try:
                var_des = gi.variant_interpretation.variation_descriptor.vcf_record
                # Takes the text between the first '_' and the following '.'
                # and requires it to be an integer; presumably chrom is a
                # RefSeq-style accession such as 'NC_000013.11' -> 13.
                # NOTE(review): accessions for sex chromosomes would yield
                # numbers like 23/24 rather than X/Y — confirm downstream
                # handling.
                contig = int(var_des.chrom.split('_')[1].split('.')[0])
                records.append((str(contig), var_des.pos, var_des.ref, var_des.alt))
            except Exception:
                # Deliberately best effort, but no longer swallows
                # KeyboardInterrupt/SystemExit like a bare ``except`` would.
                print("Error with Variant")
                print(gi.variant_interpretation.variation_descriptor)
                continue

        return [
            vc.Variant(contig, pos, ref, alt, ensembl=pyensembl.ensembl_grch38)
            for contig, pos, ref, alt in records
        ]

    @property
    def get_phenopacket(self):
        """The parsed phenopacket message."""
        return self._phenopack

    @property
    def get_phenotypes(self):
        """The ``type`` entries of the non-excluded phenotypic features."""
        return self._phenotype

    @property
    def get_genotypes(self):
        """Short descriptions of all variants, or ``None`` without interpretations."""
        if self._genotype is None:
            return None
        return [
            variant.short_description
            for variants in self._genotype
            for variant in variants
        ]

    @property
    def get_var_effects(self):
        """Flat list of predicted effects for all variants, or ``None``.

        Each variant contributes every entry of ``variant.effects().effects``,
        so one variant can contribute several effects.
        """
        if self._genotype is None:
            return None
        effected = []
        for variants in self._genotype:
            for variant in variants:
                effected.extend(variant.effects().effects)
        return effected

    def is_missense(self):
        """Flag each effect that is an SNV whose description does not end in '*'.

        Returns:
            A list of booleans aligned with ``get_var_effects``, or ``None``
            when the phenopacket had no interpretations.

        NOTE(review): any SNV effect not ending in '*' is flagged, which may
        include effects that are not amino-acid substitutions — confirm intent.
        """
        if self._genotype is None:
            return None
        return [
            not (effect.short_description.endswith("*") or not effect.variant.is_snv)
            for effect in self.get_var_effects
        ]

    def is_nonsense(self):
        """Flag each effect that is an SNV whose description ends in '*'.

        Returns:
            A list of booleans aligned with ``get_var_effects``, or ``None``
            when the phenopacket had no interpretations.
        """
        if self._genotype is None:
            return None
        return [
            bool(effect.short_description.endswith("*") and effect.variant.is_snv)
            for effect in self.get_var_effects
        ]

    def describe(self):
        """Summarise the patient as a dict.

        Keys: "ID", "Disease", "Phenotypic Features", "Variants",
        "Effects of Variants" and "Number Missense". The missense count is 0
        when the phenopacket had no interpretations (previously this case
        raised ``TypeError`` from ``sum(None)``).
        """
        missense_flags = self.is_missense()
        stats = {
            "ID": self._phenopack.id,
            "Disease": self._phenopack.diseases,
            "Phenotypic Features": self.get_phenotypes,
            "Variants": self.get_genotypes,
            "Effects of Variants": self.get_var_effects,
            "Number Missense": sum(missense_flags) if missense_flags is not None else 0
        }
        return stats

In [None]:
# Build a Patient from the retinoblastoma phenopacket. Reuses the
# `retinoblastoma` path defined earlier in the notebook instead of repeating
# the literal, so the file location is configured in one place.
newPat2 = Patient(retinoblastoma)

In [None]:
# Display the flat list of predicted effects for this patient's variants
# (the property returns None when the phenopacket had no interpretations).
newPat2.get_var_effects

In [None]:
# Map phenopacket file name -> Patient, keeping only patients for which at
# least one variant could be parsed. A plain dict is used: the previous
# defaultdict(Patient) factory cannot be called without a path, so a missing
# key raised a confusing TypeError instead of KeyError.
allPatients = {}

# glob returns files in arbitrary order; sort for reproducible output.
for file in sorted(glob.glob('../phenopackets/*.json')):
    fileName = os.path.basename(file)
    print(fileName)
    current = Patient(file)

    # get_genotypes is None (no interpretations) or a possibly empty list;
    # both are falsy, and the property is evaluated only once here.
    if current.get_genotypes:
        allPatients[fileName] = current

for p in allPatients:
    print(allPatients[p].describe())

In [None]:
class NotAPatientError(TypeError):
    """Raised when ``Counts.add_patient`` receives something that is not a ``Patient``."""


class DoesNotHaveDiseaseError(ValueError):
    """Raised when a patient does not carry the disease a ``Counts`` object tallies."""


class Counts:
    """Running missense/nonsense tallies per phenotype for one disease.

    ``_totals`` is a DataFrame with rows "Missense" and "Nonsense" and one
    column per phenotype id seen so far.
    """

    def __init__(self, diseaseName):
        """Create an empty tally for ``diseaseName`` (matched case-insensitively)."""
        self._disease = diseaseName.lower()
        self._totals = pd.DataFrame(index = ["Missense","Nonsense"])

    def add_patient(self, patient):
        """Add one patient's missense/nonsense counts to every phenotype they have.

        Args:
            patient: A ``Patient`` whose phenopacket lists this disease.

        Returns:
            The updated totals DataFrame (the same object held by this instance).

        Raises:
            NotAPatientError: If ``patient`` is not a ``Patient``.
            DoesNotHaveDiseaseError: If none of the patient's disease labels
                matches this tally's disease.
        """
        if not isinstance(patient, Patient):
            raise NotAPatientError("ERROR: Input must be of class Patient")

        # Read the diseases straight from the phenopacket rather than via
        # describe(), which also computes variant statistics that are not
        # needed for this check.
        diseases = patient.get_phenopacket.diseases
        if not any(d.term.label.lower() == self._disease for d in diseases):
            raise DoesNotHaveDiseaseError("ERROR: This patient does not have disease - " + self._disease)

        # The counts do not depend on the phenotype, so compute them once.
        # is_missense()/is_nonsense() return None for patients without
        # interpretations; treat that as zero.
        miss = sum(patient.is_missense() or [])
        nons = sum(patient.is_nonsense() or [])

        for p in patient.get_phenotypes:
            if p.id not in self._totals.columns:
                self._totals[p.id] = 0
            self._totals.at["Missense", p.id] += miss
            self._totals.at["Nonsense", p.id] += nons
        return self._totals



In [None]:
# Start an empty missense/nonsense tally for retinoblastoma
# (Counts lower-cases the disease name it is given).
totalRetinal = Counts('Retinoblastoma')

In [None]:
# Add the retinoblastoma patient's counts and display the running totals.
# add_patient increments the stored totals in place, so re-running this cell
# counts the same patient again.
totalRetinal.add_patient(allPatients['retinoblastoma.json'])