In [1]:
import json
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import pandas as pd
# Input JSON files to compare (paths are relative to the kernel's working directory).
original_file = '../../data/original.json'
modified_file = '../../data/modified.json'

# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform's locale encoding, which can mis-decode non-ASCII JSON content.
with open(original_file, 'r', encoding='utf-8') as o:
  original = json.load(o)

with open(modified_file, 'r', encoding='utf-8') as m:
  modified = json.load(m)

In [2]:
# One DataFrame per parsed JSON payload: baseline first, then the modified run.
ori_df, mod_df = (pd.DataFrame(payload) for payload in (original, modified))

In [3]:
# Preview the first five rows of the modified results.
mod_df.head(n=5)

Unnamed: 0,geneSymbol,geneIdentifier,combinedScore,priorityScore,variantScore,pValue,priorityResults,compatibleInheritanceModes,geneScores,variantEvaluations,compatibleGeneScores,associatedDiseases
0,DDX56,"{'geneId': 'ENSG00000136271', 'geneSymbol': 'D...",0.763548,0.508362,0.99917,0.00071,{'HIPHIVE_PRIORITY': {'priorityType': 'HIPHIVE...,[AUTOSOMAL_DOMINANT],[{'geneIdentifier': {'geneId': 'ENSG0000013627...,"[{'genomeAssembly': 'HG19', 'contigName': '7',...",[{'geneIdentifier': {'geneId': 'ENSG0000013627...,
1,BDKRB1,"{'geneId': 'ENSG00000100739', 'geneSymbol': 'B...",0.751163,0.501144,1.0,0.000732,{'HIPHIVE_PRIORITY': {'priorityType': 'HIPHIVE...,[AUTOSOMAL_DOMINANT],[{'geneIdentifier': {'geneId': 'ENSG0000010073...,"[{'genomeAssembly': 'HG19', 'contigName': '14'...",[{'geneIdentifier': {'geneId': 'ENSG0000010073...,
2,KRT4,"{'geneId': 'ENSG00000170477', 'geneSymbol': 'K...",0.748943,0.500005,1.0,0.000931,{'HIPHIVE_PRIORITY': {'priorityType': 'HIPHIVE...,[AUTOSOMAL_DOMINANT],[{'geneIdentifier': {'geneId': 'ENSG0000017047...,"[{'genomeAssembly': 'HG19', 'contigName': '12'...",[{'geneIdentifier': {'geneId': 'ENSG0000017047...,"[{'diseaseId': 'OMIM:193900', 'diseaseName': '..."
3,ENPP1,"{'geneId': 'ENSG00000197594', 'geneSymbol': 'E...",0.748863,0.500353,0.99956,0.000954,{'HIPHIVE_PRIORITY': {'priorityType': 'HIPHIVE...,[AUTOSOMAL_DOMINANT],[{'geneIdentifier': {'geneId': 'ENSG0000019759...,"[{'genomeAssembly': 'HG19', 'contigName': '6',...",[{'geneIdentifier': {'geneId': 'ENSG0000019759...,"[{'diseaseId': 'OMIM:125853', 'diseaseName': '..."
4,MUC6,"{'geneId': 'ENSG00000184956', 'geneSymbol': 'M...",0.74771,0.500219,0.999044,0.001142,{'HIPHIVE_PRIORITY': {'priorityType': 'HIPHIVE...,"[AUTOSOMAL_DOMINANT, AUTOSOMAL_RECESSIVE]",[{'geneIdentifier': {'geneId': 'ENSG0000018495...,"[{'genomeAssembly': 'HG19', 'contigName': '11'...",[{'geneIdentifier': {'geneId': 'ENSG0000018495...,


In [13]:
def exomiser_results_comparator(df1: pd.DataFrame, df2: pd.DataFrame, column='combinedScore') -> dict:
  """Compare one score column between two result tables, gene by gene.

  For every gene symbol in ``df1`` that also appears in ``df2``, the value of
  ``column`` from each frame is collected together with the percentage change
  from ``df1`` to ``df2``. When a symbol occurs on several rows, the first row
  is used. Genes absent from ``df2`` (and rows without a symbol) are skipped.

  Args:
    df1: Baseline frame; needs ``geneSymbol`` and ``column`` columns.
    df2: Frame compared against the baseline; same columns required.
    column: Score to compare. One of ``combinedScore``, ``priorityScore``,
      ``variantScore`` or ``pValue``.

  Returns:
    Dict keyed by gene symbol, in order of first appearance in ``df1``. Each
    value is a dict with keys ``symbol``, ``orig``, ``mod`` and ``pct_change``.
    ``pct_change`` is rounded to 2 decimals; when the baseline value is 0 it
    is a signed infinity, or 0.0 if the modified value is 0 as well.

  Raises:
    ValueError: If ``column`` is not one of the supported score columns.
  """
  valid_columns = ['combinedScore', 'priorityScore', 'variantScore', 'pValue']
  if column not in valid_columns:
    raise ValueError(f"Invalid column {column!r}; expected one of {valid_columns}")

  # Index each frame by gene symbol once (first row per symbol wins) instead of
  # re-querying both frames for every gene. This also avoids building query
  # strings from symbols, which broke on symbols containing a quote.
  orig_scores = (
    df1.dropna(subset=['geneSymbol'])
    .drop_duplicates(subset='geneSymbol')
    .set_index('geneSymbol')[column]
  )
  mod_scores = (
    df2.dropna(subset=['geneSymbol'])
    .drop_duplicates(subset='geneSymbol')
    .set_index('geneSymbol')[column]
  )

  comparison = {}
  for symbol in orig_scores.index:
    # Genes missing from the second frame cannot be compared; skip them.
    if symbol not in mod_scores.index:
      continue
    orig_value = orig_scores.at[symbol]
    mod_value = mod_scores.at[symbol]

    if orig_value != 0:
      pct_change = round((mod_value - orig_value) / orig_value * 100.0, 2)
    elif mod_value != 0:
      # Change from a zero baseline is unbounded; keep the sign of the new value.
      pct_change = float("inf") * abs(mod_value) / mod_value
    else:
      pct_change = 0.0

    comparison[symbol] = {
      'symbol': symbol,
      'orig': orig_value,
      'mod': mod_value,
      'pct_change': pct_change,
    }
  return comparison

def predicted_genes(df1: pd.DataFrame, df2: pd.DataFrame, top_n: int = 4) -> dict:
  """Return the first ``top_n`` distinct gene symbols of each frame.

  Symbols are taken in row order, so this presumably expects both frames to be
  sorted by rank already (best candidate first) — TODO confirm with the data
  producer.

  Args:
    df1: Original results; needs a ``geneSymbol`` column.
    df2: Modified results; needs a ``geneSymbol`` column.
    top_n: How many distinct symbols to keep per frame. Defaults to 4.

  Returns:
    Dict with keys ``original`` and ``modified``, each holding an array of at
    most ``top_n`` gene symbols.

  Raises:
    ValueError: If ``top_n`` is negative.
  """
  if top_n < 0:
    # A negative slice bound would silently drop symbols from the end instead.
    raise ValueError("top_n must be non-negative")
  return {
    'original': df1['geneSymbol'].unique()[:top_n],
    'modified': df2['geneSymbol'].unique()[:top_n],
  }
  
  
  

In [8]:
# Tabulate the per-gene comparison: transpose so each gene becomes one row.
pd.DataFrame(
    exomiser_results_comparator(ori_df, mod_df, column='combinedScore')
).transpose()

Unnamed: 0,symbol,orig,mod,pct_change
FGFR2,FGFR2,0.997449,0.684206,-31.4
ENPP1,ENPP1,0.86904,0.748863,-13.83
ZNF366,ZNF366,0.824789,0.304413,-63.09
PKD1,PKD1,0.79177,0.476847,-39.77
MYH3,MYH3,0.76918,0.237926,-69.07
...,...,...,...,...
ITPRID2,ITPRID2,0.000004,0.000023,469.65
TNXB,TNXB,0.000004,0.000034,843.93
LILRA2,LILRA2,0.000003,0.000026,678.74
GREB1,GREB1,0.000003,0.000003,0.0


What are the top predicted genes in the original and modified results?

In [14]:
# Leading gene symbols of the original run versus the modified run.
predicted_genes(df1=ori_df, df2=mod_df)

array(['FGFR2', 'ENPP1', 'ZNF366', 'PKD1'], dtype=object)