In [1]:
# !jupyter nbextension enable --py --sys-prefix widgetsnbextension
# !jupyter nbextension enable --py --sys-prefix qgrid
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import qgrid
import g2p_client
from IPython.display import IFrame    
import re
from IPython.display import Javascript
from collections import Counter

# NOTE(review): this commits an API key to the notebook source, and %env echoes it
# into the saved cell output as well. Prefer exporting BIOONTOLOGY_API_KEY in the
# shell that launches Jupyter, and rotate this key.
%env BIOONTOLOGY_API_KEY=fb76114e-2eff-4a05-9a2e-eba5b9fb6f0e

 


env: BIOONTOLOGY_API_KEY=fb76114e-2eff-4a05-9a2e-eba5b9fb6f0e


In [251]:

# Features to look up; each element is passed, one at a time, to
# g2p_client.location_query by the query cell.
# NOTE(review): the value below is a placeholder string -- presumably it is replaced
# with real feature records (see harvester/query_generator) before running; confirm.
dataset_features = ['see harvester/query_generator']

# Label printed as the top-level "# ..." heading of the report cell.
dataset_name = 'XXXX'

In [252]:
def _feature_matches(query_features, hit_features):
    """Return the identifiers shared by the query features and a hit's features.

    Every query feature is compared with every hit feature on synonyms,
    pathways, swissprots, sequence ontology and gene symbol. The result is
    de-duplicated and keeps first-seen order so repeated runs give the same
    list.
    """
    found = []
    for query_feature in query_features:
        for hit_feature in hit_features:
            # The three list-valued fields are compared independently; a missing
            # or empty synonym list must not prevent pathway/swissprot matching.
            for key in ('synonyms', 'pathways', 'swissprots'):
                hit_values = hit_feature.get(key) or []
                for value in query_feature.get(key) or []:
                    if value in hit_values:
                        found.append(value)

            # Only a sequence ontology that is actually present on the query
            # feature can match.
            query_so = query_feature.get('sequence_ontology')
            if query_so and query_so == hit_feature.get('sequence_ontology'):
                so_name = query_so.get('name')
                if so_name is not None:
                    found.append(so_name)

            gene = query_feature.get('geneSymbol')
            if gene is not None and gene == hit_feature.get('geneSymbol'):
                found.append(gene)

    unique = []
    seen = set()
    for value in found:
        if value not in seen:
            seen.add(value)
            unique.append(value)
    return unique


def to_df(generator, features ):
    """Flatten an iterable of hits into a DataFrame indexed by evidence id.

    Args:
        generator: iterable of hit dicts. Each hit is read for 'evidence.id',
            'source', 'features' and an 'association' dict holding
            'environmentalContexts', 'description', 'evidence_label',
            'phenotype' and 'evidence'.
        features: the query features; used to compute the 'matches' column.

    Returns:
        A DataFrame indexed by 'id' with columns source, evidence_label, drugs,
        description, phenotype, publications and matches. An empty DataFrame is
        returned when there are no usable hits or when iterating/building fails
        (the error is printed rather than raised, keeping the lookup
        best-effort).
    """

    def _records(hits):
        for hit in hits:
            # Everything that reads the hit lives in one try so a single
            # malformed hit is reported and skipped instead of aborting the
            # whole frame.
            try:
                association = hit['association']
                record = {
                    'id': hit['evidence.id'],
                    'source': hit['source'].encode('utf-8'),
                    'evidence_label': association['evidence_label'],
                    'drugs': [ec['term'].encode('utf-8')
                              for ec in association['environmentalContexts']],
                    'description': association['description'].encode('utf-8'),
                    'phenotype': association['phenotype']['type']['term'].encode('utf-8'),
                    'publications': [publication
                                     for evidence in association['evidence']
                                     for publication in evidence['info']['publications']],
                    'matches': _feature_matches(features, hit['features']),
                }
            except Exception as e:
                print('!!! {}'.format(e))
                continue
            yield record

    try:
        records = list(_records(generator))
        if not records:
            return pd.DataFrame()
        return pd.DataFrame.from_records(records, index='id')
    except Exception as e:
        # Failures while pulling hits from `generator` or building the frame
        # still degrade to "no results", but are no longer silent.
        print('!!! {}'.format(e))
        return pd.DataFrame()
    

def allele_identifier(feature_associations):
    """Build a short display identifier for the first usable feature.

    For the first feature found, returns '<geneSymbol> <synonym>' for the first
    synonym matching the genomic-HGVS pattern below; otherwise returns
    '<geneSymbol> <start> <sequence ontology name>'. Fields that are absent are
    left out instead of raising, and a feature with none of them is skipped.

    Args:
        feature_associations: iterable of dicts, each with a 'features' list.

    Returns:
        The identifier string, or None when no feature yields one.
    """
    # Raw string: '\.' is not a valid escape sequence in a plain string literal.
    # NOTE(review): this only accepts accessions whose version is ".10". The
    # GRCh37 lookups recorded in this notebook's output include accessions such
    # as NC_000003.11 and NC_000011.9, which this pattern rejects -- confirm
    # whether those alleles are meant to fall through to the positional form.
    is_HG37 = re.compile(r'NC_.*\.10:g')

    def _label(parts):
        return ' '.join('{}'.format(part) for part in parts if part is not None)

    for fa in feature_associations:
        for f in fa['features']:
            gene = f.get('geneSymbol')
            for s in f.get('synonyms') or []:
                if is_HG37.match(s):
                    return _label([gene, s])
            sequence_ontology = f.get('sequence_ontology') or {}
            label = _label([gene, f.get('start'), sequence_ontology.get('name')])
            if label:
                return label
    return None

        
def biomarker_type(feature_associations):
    """Return the 'biomarker_type' of the first feature encountered.

    Associations are scanned in order; the first feature of the first
    association that has any features decides the result. Returns None when
    no association carries a feature.
    """
    nothing = object()
    first_feature = next(
        (feature
         for association in feature_associations
         for feature in association['features']),
        nothing,
    )
    if first_feature is nothing:
        return None
    return first_feature['biomarker_type']
        

In [253]:

g2p = g2p_client.G2PDatabase('elastic')

# queries: one summary row per executed query.
# data_frames: allele identifier -> query name -> DataFrame of hits.
queries, data_frames = [], {}

for patient_feature in dataset_features:
    location_query = g2p_client.location_query([patient_feature])
    feature_associations = location_query['feature_associations']
    identifier = allele_identifier(feature_associations)
    b_type = biomarker_type(feature_associations)
    for name, query_spec in location_query['queries'].items():
        qs = query_spec['query']['query_string']['query']
        frames_for_allele = data_frames.setdefault(identifier, {})
        frames_for_allele[name] = to_df(
            g2p.raw_dataframe(query_string=qs, verbose=False),
            feature_associations[0]['features'],
        )
        queries.append({
            'biomarker_type': b_type,
            'allele': identifier,
            'name': name,
            'hits': len(frames_for_allele[name]),
            'query_string': qs,
        })





http://reg.genome.network/allele?hgvs=NC_000003.11%3Ag.41266125C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000411226.1:p.Thr34Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000409302.1:p.Thr41Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000412219.1:p.Thr41Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000379486.3:p.Thr41Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000401599.1:p.Thr41Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000385604.1:p.Thr41Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000379488.3:p.Thr41Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000400508.1:p.Thr34Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000344456.5:p.Thr41Ile?domains



http://reg.genome.network/allele?hgvs=NC_000003.11%3Ag.142188239T%3EC
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000343741.4:p.Ile2164Met?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q13535&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000010.10%3Ag.89685309_89685310insAATCT




http://reg.genome.network/allele?hgvs=NC_000003.11%3Ag.41266125C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000411226.1:p.Thr34Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000409302.1:p.Thr41Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000412219.1:p.Thr41Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000379486.3:p.Thr41Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000401599.1:p.Thr41Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000385604.1:p.Thr41Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000379488.3:p.Thr41Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000400508.1:p.Thr34Ile?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000344456.5:p.Thr41Ile?domains



http://reg.genome.network/allele?hgvs=NC_000023.10%3Ag.1584977C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000370697.4:p.Ala159Thr?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q86VZ1&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000003.11%3Ag.170802031T%3EC
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000284483.8:p.Thr1020Ala?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000349880.5:p.Thr999Ala?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000345352.6:p.Thr944Ala?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000419990.1:p.Thr991Ala?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000418378.1:p.Thr973Ala?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000418916.1:p.Thr965Ala?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000418156.1:p.Thr936Ala?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000399511.2:p.Thr1028Ala?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q9UKE5&organism=homo%20s



http://reg.genome.network/allele?hgvs=NC_000011.9%3Ag.118764345C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000292174.4:p.Ser31Phe?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q86UU0&organism=homo%20sapiens
http://www.pathwaycommons.org/pc2/search.json?q=P32302&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000002.11%3Ag.136071095C%3EG
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000264159.6:p.Leu310Phe?domains=1&protein=1&uniprot=1
{u'error': u"Unable to parse HGVS notation 'ENSP00000264159.6:p.Leu310Phe':  : Could not uniquely determine nucleotide change from ENSP00000264159.6:p.Leu310Phe: Could not get a Transcript object for 'ENSP00000264159'"} https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000264159.6:p.Leu310Phe?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000384245.2:p.Leu310Phe?domains=1&protein=1&uniprot=1
{u'error': u"Unable to parse HGVS notation 'ENSP00000384245.2:p.Leu310Phe':  : Could not uniquely determine nucleotide change from ENSP00000384245.2:p.Leu310Phe: Could not get a Transcript object for 'ENSP00000384245'"} https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000384245.2:p.Leu310Phe?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000383979



http://reg.genome.network/allele?hgvs=NC_000020.10%3Ag.2097401C%3EG
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000370891.3:p.Leu328Val?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000426612.1:p.Leu189Val?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q8TDR2&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000012.11%3Ag.58130871C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000449241.1:p.Ala387Thr?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q99490&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000006.11%3Ag.33690771C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000398861.1:p.Arg320His?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000293756.4:p.Arg320His?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q96PC2&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000013.10%3Ag.101910837C%3EG
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000251127.6:p.Gly408Ala?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q8IZF0&organism=homo%20sapiens
http://reg.genome.network/allele?hgvs=NC_000012.11%3Ag.57424051C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000300119.3:p.Arg845Lys?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000393392.2:p.Arg845Lys?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q9UHF0&organism=homo%20sapiens
http://www.pathwaycommons.org/pc2/search.json?q=Q9UBC5&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000013.10%3Ag.38211533C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000369027.3:p.Arg814Lys?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000369001.1:p.Arg641Lys?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000342580.5:p.Arg641Lys?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000486109.1:p.Arg819Lys?domains=1&protein=1&uniprot=1
{u'error': u"Unable to parse HGVS notation 'ENSP00000486109.1:p.Arg819Lys': Could not get a Transcript object for 'ENSP00000486109'"} https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000486109.1:p.Arg819Lys?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q9UBN4&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000001.10%3Ag.32257834G%3EA
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000399778.2:p.Arg418Cys?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000353670.2:p.Arg982Cys?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000257100.3:p.Arg475Cys?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000435851.1:p.Arg982Cys?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q6ZMY3&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000006.11%3Ag.28097282G%3EA
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000366527.3:p.Glu201Lys?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q9H4T2&organism=homo%20sapiens
http://reg.genome.network/allele?hgvs=NC_000007.13%3Ag.84628811C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000284136.6:p.Arg760Gln?domains=1&protein=1&uniprot=1




http://www.pathwaycommons.org/pc2/search.json?q=O95025&organism=homo%20sapiens
http://reg.genome.network/allele?hgvs=NC_000001.10%3Ag.72400932G%3EA
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000350364.4:p.Ala80Val?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000413294.2:p.Ala25Val?domains=1&protein=1&uniprot=1
{u'error': u"Unable to parse HGVS notation 'ENSP00000413294.2:p.Ala25Val':  : Reference allele extracted from ENSP00000413294:72748098-72748098 (T) does not match reference allele given by HGVS notation ENSP00000413294.2:p.Ala25Val (C): Could not get a Transcript object for 'ENSP00000413294'"} https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000413294.2:p.Ala25Val?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q7Z3B1&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000003.11%3Ag.35778772C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000412326.1:p.Pro487Leu?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000405276.1:p.Pro467Leu?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000187397.4:p.Pro521Leu?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q9UBL0&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000001.10%3Ag.21016738G%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000247986.2:p.Arg442Ser?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000364184.1:p.Arg342Ser?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000383311.3:p.Arg442Ser?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q9P2E2&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000003.11%3Ag.51929267C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000307958.5:p.Arg86Gln?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q8N6M8&organism=homo%20sapiens
http://reg.genome.network/allele?hgvs=NC_000008.10%3Ag.95188799C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000401468.2:p.Glu132Lys?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000428189.1:p.Glu132Lys?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000392811.2:p.Glu132Lys?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000027335.3:p.Glu132Lys?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q12864&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000011.9%3Ag.96117712A%3EG
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000278520.5:p.Leu67Pro?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000442723.1:p.Leu67Pro?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000397156.2:p.Leu67Pro?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q8N4S0&organism=homo%20sapiens
http://reg.genome.network/allele?hgvs=NC_000011.9%3Ag.105967614C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000278618.4:p.Arg304Ter?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q9NRN7&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000021.8%3Ag.31538698C%3EA
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000286808.3:p.Ala80Ser?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=P56750&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000001.10%3Ag.38483424G%3EA
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000362105.4:p.Met70Ile?domains=1&protein=1&uniprot=1
{u'error': u"Unable to parse HGVS notation 'ENSP00000362105.4:p.Met70Ile':  : Could not uniquely determine nucleotide change from ENSP00000362105.4:p.Met70Ile: Could not get a Transcript object for 'ENSP00000362105'"} https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000362105.4:p.Met70Ile?domains=1&protein=1&uniprot=1
http://reg.genome.network/allele?hgvs=NC_000004.11%3Ag.155526050T%3EG
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000384552.1:p.Gln441Pro?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000384101.1:p.Gln441Pro?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000384860.3:p.Gln433Pro?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000336829.3:p.Gln433Pro?domains=1&protein=1&uniprot=1
htt



http://reg.genome.network/allele?hgvs=NC_000005.9%3Ag.148617051C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000420855.1:p.Ala310Val?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000310309.7:p.Ala310Val?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000425394.1:p.Ala310Val?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=O94929&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000006.11%3Ag.31895941C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000410815.1:p.Pro86Ser?domains=1&protein=1&uniprot=1
{u'error': u"Unable to parse HGVS notation 'ENSP00000410815.1:p.Pro86Ser':  : Unable to map the peptide coordinate 86 to genomic coordinates for protein ENSP00000410815: Could not get a Transcript object for 'ENSP00000410815'"} https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000410815.1:p.Pro86Ser?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000403325.1:p.Pro86Ser?domains=1&protein=1&uniprot=1
{u'error': u"Unable to parse HGVS notation 'ENSP00000403325.1:p.Pro86Ser':  : Unable to map the peptide coordinate 86 to genomic coordinates for protein ENSP00000403325: Could not get a Transcript object for 'ENSP00000403325'"} https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000403325.1:p.Pro86Ser?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP



http://reg.genome.network/allele?hgvs=NC_000017.10%3Ag.18051501C%3ET
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000481642.1:p.Ala2223Val?domains=1&protein=1&uniprot=1
{u'error': u"Unable to parse HGVS notation 'ENSP00000481642.1:p.Ala2223Val': Could not get a Transcript object for 'ENSP00000481642'"} https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000481642.1:p.Ala2223Val?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000205890.5:p.Ala2223Val?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q9UKN7&organism=homo%20sapiens
http://reg.genome.network/allele?hgvs=NC_000020.10%3Ag.60969271G%3EC
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000279101.5:p.Ser219Cys?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q9BTV7&organism=homo%20sapiens




http://reg.genome.network/allele?hgvs=NC_000004.11%3Ag.186096984G%3EA
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000423312.1:p.Leu426Phe?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000409964.2:p.Leu426Phe?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q9P2B7&organism=homo%20sapiens
http://reg.genome.network/allele?hgvs=NC_000006.11%3Ag.110056501G%3EA
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000230124.3:p.Gly216Arg?domains=1&protein=1&uniprot=1




{u'error': u"Unable to parse HGVS notation 'ENSP00000230124.3:p.Gly216Arg':  : Unable to map the peptide coordinate 216 to genomic coordinates for protein ENSP00000230124: Could not get a Transcript object for 'ENSP00000230124'"} https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000230124.3:p.Gly216Arg?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000412156.1:p.Gly195Arg?domains=1&protein=1&uniprot=1
{u'error': u"Unable to parse HGVS notation 'ENSP00000412156.1:p.Gly195Arg':  : Unable to map the peptide coordinate 195 to genomic coordinates for protein ENSP00000412156: Could not get a Transcript object for 'ENSP00000412156'"} https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000412156.1:p.Gly195Arg?domains=1&protein=1&uniprot=1
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000357937.1:p.Gly139Arg?domains=1&protein=1&uniprot=1
{u'error': u"Unable to parse HGVS notation 'ENSP00000357937.1:p.Gly139Arg':  : Unable to map the peptide coordin



http://reg.genome.network/allele?hgvs=NC_000016.9%3Ag.67702418A%3EG
https://grch37.rest.ensembl.org/vep/human/hgvs/ENSP00000384117.3:p.Asp290Gly?domains=1&protein=1&uniprot=1
http://www.pathwaycommons.org/pc2/search.json?q=Q9H0I2&organism=homo%20sapiens
http://www.pathwaycommons.org/pc2/search.json?q=Q6ZW13&organism=homo%20sapiens


In [254]:
from IPython.display import clear_output
from IPython.display import IFrame  
from IPython.core.display import HTML

# Read-only grid with one row per executed query.
queries_df = pd.DataFrame.from_records(queries)
queries_df_widget = qgrid.show_grid(queries_df, grid_options=dict(editable=False))

# Selection state shared with the row-selection callbacks through `global`.
qgrid_widget = top3_widget = None
allele = name = None

def top(df, fields = ['publications', 'evidence_label', 'phenotype', 'drugs' ], size=3):
    """Tally the most frequent values of each requested column.

    Cells holding a list or tuple contribute each of their elements; any other
    cell is counted as a single value. For every field, the `size` most common
    values are reported as {'field', 'value', 'count'} dicts, in field order.
    """
    rows = []
    for field in fields:
        tally = Counter()
        for cell in df[field]:
            members = cell if isinstance(cell, (list, tuple)) else [cell]
            tally.update(members)
        rows.extend(
            {'field': field, 'value': value, 'count': count}
            for value, count in tally.most_common(size)
        )
    return rows
    

def on_queries_df_row_selected(change):
    """Redraw the detail view for the row selected in ``queries_df_widget``.

    Stores the selected allele and query name in the module-level ``allele`` /
    ``name`` globals, builds a grid of the matching hits and a grid of their
    most common values (via ``top``), and re-renders the cell output. Selecting
    a row in the hits grid additionally shows that hit's matches and
    publications, plus the first publication in an IFrame when there is one.

    Args:
        change: the notification passed by the widget; not inspected.
    """
    global allele
    global name
    global qgrid_widget
    global top3_widget
    selected_rows = queries_df_widget.get_selected_df().to_dict('records')
    if not selected_rows:
        # The observer can fire with no row selected (presumably on
        # deselection); there is nothing to look up in that case.
        return
    allele = selected_rows[0]['allele']
    name = selected_rows[0]['name']
    query_string = selected_rows[0]['query_string']
    # Forget the widgets of any earlier selection so a failed lookup can never
    # leave a stale hits grid wired up or displayed.
    qgrid_widget = None
    top3_widget = None
    lookup_error = None

    try:
        df = data_frames[allele][name]
        if len(df):
            hits_widget = qgrid.show_grid(df)
            summary_widget = qgrid.show_grid(pd.DataFrame(top(df)))
            # Publish both widgets together, only once both were built.
            qgrid_widget, top3_widget = hits_widget, summary_widget
    except Exception as e:
        # Reported after the redraw below; printing here would be wiped by
        # clear_output(wait=True) as soon as the new output is displayed.
        lookup_error = e

    # show publication
    def on_row_selected(change):
        global qgrid_widget
        try:
            rows = qgrid_widget.get_selected_df().to_dict('records')
            if not rows:
                return
            publications = rows[0]['publications']
            matches = rows[0]['matches']
            outputs = [queries_df_widget,
                       HTML('<p>{} {}</p><p>{}</p>'.format(allele, name, query_string)),
                       qgrid_widget,
                       HTML('<p>matches:{}</p><p>{}</p>'.format(matches, publications))]
            if publications:
                # Only embed a page when the hit actually cites something.
                outputs.append(IFrame(publications[0], width=900, height=900))
            clear_output(wait=True)
            display(*outputs)
        except Exception as e:
            print(e)

    if qgrid_widget is not None:
        qgrid_widget.observe(on_row_selected, names=['_selected_rows'])
    clear_output(wait=True)
    try:
        if top3_widget is not None:
            display(queries_df_widget,
                    HTML('<p>top3:</p>'),
                    top3_widget,
                    HTML('<p>{} {}</p><p>{}</p>'.format(allele, name, query_string)),
                    qgrid_widget)
        else:
            display(queries_df_widget, HTML('<p>No results</p>'))
            if lookup_error is not None:
                print(lookup_error)
    except Exception as e:
        print(e)
    
# Call on_queries_df_row_selected whenever the grid's '_selected_rows' trait changes.
queries_df_widget.observe(on_queries_df_row_selected, names=['_selected_rows'])

# Initial render: only the queries grid, until a row is selected.
display(queries_df_widget)

In [255]:
# For each query type, print the most cited publication at the strongest
# evidence level that has hits, followed by up to `limit` supporting hits.
query_names = ['alleles', '~location', '~range', 'protein_effects', 'protein_domain', '~biomarker_type',  'genes' , 'pathways']
evidence_levels = ['A', 'B', 'C', 'D']
limit = 3
# The loop variables are deliberately not called `name` / `allele`: the
# row-selection callbacks read module-level globals with those names, and
# reusing them here would overwrite the current selection.
for query_name in query_names:
    # Collect the non-empty frames first and concatenate once, instead of
    # growing a DataFrame with repeated append calls.
    frames = [per_query[query_name]
              for per_query in data_frames.values()
              if query_name in per_query and len(per_query[query_name])]
    if not frames:
        continue
    combined_df = pd.concat(frames)
    for evidence_level in evidence_levels:
        evidence_level_df = combined_df[combined_df.evidence_label == evidence_level]
        if not len(evidence_level_df):
            continue
        top_publications = top(evidence_level_df, fields=['publications'], size=1)
        if not top_publications:
            # Hits at this level cite nothing; try the next level.
            continue
        publication = top_publications[0]['value']
        print('{} {} {}'.format(query_name, evidence_level, publication))
        shown = 0
        for evidence in evidence_level_df.to_records():
            if publication in evidence.publications:
                print('     {} {} {}'.format(evidence.source, evidence.phenotype, evidence.drugs))
                print('     {}'.format(evidence.description))
                shown += 1
                if shown == limit:
                    break
        # Only the strongest evidence level with a cited publication is reported.
        break


~location B http://www.ncbi.nlm.nih.gov/pubmed/27091708
     cgi breast adenocarcinoma ['EVEROLIMUS', 'TRASTUZUMAB', 'mAb']
     PTEN ERBB2 Everolimus + Trastuzumab + Chemotherapy (MTOR inhibitor + ERBB2 mAb inhibitor + Chemotherapy) Responsive
~range B http://www.ncbi.nlm.nih.gov/pubmed/27091708
     cgi breast adenocarcinoma ['EVEROLIMUS', 'TRASTUZUMAB', 'mAb']
     PTEN ERBB2 Everolimus + Trastuzumab + Chemotherapy (MTOR inhibitor + ERBB2 mAb inhibitor + Chemotherapy) Responsive
protein_effects B http://www.ncbi.nlm.nih.gov/pubmed/27091708
     cgi breast adenocarcinoma ['EVEROLIMUS', 'TRASTUZUMAB', 'mAb']
     PTEN ERBB2 Everolimus + Trastuzumab + Chemotherapy (MTOR inhibitor + ERBB2 mAb inhibitor + Chemotherapy) Responsive
     cgi breast adenocarcinoma ['EVEROLIMUS', 'TRASTUZUMAB', 'mAb']
     PTEN ERBB2 Everolimus + Trastuzumab + Chemotherapy (MTOR inhibitor + ERBB2 mAb inhibitor + Chemotherapy) Responsive
~biomarker_type B http://www.ncbi.nlm.nih.gov/pubmed/16282176
     civic 

In [256]:
# Markdown-style report: one section per query type and, within it, one bullet
# per evidence level naming the most cited publication and up to `limit`
# supporting hits.
query_names = ['alleles', '~location', '~range', 'protein_effects', 'protein_domain', '~biomarker_type',  'genes' , 'pathways']
evidence_levels = ['A', 'B', 'C', 'D']
limit = 2
print('# {}\n'.format(dataset_name))
# The loop variables are deliberately not called `name` / `allele`: the
# row-selection callbacks read module-level globals with those names, and
# reusing them here would overwrite the current selection.
for query_name in query_names:
    # Collect the non-empty frames first and concatenate once, instead of
    # growing a DataFrame with repeated append calls.
    frames = [per_query[query_name]
              for per_query in data_frames.values()
              if query_name in per_query and len(per_query[query_name])]
    if not frames:
        continue
    combined_df = pd.concat(frames)
    print('\n## {}\n'.format(query_name))
    for evidence_level in evidence_levels:
        evidence_level_df = combined_df[combined_df.evidence_label == evidence_level]
        if not len(evidence_level_df):
            continue
        top_publications = top(evidence_level_df, fields=['publications'], size=1)
        if not top_publications:
            # Hits at this level cite nothing, so there is no publication to list.
            continue
        publication = top_publications[0]['value']
        print('   * {} {}'.format(evidence_level, publication))
        shown = 0
        for evidence in evidence_level_df.to_records():
            if publication in evidence.publications:
                print('      * {} {} {}'.format(evidence.source, evidence.phenotype, evidence.drugs))
                print('        {}'.format(evidence.description))
                shown += 1
                if shown == limit:
                    break


# p204


## ~location

   * B http://www.ncbi.nlm.nih.gov/pubmed/27091708
      * cgi breast adenocarcinoma ['EVEROLIMUS', 'TRASTUZUMAB', 'mAb']
        PTEN ERBB2 Everolimus + Trastuzumab + Chemotherapy (MTOR inhibitor + ERBB2 mAb inhibitor + Chemotherapy) Responsive
   * C http://www.ncbi.nlm.nih.gov/pubmed/26861905
      * jax sarcoma ['Imatinib']
        In a retrospective analysis, patients with desmoid fibromatosis harboring CTNNB1 T41A demonstrated a greater progression arrest rate at 6 months (70%) compared to patients with CTNNB1 wild-type (45%) when treated with Gleevec (imatinib) (PMID: 26861905). 

## ~range

   * B http://www.ncbi.nlm.nih.gov/pubmed/27091708
      * cgi breast adenocarcinoma ['EVEROLIMUS', 'TRASTUZUMAB', 'mAb']
        PTEN ERBB2 Everolimus + Trastuzumab + Chemotherapy (MTOR inhibitor + ERBB2 mAb inhibitor + Chemotherapy) Responsive
   * C http://www.ncbi.nlm.nih.gov/pubmed/28514312
      * jax colon cancer ['VEMURAFENIB', 'PANITUMUMAB']
        In a clini

      * jax melanoma ['TRAMETINIB']
        In a preclinical study, Mekinist (trametinib) reduced ERK signaling and inhibited proliferation of a melanoma cell line harboring BRAF D594G in culture (PMID: 28783719).
      * jax collecting duct carcinoma ['TRAMETINIB']
        In a preclinical study, the combination of Mekinist (trametinib) and Capmatinib (INC280) induced tumor regression in a patient-derived xenograft (PDX) model derived from the ovarian metastasis of a patient with collecting duct carcinoma, which harbored BRAF D594N and BRAF G466A and had high levels of MET and EGFR expression (PMID: 28783719).
