### Query:
What genes encode proteins that physically interact with proteins encoded by the EDS associated genes.

### Input:

Hardcoded tsv file from:

https://raw.githubusercontent.com/NCATS-Tangerine/cq-notebooks/master/FA_gene_sets/FA_1_core_complex.txt

### Goal:
This simple query aims to expand the FA-core gene set based PPI network membership.

### Route:
1. Protein-protein interactions from Monarch Solr index, either direct or inferred through orthology.  Sources: [BioGRID](https://thebiogrid.org/) and [STRING](http://string-db.org/)
2. Gene-ortholog associations from Monarch Solr index.  Sources: [Panther](http://www.pantherdb.org/)


In [18]:
import requests
import pandas as pd
import copy
core_set = 'https://raw.githubusercontent.com/kcortes133/EDS_Rotation/master/genes.txt'

columns = ['gene', 'interactor_id', 'interactor_symbol', 'qualifier', 'inferred_gene']
dataframe = pd.read_csv(core_set, sep="   ", names=['gene', 'symbol']) # sep on 3 spaces
dataframe
solr_url = 'https://solr.monarchinitiative.org/solr/golr/select'
def get_solr_results(solr, params):
    resultCount = params['rows']
    while params['start'] < resultCount:
        solr_request = requests.get(solr, params=params)
        response = solr_request.json()
        resultCount = response['response']['numFound']
        params['start'] += params['rows']
        for doc in response['response']['docs']:
            yield doc

interaction_params = {
    'wt': 'json',
    'rows': 100,
    'start': 0,
    'q': '*:*',
    'fl': 'subject, subject_label, subject_closure, \
           object, object_label, object_taxon',
    'fq': ['relation_closure: "RO:0002434"']
}

# Make new dataframe for results
interact_table = pd.DataFrame(columns=columns)


# Get interactions, both direct and inferred
for index, row in dataframe.iterrows():
    params = copy.deepcopy(interaction_params)
    params['fq'].append('subject_closure: "{0}" \
                        OR subject_ortholog_closure: "{0}"'
                        .format(row['gene']))
    for doc in get_solr_results(solr_url, params):
        result = {}
        result['gene'] = row['symbol']
        result['interactor_id'] = doc['object']
        result['interactor_symbol'] = doc['object_label']
        if row['gene'] in doc['subject_closure']:
            result['qualifier'] = "direct"
        else:
            result['qualifier'] = "homology"    
        interact_table = interact_table.append(result, ignore_index=True)
            
interact_table

  dataframe = pd.read_csv(core_set, sep="   ", names=['gene', 'symbol']) # sep on 3 spaces
  interact_table = interact_table.append(result, ignore_index=True)
  interact_table = interact_table.append(result, ignore_index=True)
  interact_table = interact_table.append(result, ignore_index=True)
  interact_table = interact_table.append(result, ignore_index=True)
  interact_table = interact_table.append(result, ignore_index=True)
  interact_table = interact_table.append(result, ignore_index=True)
  interact_table = interact_table.append(result, ignore_index=True)
  interact_table = interact_table.append(result, ignore_index=True)
  interact_table = interact_table.append(result, ignore_index=True)
  interact_table = interact_table.append(result, ignore_index=True)
  interact_table = interact_table.append(result, ignore_index=True)
  interact_table = interact_table.append(result, ignore_index=True)
  interact_table = interact_table.append(result, ignore_index=True)
  interact_table = intera

Unnamed: 0,gene,interactor_id,interactor_symbol,qualifier,inferred_gene
0,COL5A1,HGNC:9255,PPIB,direct,
1,COL5A1,HGNC:9081,PLOD1,direct,
2,COL5A1,HGNC:9083,PLOD3,direct,
3,COL5A1,HGNC:9082,PLOD2,direct,
4,COL5A1,HGNC:16171,CHMP4B,direct,
...,...,...,...,...,...
695,COL1A2,HGNC:6156,ITGB3,direct,
696,COL1A2,HGNC:6153,ITGB1,direct,
697,COL1A2,HGNC:6150,ITGAV,direct,
698,COL1A2,HGNC:6134,ITGA1,direct,


In [19]:
interact_table2 = interact_table[interact_table['gene'].isin(["MYLK", "COL5A1"])]

In [20]:
interactors = interact_table2[['gene', 'interactor_symbol']].drop_duplicates()

In [21]:
all_interactors = interactors.value_counts('interactor_symbol').reset_index()
multi_interactors = all_interactors[all_interactors[0] > 1]

In [22]:
multi_interactors

Unnamed: 0,interactor_symbol,0
0,ITGB1,2
1,VIRMA,2
2,ANLN,2
3,ITGA1,2


In [23]:
include_interactors = multi_interactors["interactor_symbol"].tolist()

In [24]:
interact_table[interact_table["interactor_symbol"] == "LOX"]

Unnamed: 0,gene,interactor_id,interactor_symbol,qualifier,inferred_gene
545,COL1A1,HGNC:6664,LOX,direct,
684,COL1A2,HGNC:6664,LOX,direct,


In [25]:
import graphviz as gv

In [26]:
g = gv.Digraph()
nodes = list(set(interact_table['gene'].tolist())) + include_interactors
for node in nodes:
    g.node(node)
for row in interact_table.itertuples():
    if row.interactor_symbol in include_interactors:
        g.edge(row.gene, row.interactor_symbol, label="interacts_with")


In [27]:
g

In [28]:
dataframe

Unnamed: 0,gene,symbol
0,NCBIGene:1289,COL5A1
1,NCBIGene:1303,COL12A1
2,NCBIGene:7148,TNXB
3,NCBIGene:4638,MYLK
4,NCBIGene:4629,MYH11
5,NCBIGene:1277,COL1A1
6,NCBIGene:1278,COL1A2


In [44]:
# @param geneList: list of genes
# @param interactors: gene interactor table
# @param minInteractions: minimum number of interactions interactor must have
# @displays: graph of EDS genes from list and only shared interactors
def pairwise_graph(geneList, interact_table, minInteractions):
    #return graph of eds genes in list and only interactors that they share
    interactors = {}
    for gene in geneList:
        # get all interactors for gene
        #print(gene)
        geneInteract = list(interact_table[interact_table['gene'].isin([gene])]['interactor_symbol'])
        #print(geneInteract)
        for interactor in geneInteract:
            if interactor in interactors:
                interactors[interactor].append(gene)
            else:
                interactors[interactor] = [gene]

        gI = gv.Digraph()
        nodes = geneList
        finalInteractors = []
        for i in interactors:
            if len(interactors[i]) >= minInteractions:
                # \\TODO feels like this is probably wrong
                # graph isnt showing up
                gI.node(node)
                gI.edge(node, i, label='interacts_with')
    gI
    # final interactors are only shared with all genes

pairwise_graph(['MYLK', 'COL5A1'], interact_table)

VIRMA
ANLN
ITGB1
ITGA1
