In [1]:
import pandas
import reactome2py
from reactome2py import analysis, content, utils
from collections import defaultdict
from itertools import chain
from operator import methodcaller

#### Assessing pathways and drug targets in Cyclic Multiplexed-Immunofluorescence (cmIF) image biomarkers
Resource: https://www.ncbi.nlm.nih.gov/pubmed/31502168

1. After mapping proteins to gene names, we wrangle the marker matrix into a dictionary keyed by image section, where each value is the list of biomarker genes for that section.

In [2]:
path = "data/marker_matrix_gene.csv"

# Read the matrix with its first column as the index, then turn every
# comma-separated cell into a list of strings (column by column).
df = pandas.read_csv(path, index_col=0).apply(lambda column: column.str.split(","))

# Transposing first yields one record per original column, each keyed by the
# original index labels.
row_dict = df.transpose().to_dict(orient='records')

def imggenes2dict(row_dict):
    """Merge a sequence of record dicts into one dict of concatenated lists.

    Parameters
    ----------
    row_dict : iterable of dict
        Records (e.g. from ``DataFrame.to_dict('records')``). Each record maps
        a key to a list of items, or to a missing value (``None`` / NaN).

    Returns
    -------
    dict
        Maps every key seen in any record to the concatenation, in record
        order, of that key's list values. Missing values contribute nothing,
        so a key whose values are all missing maps to an empty list.
    """
    merged = defaultdict(list)

    for record in row_dict:
        for key, items in record.items():
            # Touch the bucket first so the key is registered even when this
            # particular cell is empty.
            bucket = merged[key]
            # pandas represents empty cells as NaN (a float); extending a list
            # with a float would raise TypeError, so skip missing values.
            if items is None or isinstance(items, float):
                continue
            bucket.extend(items)

    return dict(merged)

# Merge the records in row_dict into a single dictionary via imggenes2dict.
img_dict = imggenes2dict(row_dict)

2. Now we can pass each image section's biomarkers to the Reactome analysis service to fetch all Reactome pathways available for that section.

In [3]:
# Submit each entry of img_dict as one comma-separated identifier string.
result = [
    analysis.identifiers(ids=",".join(genes))
    for genes in img_dict.values()
]

# Keep only the token found under each response's summary; it is used below
# to retrieve the corresponding result.
tokens = [response['summary']['token'] for response in result]


def get_pathways(t):
    """Return the sorted pathway names stored under an analysis token.

    Parameters
    ----------
    t : str
        Token taken from the summary of an earlier ``analysis.identifiers``
        response.

    Returns
    -------
    list
        The ``name`` of every entry in the response's ``pathways`` list,
        sorted in ascending order with Python's default ordering.
    """
    response = analysis.token(
        token=t,
        species='Homo sapiens',
        page_size='-1',
        page='-1',
        sort_by='ENTITIES_FDR',
        order='ASC',
        resource='TOTAL',
        p_value='1',
        include_disease=True,
        min_entities=None,
        max_entities=None,
    )
    # The names are re-sorted here, so the ordering requested from the service
    # (ENTITIES_FDR, ascending) is not what the caller sees.
    return sorted(pathway['name'] for pathway in response['pathways'])

# One sorted list of pathway names per token, in the same order as `tokens`.
pathways = list(map(get_pathways, tokens))

3. Given the biomarkers, we can also fetch all available drug targets from DrugCentral for each image section.

In [4]:
# Query the "drugcentral" source once per entry of img_dict, passing that
# entry's items as a single comma-separated string.
result_drug_targets = [
    utils.genelist_drug_target(ids=",".join(genes), source="drugcentral")
    for genes in img_dict.values()
]