TODO: turn this notebook into a tutorial on how to process a resource with OmniPath, and refer to it.

In [None]:
import omnipath
from omnipath.interactions import import_intercell_network
import numpy as np
import pandas as pd

In [None]:
from liana.resource import explode_complexes

In [None]:
def anti_join(left, right, on):
    """Return the rows of ``left`` that have no match in ``right``.

    Parameters
    ----------
    left : pandas.DataFrame
        Table whose unmatched rows are kept.
    right : pandas.DataFrame
        Table whose keys are used to exclude rows from ``left``.
    on : str or list of str
        Key column(s) present in both tables.

    Returns
    -------
    pandas.DataFrame
        Rows of ``left`` whose key does not occur in ``right``, in the
        column layout produced by ``left.merge(right, on=on)`` (columns
        that exist only in ``right`` are present but empty).
    """
    # A left join tags each row as 'left_only' or 'both'. Keeping only
    # 'left_only' (rather than "not 'both'" on an outer join) guarantees that
    # rows found solely in ``right`` never leak into the result.
    joined = left.merge(right, how='left', indicator=True, on=on)
    return joined[joined['_merge'] == 'left_only'].drop(columns='_merge')

In [None]:
pd.set_option('display.max_columns', None)

In [None]:
op_params = omnipath.interactions.AllInteractions.params()

In [None]:
# Intercell categories requested for both transmitters and receivers.
# Categories currently switched off: 'ecm_regulator', 'inhibitory_cofactor',
# 'ligand', 'ligand_antagonist', 'ligand_regulator',
# 'matrix_adhesion_regulator', 'receptor_regulator', 'secreted_receptor'.
protein_types = {
    'adhesion',
    'cell_adhesion',
    'cell_surface_ligand',
    'desmosome',
    'ecm',
    'gap_junction',
    'secreted_enzyme',
    'tight_junction',
}

# Databases used to restrict the queries ('Cellinker' is switched off here).
databases = {
    'CellPhoneDB',
    'CellChatDB',
    'ICELLNET',
    'connectomeDB2020',
    'CellTalkDB',
}

In [None]:
# Transmitter filter: the shared categories plus 'ligand', limited to `databases`.
transmitter_params = dict(
    categories=protein_types | {'ligand'},
    databases=databases,
)

In [None]:
# Receiver filter: the shared categories plus 'receptor', limited to `databases`.
receiver_params = dict(
    categories=protein_types | {'receptor'},
    databases=databases,
)

In [None]:
interactions = import_intercell_network(
    interactions_params={"databases": databases},
    transmitter_params=transmitter_params,
    receiver_params=receiver_params,
)

In [None]:
interactions

In [None]:
# Keep the identifier, gene-symbol, provenance and category columns, and
# give the gene-symbol columns the names used in the rest of the notebook.
resource = (
    interactions
    .loc[:, [
        'source',
        'target',
        'genesymbol_intercell_source',
        'genesymbol_intercell_target',
        'sources',
        'references',
        'category_intercell_source',
        'category_intercell_target',
    ]]
    .rename(columns={
        'genesymbol_intercell_source': 'source_genesymbol',
        'genesymbol_intercell_target': 'target_genesymbol',
    })
)
resource

Add interactions from the post-translational network

In [None]:
translational = omnipath.interactions.PostTranslational.get(databases=databases, genesymbols=True, entity_types=['protein', 'complex']) # .difference({'CellChatDB'}.union({'SignaLink3'})

In [None]:
msk =  np.isin(translational.source, resource.source) & np.isin(translational.target, resource.target)

In [None]:
translational[msk]

In [None]:
trans_unique = anti_join(translational, resource, on=['source_genesymbol', 'target_genesymbol', 'source', 'target'])

In [None]:
trans_unique

In [None]:
resource = resource.merge(trans_unique, how='outer', on=['source_genesymbol', 'target_genesymbol', 'source', 'target'])

In [None]:
resource

In [None]:
# Keep the first row of every (source, target) pair.
resource = resource.drop_duplicates(subset=['source', 'target'])

In [None]:
resource

Additional Resources - Filtered

In [None]:
from liana.resource.utils import obtain_extra_resource

In [None]:
cellinker = obtain_extra_resource(databases = {"Cellinker"},
                                  blocklist = {"SIGNOR_mechanism":{"phosphorylation","dephosphorylation"}},
                                  allowlist = {'Cellinker_type':{'Cytokine-cytokine receptor interaction',
                                                                 'Secreted protein to receptor interaction'}}
                                 )

In [None]:
cellinker

In [None]:
resource = resource.merge(cellinker, how='outer', on=['source', 'target', 'source_genesymbol', 'target_genesymbol'])

In [None]:
resource = resource[['source', 'target', 'source_genesymbol', 'target_genesymbol']]

Process liana-py resource

In [None]:
exploded_resource = explode_complexes(resource[['source_genesymbol', 'target_genesymbol']], SOURCE='source_genesymbol', TARGET='target_genesymbol')

In [None]:
exploded_resource['number'] = 1

In [None]:
exploded_resource

In [None]:
SOURCE, TARGET = 'source_genesymbol', 'target_genesymbol'
# Strip a leading "COMPLEX:" prefix from both gene-symbol columns.
for column in (SOURCE, TARGET):
    exploded_resource[column] = exploded_resource[column].str.replace("^COMPLEX:", "", regex=True)

In [None]:
# counter = summed `number` over each (source, target) gene-symbol pair,
# broadcast back onto every row of that pair.
exploded_resource = exploded_resource.assign(
    counter=lambda df: (
        df.groupby(['source_genesymbol', 'target_genesymbol'])['number']
        .transform('sum')
    )
)

In [None]:
exploded_resource[exploded_resource.target_genesymbol_complex=="ACVR1B_ACVR2A"]

In [None]:
# Keep rows whose pair occurs more than once and where neither the source
# nor the target complex label contains an underscore.
exploded_resource = exploded_resource.loc[
    lambda df: (df['counter'] > 1)
    & ~(
        df['source_genesymbol_complex'].str.contains('_')
        | df['target_genesymbol_complex'].str.contains('_')
    )
]

In [None]:
exploded_resource[exploded_resource.target_genesymbol_complex=="ACVR1B_ACVR2A"]

In [None]:
exploded_resource = exploded_resource[['source_genesymbol_complex', 'target_genesymbol_complex']]

In [None]:
exploded_resource=exploded_resource.rename(columns={"source_genesymbol_complex":"source_genesymbol",
                                                    "target_genesymbol_complex":"target_genesymbol"})

In [None]:
exploded_resource

In [None]:
resource = anti_join(resource, exploded_resource, on=['source_genesymbol', 'target_genesymbol'])

In [None]:
resource

Exploded by Entity

In [None]:
exploded_resource = explode_complexes(resource[['source_genesymbol', 'target_genesymbol']], SOURCE='source_genesymbol', TARGET='target_genesymbol')

In [None]:
SOURCE, TARGET = 'source_genesymbol', 'target_genesymbol'
# Strip a leading "COMPLEX:" prefix from both gene-symbol columns.
for column in (SOURCE, TARGET):
    exploded_resource[column] = exploded_resource[column].str.replace("^COMPLEX:", "", regex=True)

In [None]:
exploded_resource = exploded_resource.drop('interaction', axis=1)

In [None]:
def check_if_exploded(exploded_resource, check_entity, anchor_entity):
    """Select rows whose checked entity repeats for the same anchor complex.

    Parameters
    ----------
    exploded_resource : pandas.DataFrame
        Table holding the columns ``check_entity``, ``anchor_entity`` and
        their ``*_complex`` counterparts. It is not modified.
    check_entity : str
        Name of the column that is checked for repeats.
    anchor_entity : str
        Name of the column on the other side of the interaction.

    Returns
    -------
    pandas.DataFrame
        Rows for which the (``check_entity``, anchor complex) combination
        occurs more than once among the de-duplicated rows and whose
        ``check_entity`` complex label has no underscore (presumably the
        subunit separator -- confirm). The ``*_complex`` columns are
        returned under the plain entity names; the helper columns
        ``number`` and ``counter`` are kept.
    """
    check_complex = check_entity + '_complex'
    anchor_complex = anchor_entity + '_complex'

    # Work on a fresh frame: without the anchor member column and with a
    # constant column to sum over.
    frame = exploded_resource.drop(columns=[anchor_entity]).assign(number=1)

    # Count distinct rows per (checked member, anchor complex); rows removed
    # by the de-duplication get no count after index alignment.
    distinct_rows = frame.drop_duplicates()
    frame['counter'] = (
        distinct_rows
        .groupby([check_entity, anchor_complex])['number']
        .transform('sum')
    )

    repeated = frame['counter'] > 1
    has_underscore = frame[check_complex].str.contains('_')
    selected = frame[repeated & (~has_underscore)]

    # Report the complex-level labels under the plain entity names.
    return (
        selected
        .drop(columns=[check_entity])
        .rename(columns={anchor_complex: anchor_entity,
                         check_complex: check_entity})
    )

In [None]:
remove_sources = check_if_exploded(exploded_resource.copy(), check_entity='source_genesymbol', anchor_entity='target_genesymbol')
remove_sources

In [None]:
remove_targets = check_if_exploded(exploded_resource.copy(), check_entity='target_genesymbol', anchor_entity='source_genesymbol')

In [None]:
remove_targets

In [None]:
resource

In [None]:
resource = anti_join(resource, remove_sources, on=['source_genesymbol', 'target_genesymbol'])

In [None]:
resource

In [None]:
resource = anti_join(resource, remove_targets, on=['source_genesymbol', 'target_genesymbol'])

In [None]:
resource

In [None]:
# Strip a leading "COMPLEX:" prefix from both gene-symbol columns.
for column in (SOURCE, TARGET):
    resource[column] = resource[column].str.replace("^COMPLEX:", "", regex=True)

In [None]:
# Source-side entries that are removed from the resource.
# Two literal fixes: a comma was missing after "CD160" (Python silently fused
# it with the next string into "CD160CD226", so neither gene was blocked), and
# "ITGA9_ITGB|1" carried a stray "|" that kept it from matching "ITGA9_ITGB1".
BLOCK_TRANSMITTERS = [
    "ADGRE5", "CD160",
    "CD226", "EGFR",
    "TNFRSF18", "CTLA4",
    "KLRB1", "KLRF1", "KLRF2",
    "PTPRC", "PVR", "SIGLEC1",
    "SIGLEC9", "TNFRSF14",
    "ITGAD_ITGB2",
    "ITGA4_ITGB1", "ITGA9_ITGB1",
    "ITGA4_ITGB7",
    "TYK2", "SYK",
    "MT-RNR2",
    "IL13_IL13RA1_IL4R",
    "IL22_IL22RA1",
    "IL18BP",
]

In [None]:
# Target-side entries that are removed from the resource.
BLOCK_RECEIVERS = [
    "IFNG_IFNGR1",  # include a ligand in the complex
    "CNTN2_CNTNAP2",
    "IL2_IL2RA_IL2RB_IL2RG",
    "IL15_IL15RA_IL2RB_IL2RG",
    "IL6_IL6R_IL6ST",
    "IL1B_IL1R1_IL1RAP",
    "IL1B_IL1R2_IL1RAP",
    "IFNA2_IFNAR1_IFNAR2",
    "ACVR1C_ACVR2B_CFC1",
    "CSF2_CSF2RA_CSF2RB",
    "GP1BA_GP1BB_GP5_GP9",
    "",  # empty symbol
]

In [None]:
# Block wrong
msk = (np.isin(resource[SOURCE], BLOCK_TRANSMITTERS)) | (np.isin(resource[TARGET], BLOCK_RECEIVERS))

In [None]:
resource = resource[~msk]

In [None]:
resource

In [None]:
# resource = resource[resource.target_genesymbol.isin(resource.source_genesymbol)]

In [None]:
# resource = resource[resource.source_genesymbol.isin(resource.target_genesymbol)]

In [None]:
sum(resource.target_genesymbol.isin(resource.source_genesymbol))

In [None]:
?

In [None]:
# resource[~resource.source_genesymbol.isin(resource.target_genesymbol)]

In [None]:
# Write the curated resource to a CSV file, leaving out the index column.
# NOTE(review): the destination is a hard-coded, user-specific absolute path;
# it has to be adapted before running this cell on another machine.
resource.to_csv("/home/dbdimitrov/Downloads/resource.csv", index=False)

In [None]:
from liana.resource import select_resource

In [None]:
consensus = select_resource('consensus')

In [None]:
# Fetch an extra resource, dropping (de)phosphorylation mechanisms and keeping
# the two listed 'Cellinker_type' interaction classes.
# NOTE(review): the result is named `signalink`, yet the query requests the
# "Cellinker" database and filters on 'Cellinker_type' -- confirm whether a
# different database was intended here.
signalink = obtain_extra_resource(databases = {"Cellinker"},
                                  blocklist = {"SIGNOR_mechanism":{"phosphorylation","dephosphorylation"}},
                                  allowlist = {'Cellinker_type':{'Cytokine-cytokine receptor interaction',
                                                                 'Secreted protein to receptor interaction'}}
                                 )

In [None]:
signalink

In [None]:
add = omnipath.interactions.PostTranslational.get(databases={"SignaLink3"}, 
                                                  genesymbols=True, 
                                                  entity_types=['protein', 'complex'],
                                                  fields={"extra_attrs"})

In [None]:
# Keep the first row of every (source, target) pair.
add = add.drop_duplicates(subset=['source', 'target'])

In [None]:
add

In [None]:
omnipath.interactions.PostTranslational.params()

In [None]:
add

In [None]:
add.extra_attrs.values[4]

In [None]:
blocklist = {"SIGNOR_mechanism":{"phosphorylation","dephosphorylation"}}
allowlist = {'Cellinker_type':{'Cytokine-cytokine receptor interaction',
                               'Secreted protein to receptor interaction'}}

In [None]:
block_keys = blocklist.keys()
allow_keys = allowlist.keys()

In [None]:
# Union of the attribute keys referenced by either list. `|` is the set
# union; `^` (symmetric difference) would silently drop any key that
# appears in both the blocklist and the allowlist.
union_keys = block_keys | allow_keys

In [None]:
explode_attrs = add['extra_attrs'].apply(_json_intersect_serialize, union_keys=union_keys)

In [None]:
add = pd.concat([add, explode_attrs], axis = 1).drop('extra_attrs', axis=1)

In [None]:
(set(['phosphorylation', 'Phosphorylation']))

In [None]:
add.head(10)

In [None]:
# Convert blocklist to mask
# For each blocklisted attribute, the column is overwritten with a boolean
# flag (True when any blocked term is contained in the value) and the flagged
# rows are dropped before the next attribute is processed.
# NOTE(review): values of type float (presumably NaN, i.e. the attribute is
# absent) are flagged True and therefore dropped as well -- confirm that
# removing rows without the attribute is intended.
for k in block_keys:
    add[k] = [any([block  in att for block in blocklist[k]]) if type(att) is not float else True for att in add[k]]
    add = add[~add[k]]

In [None]:
# Convert allowlist to mask
# For each allowlisted attribute, the column is overwritten with a boolean
# flag (True when any allowed term is contained in the value) and only the
# flagged rows are kept before the next attribute is processed.
# NOTE(review): values of type float (presumably NaN, i.e. the attribute is
# absent) are flagged True, so rows without the attribute are kept -- confirm.
for k in allow_keys:
    add[k] = [any([allow  in att for allow in allowlist[k]]) if type(att) is not float else True for att in add[k]]
    add = add[add[k]]

In [None]:
add

In [None]:
[any([block  in att for block in blocklist[k]]) if type(att) is not float else True for att in add[k]]

In [None]:
# len([block in att if type(att) is not float else False for att in add[k] for block in blocklist[k]])

In [None]:
for att in add[k]:
    if type(att) is float:
        True
    any([block in att for block in blocklist[k]])

In [None]:
blocklist[k]

In [None]:
add

In [None]:
# Filter any value from blocklist that is true
add[~add[block_keys].any(axis=1)]

In [None]:
add['SIGNOR_mechanism'].isin(blocklist['SIGNOR_mechanism'])

In [None]:
add['SIGNOR_mechanism']

In [None]:
add

In [None]:
add['extra_attrs'].apply(loads).apply(pd.Series)

In [None]:
add['extra_attrs']

In [None]:
def vlues_tolist_drop(df):
    """Expand the ``extra_attrs`` column into one column per attribute key.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an ``extra_attrs`` column. Assumes every entry is a
        dict-like record (already-parsed JSON) -- TODO confirm.

    Returns
    -------
    pandas.DataFrame
        ``df`` without ``extra_attrs`` and with the expanded attribute
        columns appended, row for row.
    """
    # Reuse the index of ``df``: a fresh 0..n-1 index would make ``join``
    # pair attributes with the wrong rows (or none) once ``df`` was filtered.
    expanded = pd.DataFrame(df['extra_attrs'].values.tolist(), index=df.index)
    return df.join(expanded).drop('extra_attrs', axis=1)

In [None]:
vlues_tolist_drop(add)

In [None]:
add['attribute_check'] = [_check_resource_attributes(att, allowlist, blocklist) for att in add.extra_attrs]

In [None]:
extra_attrs = []

In [None]:
def _attribute_hits(value, listed):
    """Tell whether an attribute ``value`` matches an allow/block entry."""
    if isinstance(listed, bool):
        # A boolean entry only matches the identical boolean attribute.
        return isinstance(value, bool) and value is listed
    if isinstance(listed, str):
        # A bare string is one term, not a collection of characters.
        listed = (listed,)
    if isinstance(value, (list, tuple, set, frozenset)):
        candidates = value
    else:
        candidates = (value,)
    # Tuple membership compares by equality, so unhashable items are safe.
    listed = tuple(listed)
    return any(item in listed for item in candidates)


def _check_resource_attributes(att, allowlist, blocklist, keep_empty=False):
    """Decide whether an interaction passes the attribute filters.

    Parameters
    ----------
    att : str
        JSON-encoded attributes of one interaction.
    allowlist : dict
        Attribute name -> accepted values (a collection, or a bool flag).
    blocklist : dict
        Attribute name -> rejected values (a collection, or a bool flag).
    keep_empty : bool, default False
        Result for interactions that carry no attributes at all.

    Returns
    -------
    bool
        ``False`` if any attribute hits the blocklist; otherwise ``True``
        if any attribute hits the allowlist; ``False`` if neither applies.
        Attribute values may be scalars or lists of terms.
    """
    # Local import keeps the function usable without a module-level import.
    from json import loads

    att = loads(att)

    # Interactions without attributes cannot be judged; defer to the caller.
    if not isinstance(att, dict) or not att:
        return keep_empty

    # The blocklist takes precedence over the allowlist.
    if any(_attribute_hits(att[key], blocklist[key])
           for key in att.keys() & blocklist.keys()):
        return False

    # Neither blocked nor allowed also counts as a rejection.
    return any(_attribute_hits(att[key], allowlist[key])
               for key in att.keys() & allowlist.keys())

In [None]:
att = add.extra_attrs.values[5]
att

In [None]:
_check_resource_attributes(att, allowlist, blocklist)

In [None]:
att = loads(add.extra_attrs.values[1])
att

In [None]:
to_block = set(att.keys()).intersection(set(blocklist.keys()))
to_allow = set(att.keys()).intersection(set(allowlist.keys()))

In [None]:
to_block

In [None]:
att.keys()

In [None]:
to_block

In [None]:
# Attribute keys that also occur in the blocklist. The dictionary is named
# `blocklist`; `blacklist` is not defined anywhere in the visible notebook.
to_check = np.intersect1d(list(att.keys()), list(blocklist.keys()))

In [None]:
att

In [None]:
# Walk over the keys shared with the blocklist (`blocklist`, not the
# undefined `blacklist`) and test each attribute for blocked terms.
for key in to_check:
    if set(att[key]).intersection(blocklist[key]):
        # Placeholder: a bare expression has no effect inside a loop.
        False
    

In [None]:
# True when no shared key holds a blocked term (uses `blocklist`; the name
# `blacklist` is undefined in the visible notebook).
not any(set(att[key]).intersection(blocklist[key]) for key in to_check)

In [None]:
to_check

In [None]:
loads(add['extra_attrs'].values[3])

In [None]:
loads(add['extra_attrs'].values[3])

In [None]:
loads(add['extra_attrs'].values[1]) is False

In [None]:
add[add['type']]

In [None]:
pl = loads(translational["extra_attrs"][3])

In [None]:
omnipath.interactions.PostTranslational.params()

In [None]:
add = import_intercell_network(
    interactions_params={"databases": {"Cellinker"}},
    transmitter_params=transmitter_params,
    receiver_params=receiver_params,
)

In [None]:
add