In [1]:
import os
import pickle
%matplotlib inline
import pandas as pd
import seaborn as sns
from tqdm import tqdm

In [2]:
# Node table: the raw column names contain a ':' character, so they are
# replaced with plain names that can be referenced in DataFrame.query().
nodes = pd.read_csv('../data/nodes.csv')
nodes.columns = ['ID', 'name', 'LABEL']

# Edge table: same renaming, for the same reason.
edges = pd.read_csv('../data/edges.csv')
edges.columns = ['START_ID', 'END_ID', 'TYPE']

In [3]:
# Preview the first five node rows.
nodes.head(5)

Unnamed: 0,ID,name,LABEL
0,C0871898,Self Defense,Activities & Behaviors
1,C0281991,Steroid Use,Activities & Behaviors
2,C0336963,Mountain climbing,Activities & Behaviors
3,C0010383,Crowding,Activities & Behaviors
4,C1513375,Moderate Exercise,Activities & Behaviors


In [4]:
# Preview the first five edge rows.
edges.head(5)

Unnamed: 0,START_ID,END_ID,TYPE
0,C1273870,C0282623,ADMINISTERED_TO_ABatAB
1,C1273870,C1138603,ADMINISTERED_TO_ABatCI
2,C0556656,C0221192,ADMINISTERED_TO_ABatCI
3,C0441648,C0221192,ADMINISTERED_TO_ABatCI
4,C1273870,C0221192,ADMINISTERED_TO_ABatCI


In [5]:
# Report the raw edge count, then remove duplicated rows and rows with
# missing values; the final expression shows the resulting shape.
print(len(edges))
edges = edges.drop_duplicates().dropna()
edges.shape

20619991


(20619991, 3)

# Look at the Metagraph to see if some edges can be combined

In [12]:
def list_connections(metanode1, metanode2):
    """Count the edges of each TYPE that connect two metanodes.

    Reads the module-level ``nodes`` and ``edges`` frames. An edge is
    counted when one endpoint's ID carries the label ``metanode1`` and the
    other endpoint's ID carries the label ``metanode2``, in either
    direction.

    Parameters
    ----------
    metanode1, metanode2 : values compared against ``nodes['LABEL']``.

    Returns
    -------
    pandas.Series
        ``value_counts()`` of the ``TYPE`` column over the matching edges.
    """
    def ids_labelled(label):
        # IDs of every node whose LABEL equals the given metanode.
        return list(nodes.loc[nodes['LABEL'] == label, 'ID'])

    first_ids = ids_labelled(metanode1)
    start_in_first = edges['START_ID'].isin(first_ids)
    end_in_first = edges['END_ID'].isin(first_ids)

    # Same metanode on both ends: a single selection is enough.
    if metanode1 == metanode2:
        matching = edges[start_in_first & end_in_first]
        return matching['TYPE'].value_counts()

    second_ids = ids_labelled(metanode2)
    start_in_second = edges['START_ID'].isin(second_ids)
    end_in_second = edges['END_ID'].isin(second_ids)

    # Collect both orientations: metanode1 -> metanode2, then the reverse.
    forward = edges[start_in_first & end_in_second]
    backward = edges[start_in_second & end_in_first]
    matching = pd.concat([forward, backward])

    return matching['TYPE'].value_counts()
    

In [7]:
# Lookup table from node ID to its metanode LABEL.
id_to_type = dict(zip(nodes['ID'], nodes['LABEL']))

In [9]:
# Take one representative edge per TYPE, look up the metanode labels of its
# two endpoints, and keep the unique order-independent label pairs, sorted.
one_edge_per_type = edges.drop_duplicates(subset='TYPE')
combos = sorted({
    tuple(sorted((id_to_type[start_id], id_to_type[end_id])))
    for start_id, end_id in zip(one_edge_per_type['START_ID'],
                                one_edge_per_type['END_ID'])
})
len(combos)

116

In [10]:
# Display the full list of metanode pairs.
combos

[('Activities & Behaviors', 'Activities & Behaviors'),
 ('Activities & Behaviors', 'Anatomy'),
 ('Activities & Behaviors', 'Chemicals & Drugs'),
 ('Activities & Behaviors', 'Concepts & Ideas'),
 ('Activities & Behaviors', 'Devices'),
 ('Activities & Behaviors', 'Disorders'),
 ('Activities & Behaviors', 'Genes & Molecular Sequences'),
 ('Activities & Behaviors', 'Living Beings'),
 ('Activities & Behaviors', 'Objects'),
 ('Activities & Behaviors', 'Occupations'),
 ('Activities & Behaviors', 'Organizations'),
 ('Activities & Behaviors', 'Phenomena'),
 ('Activities & Behaviors', 'Physiology'),
 ('Activities & Behaviors', 'Procedures'),
 ('Anatomy', 'Anatomy'),
 ('Anatomy', 'Chemicals & Drugs'),
 ('Anatomy', 'Concepts & Ideas'),
 ('Anatomy', 'Devices'),
 ('Anatomy', 'Disorders'),
 ('Anatomy', 'Genes & Molecular Sequences'),
 ('Anatomy', 'Geographic Areas'),
 ('Anatomy', 'Living Beings'),
 ('Anatomy', 'Objects'),
 ('Anatomy', 'Occupations'),
 ('Anatomy', 'Organizations'),
 ('Anatomy', 'Pheno

In [13]:
# Per-pair edge-type counts are expensive to compute (the original run took
# about 7 hours), so they are pickled and reloaded on later runs.
counts_cache_path = "../data/edge_pair_counts_orig.pkl"

if os.path.exists(counts_cache_path):
    # NOTE: unpickling can execute arbitrary code; only load a cache file
    # that was written by this notebook.
    # `with` guarantees the file handle is closed after loading.
    with open(counts_cache_path, "rb") as cache_file:
        combo_counts = pickle.load(cache_file)
else:
    combo_counts = dict()
    for comb in tqdm(combos):
        combo_counts[comb] = list_connections(comb[0], comb[1])
    # Close (and therefore flush) the cache file as soon as it is written.
    with open(counts_cache_path, "wb") as cache_file:
        pickle.dump(combo_counts, cache_file)

100%|██████████| 116/116 [6:53:34<00:00, 170.02s/it] 


In [14]:
# Number of distinct edge types observed for each metanode pair.
edge_numbs = {
    metanode_pair: len(type_counts)
    for metanode_pair, type_counts in combo_counts.items()
}

In [15]:
# Rank the metanode pairs from most to fewest distinct edge types and list them.
most_to_least = sorted(edge_numbs.items(), key=lambda entry: entry[1], reverse=True)
for item in most_to_least:
    metanode_pair, n_edge_types = item
    print(metanode_pair, ':', n_edge_types)

('Chemicals & Drugs', 'Disorders') : 56
('Chemicals & Drugs', 'Procedures') : 55
('Chemicals & Drugs', 'Concepts & Ideas') : 54
('Chemicals & Drugs', 'Physiology') : 53
('Disorders', 'Procedures') : 53
('Disorders', 'Genes & Molecular Sequences') : 52
('Genes & Molecular Sequences', 'Procedures') : 52
('Genes & Molecular Sequences', 'Physiology') : 52
('Chemicals & Drugs', 'Genes & Molecular Sequences') : 52
('Physiology', 'Procedures') : 52
('Disorders', 'Physiology') : 51
('Concepts & Ideas', 'Genes & Molecular Sequences') : 50
('Concepts & Ideas', 'Disorders') : 49
('Chemicals & Drugs', 'Living Beings') : 49
('Disorders', 'Disorders') : 48
('Chemicals & Drugs', 'Phenomena') : 48
('Concepts & Ideas', 'Procedures') : 46
('Disorders', 'Living Beings') : 46
('Chemicals & Drugs', 'Chemicals & Drugs') : 46
('Anatomy', 'Chemicals & Drugs') : 45
('Concepts & Ideas', 'Physiology') : 45
('Genes & Molecular Sequences', 'Living Beings') : 43
('Anatomy', 'Procedures') : 42
('Living Beings', 'Phy

In [50]:
def change_edge_type(from_type, to_type, swap=False):
    """Relabel, in place, every edge whose TYPE equals ``from_type``.

    Operates on the module-level ``edges`` frame.

    Parameters
    ----------
    from_type : edge TYPE value to look for.
    to_type : TYPE value written onto the matching rows.
    swap : when True, the START_ID and END_ID of the matching rows are
        also exchanged, reversing the direction of those edges.
    """
    matched = edges.index[edges['TYPE'] == from_type]
    edges.loc[matched, 'TYPE'] = to_type
    if not swap:
        return

    # Hold on to the original start IDs so they survive being overwritten.
    original_starts = edges.loc[matched, 'START_ID']
    edges.loc[matched, 'START_ID'] = edges.loc[matched, 'END_ID']
    edges.loc[matched, 'END_ID'] = original_starts
                                             
def merge_edge_types(from_list, to_type, swap=False):
    """Relabel every edge type named in ``from_list`` as ``to_type``.

    Each entry is handed to ``change_edge_type`` in order; ``swap`` is
    forwarded unchanged, so it applies to every type in the list.
    """
    for old_type in from_list:
        change_edge_type(old_type, to_type, swap=swap)
        
def drop_edges_from_list(drop_edges):
    """Remove, in place, every row of the module-level ``edges`` frame
    whose TYPE is one of the values in ``drop_edges``."""
    doomed = edges.query('TYPE in @drop_edges')
    edges.drop(index=doomed.index, inplace=True)

In [17]:
# Metanode pair #0: print its two labels, then the count of each edge type.
combo_num = 0
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Activities & Behaviors --- Activities & Behaviors 

AFFECTS_ABafAB                   2467
ISA_ABi>AB                       1738
NEG_AFFECTS_ABnafAB               226
METHOD_OF_ABmoAB                  147
LOCATION_OF_ABloAB                100
PREDISPOSES_ABpsAB                 37
COEXISTS_WITH_ABcwAB               13
STIMULATES_ABstAB                  12
TREATS_ABtAB                       12
MANIFESTATION_OF_ABmfo>AB          10
CAUSES_ABcAB                       10
PROCESS_OF_ABpro>AB                 6
PREVENTS_ABpvAB                     4
USES_ABuAB                          4
compared_with_ABcpwAB               3
NEG_PREDISPOSES_ABnpsAB             3
higher_than_ABhtAB                  3
DIAGNOSES_ABdgAB                    2
PRECEDES_ABpc>AB                    2
lower_than_ABltAB                   2
NEG_LOCATION_OF_ABnloAB             2
same_as_ABsaAB                      1
ADMINISTERED_TO_ABatAB              1
NEG_COEXISTS_WITH_ABncwAB           1
different_from_ABdfAB               

In [20]:
# Few relations overall for this pair, so every edge type that expresses
# some form of modulation is folded into the single AFFECTS edge.
to_merge = [
    'AFFECTS_ABafAB',
    'PREDISPOSES_ABpsAB',
    'STIMULATES_ABstAB',
    'TREATS_ABtAB',
    'CAUSES_ABcAB',
    'PREVENTS_ABpvAB',
]
merge_edge_types(to_merge, to_type='AFFECTS_ABafAB')

In [21]:
# Metanode pair #1: print its two labels, then the count of each edge type.
combo_num = 1
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Activities & Behaviors --- Anatomy 

LOCATION_OF_AloAB         1202
AFFECTS_AafAB               80
ASSOCIATED_WITH_AawAB       31
PRODUCES_ABpd>A             28
NEG_LOCATION_OF_AnloAB      23
TREATS_ABtA                  9
NEG_AFFECTS_AnafAB           9
CAUSES_AcAB                  5
METHOD_OF_ABmoA              4
COEXISTS_WITH_ABcwA          4
DISRUPTS_AdsAB               4
NEG_PRODUCES_ABnpdA          2
PROCESS_OF_Apro>AB           2
USES_ABuA                    2
PREDISPOSES_ABpsA            2
PREVENTS_ABpvA               1
STIMULATES_ABstA             1
PRECEDES_ABpc>A              1
AUGMENTS_ABagA               1
DIAGNOSES_ABdgA              1
NEG_PREVENTS_ABnpvA          1
PART_OF_Apo>AB               1
NEG_TREATS_ABntA             1
Name: TYPE, dtype: int64


Small counts here; no need to do anything, as these edges will get dropped.

In [22]:
# Metanode pair #2: print its two labels, then the count of each edge type.
combo_num = 2
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Activities & Behaviors --- Chemicals & Drugs 

AFFECTS_CDafAB                 3662
USES_ABuCD                     3637
AUGMENTS_CDagAB                2217
CAUSES_CDcAB                   1139
DISRUPTS_CDdsAB                1118
TREATS_CDtAB                    596
ASSOCIATED_WITH_CDawAB          554
NEG_AFFECTS_CDnafAB             521
PRODUCES_ABpd>CD                420
compared_with_ABcpwCD           350
PREDISPOSES_CDpsAB              249
PREVENTS_CDpvAB                 233
INHIBITS_CDinAB                 232
COEXISTS_WITH_CDcwAB            146
METHOD_OF_ABmoCD                117
STIMULATES_ABstCD               116
NEG_AUGMENTS_CDnagAB            108
higher_than_ABhtCD              100
NEG_CAUSES_CDncAB                65
NEG_DISRUPTS_CDndsAB             56
NEG_USES_ABnuCD                  51
NEG_ASSOCIATED_WITH_CDnawAB      26
COMPLICATES_CDcpAB               24
NEG_TREATS_CDntAB                20
PRECEDES_CDpc>AB                 19
lower_than_ABltCD                18
NEG_INHIBITS_CDni

In [25]:
# Small numbers again, so everything is merged into one type: AFFECTS is
# used for anything that would modulate.
to_merge = [
    'AFFECTS_CDafAB',
    'AUGMENTS_CDagAB',
    'CAUSES_CDcAB',
    'DISRUPTS_CDdsAB',
    'TREATS_CDtAB',
    'ASSOCIATED_WITH_CDawAB',
    'PREDISPOSES_CDpsAB',
    'PREVENTS_CDpvAB',
    'INHIBITS_CDinAB',
    'COMPLICATES_CDcpAB',
]
merge_edge_types(to_merge, to_type='AFFECTS_CDafAB')

In [26]:
# Metanode pair #3: print its two labels, then the count of each edge type.
combo_num = 3
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Activities & Behaviors --- Concepts & Ideas 

ISA_CIi>AB                       302
PROCESS_OF_ABpro>CI              195
AFFECTS_CIafAB                   189
TREATS_ABtCI                     155
METHOD_OF_CImoAB                  84
ISA_ABi>CI                        59
USES_ABuCI                        47
CAUSES_ABcCI                      42
OCCURS_IN_ABoiCI                  42
PREVENTS_ABpvCI                   34
NEG_AFFECTS_CInafAB               33
LOCATION_OF_CIloAB                32
COEXISTS_WITH_ABcwCI              20
DIAGNOSES_ABdgCI                  15
PREDISPOSES_CIpsAB                11
NEG_TREATS_ABntCI                  9
PROCESS_OF_CIpro>AB                8
ADMINISTERED_TO_ABatCI             6
STIMULATES_ABstCI                  6
NEG_PROCESS_OF_ABnproCI            6
MANIFESTATION_OF_CImfo>AB          4
NEG_OCCURS_IN_ABnoiCI              4
PRECEDES_CIpc>AB                   3
AUGMENTS_CIagAB                    2
MANIFESTATION_OF_ABmfo>CI          2
PRECEDES_ABpc>CI             

All way too small in number to ever make it past filtering

In [27]:
# Metanode pair #4: print its two labels, then the count of each edge type.
combo_num = 4
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Activities & Behaviors --- Devices 

USES_ABuDV               493
TREATS_DVtAB              94
compared_with_DVcpwAB     22
METHOD_OF_ABmoDV          18
COEXISTS_WITH_DVcwAB      15
NEG_USES_ABnuDV           11
CAUSES_DVcAB              10
PREVENTS_DVpvAB            9
higher_than_DVhtAB         6
LOCATION_OF_DVloAB         3
STIMULATES_DVstAB          2
AFFECTS_DVafAB             2
NEG_PREVENTS_DVnpvAB       1
PROCESS_OF_ABpro>DV        1
NEG_METHOD_OF_DVnmoAB      1
NEG_CAUSES_DVncAB          1
Name: TYPE, dtype: int64


In [28]:
# Metanode pair #5: print its two labels, then the count of each edge type.
combo_num = 5
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Activities & Behaviors --- Disorders 

TREATS_ABtDO                     14003
PREDISPOSES_ABpsDO                5493
AFFECTS_DOafAB                    4329
PREVENTS_ABpvDO                   3315
CAUSES_ABcDO                      2176
COEXISTS_WITH_ABcwDO              1255
DISRUPTS_DOdsAB                    968
ASSOCIATED_WITH_ABawDO             784
DIAGNOSES_ABdgDO                   651
NEG_TREATS_ABntDO                  532
PROCESS_OF_DOpro>AB                336
NEG_AFFECTS_ABnafDO                319
NEG_PREDISPOSES_DOnpsAB            289
MANIFESTATION_OF_ABmfo>DO          216
PROCESS_OF_ABpro>DO                132
MANIFESTATION_OF_DOmfo>AB          128
NEG_PREVENTS_ABnpvDO               120
ISA_DOi>AB                         108
ISA_ABi>DO                          78
NEG_CAUSES_ABncDO                   72
NEG_COEXISTS_WITH_DOncwAB           52
PRECEDES_DOpc>AB                    32
METHOD_OF_ABmoDO                    28
NEG_DISRUPTS_DOndsAB                27
NEG_ASSOCIATED_WITH_ABnaw

Larger numbers here, will try for negative and positive regulation

In [30]:
# AB that lessens DO
merge_edge_types(
    from_list=['TREATS_ABtDO', 'PREVENTS_ABpvDO', 'DIAGNOSES_ABdgDO'],
    to_type='TREATS_ABtDO',
)

# AB that increases, or is associated with, DO
merge_edge_types(
    from_list=['PREDISPOSES_ABpsDO', 'CAUSES_ABcDO',
               'COEXISTS_WITH_ABcwDO', 'ASSOCIATED_WITH_ABawDO'],
    to_type='ASSOCIATED_WITH_ABawDO',
)

# DO to AB edges
merge_edge_types(
    from_list=['AFFECTS_DOafAB', 'DISRUPTS_DOdsAB'],
    to_type='AFFECTS_DOafAB',
)



In [31]:
# Metanode pair #6: print its two labels, then the count of each edge type.
combo_num = 6
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Activities & Behaviors --- Genes & Molecular Sequences 

AFFECTS_GafAB                 1151
USES_ABuG                      668
ASSOCIATED_WITH_GawAB          598
AUGMENTS_GagAB                 588
CAUSES_GcAB                    417
PREDISPOSES_GpsAB              319
DISRUPTS_GdsAB                 220
TREATS_ABtG                    167
PRODUCES_ABpd>G                115
compared_with_GcpwAB            77
NEG_AFFECTS_GnafAB              72
PREVENTS_GpvAB                  35
NEG_ASSOCIATED_WITH_GnawAB      22
higher_than_GhtAB               21
COEXISTS_WITH_GcwAB             18
NEG_AUGMENTS_GnagAB             18
NEG_CAUSES_GncAB                14
METHOD_OF_GmoAB                 12
INHIBITS_GinAB                  10
STIMULATES_GstAB                 9
NEG_USES_ABnuG                   8
LOCATION_OF_GloAB                5
lower_than_GltAB                 3
NEG_PREDISPOSES_GnpsAB           3
NEG_DISRUPTS_GndsAB              3
NEG_STIMULATES_GnstAB            2
same_as_ABsaG                    

All too small to matter

In [32]:
# Metanode pair #7: print its two labels, then the count of each edge type.
combo_num = 7
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Activities & Behaviors --- Living Beings 

PROCESS_OF_ABpro>LB            20632
TREATS_ABtLB                    4972
NEG_PROCESS_OF_ABnproLB          655
AFFECTS_ABafLB                   483
ADMINISTERED_TO_ABatLB           400
USES_LBuAB                       280
NEG_TREATS_ABntLB                233
OCCURS_IN_ABoiLB                  79
DIAGNOSES_LBdgAB                  59
NEG_AFFECTS_ABnafLB               37
CAUSES_LBcAB                      35
NEG_ADMINISTERED_TO_ABnatLB       29
METHOD_OF_ABmoLB                  28
LOCATION_OF_LBloAB                26
PRODUCES_ABpd>LB                   6
NEG_USES_LBnuAB                    4
INTERACTS_WITH_LBiwAB              4
NEG_CAUSES_LBncAB                  3
PRODUCES_LBpd>AB                   3
NEG_OCCURS_IN_ABnoiLB              2
PREVENTS_ABpvLB                    2
PROCESS_OF_LBpro>AB                1
DISRUPTS_LBdsAB                    1
NEG_ASSOCIATED_WITH_LBnawAB        1
Name: TYPE, dtype: int64


'process_of' here is the only potentially usable edge, but we will make it undirected

In [34]:
# Fold PROCESS_OF into the OCCURS_IN edge for this pair.
merge_edge_types(
    from_list=['PROCESS_OF_ABpro>LB', 'OCCURS_IN_ABoiLB'],
    to_type='OCCURS_IN_ABoiLB',
)
# USES is written LB -> AB, so its endpoints are swapped while relabelling.
merge_edge_types(['USES_LBuAB'], to_type='OCCURS_IN_ABoiLB', swap=True)

In [35]:
# Metanode pair #8: print its two labels, then the count of each edge type.
combo_num = 8
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Activities & Behaviors --- Objects 

USES_ABuOB                 81
LOCATION_OF_OBloAB         62
AFFECTS_OBafAB             60
CAUSES_OBcAB               11
TREATS_OBtAB               10
NEG_AFFECTS_OBnafAB         7
METHOD_OF_OBmoAB            3
compared_with_ABcpwOB       3
ISA_OBi>AB                  3
PREDISPOSES_OBpsAB          3
NEG_USES_ABnuOB             2
AUGMENTS_OBagAB             2
ASSOCIATED_WITH_OBawAB      1
NEG_LOCATION_OF_OBnloAB     1
PART_OF_OBpo>AB             1
same_as_ABsaOB              1
PROCESS_OF_ABpro>OB         1
STIMULATES_OBstAB           1
ISA_ABi>OB                  1
PRODUCES_ABpd>OB            1
Name: TYPE, dtype: int64


Too small to use

In [36]:
# Metanode pair #9: print its two labels, then the count of each edge type.
combo_num = 9
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Activities & Behaviors --- Occupations 

METHOD_OF_ABmoOC          1911
ISA_ABi>OC                  25
NEG_METHOD_OF_ABnmoOC       20
PROCESS_OF_ABpro>OC         18
COEXISTS_WITH_ABcwOC         5
TREATS_ABtOC                 4
ISA_OCi>AB                   3
PREVENTS_OCpvAB              1
LOCATION_OF_ABloOC           1
ADMINISTERED_TO_ABatOC       1
PREDISPOSES_OCpsAB           1
Name: TYPE, dtype: int64


Too small

In [37]:
# Metanode pair #10: print its two labels, then the count of each edge type.
combo_num = 10
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Activities & Behaviors --- Organizations 

LOCATION_OF_ORloAB           6786
NEG_LOCATION_OF_ORnloAB        85
ISA_ORi>AB                     55
AFFECTS_ABafOR                 40
ISA_ABi>OR                      6
NEG_AFFECTS_ABnafOR             4
PROCESS_OF_ABpro>OR             3
PRODUCES_ORpd>AB                2
COEXISTS_WITH_ORcwAB            2
TREATS_ABtOR                    2
METHOD_OF_ABmoOR                1
NEG_COEXISTS_WITH_ORncwAB       1
Name: TYPE, dtype: int64


Too small

In [38]:
# Metanode pair #11: print its two labels, then the count of each edge type.
combo_num = 11
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Activities & Behaviors --- Phenomena 

AFFECTS_PHafAB               288
PROCESS_OF_ABpro>PH          188
PROCESS_OF_PHpro>AB           94
TREATS_ABtPH                  48
NEG_AFFECTS_ABnafPH           28
NEG_PROCESS_OF_PHnproAB       14
CAUSES_ABcPH                  11
METHOD_OF_PHmoAB              10
COEXISTS_WITH_ABcwPH           9
ISA_PHi>AB                     6
ISA_ABi>PH                     5
DIAGNOSES_ABdgPH               5
PRECEDES_ABpc>PH               4
PREDISPOSES_PHpsAB             3
MANIFESTATION_OF_PHmfo>AB      3
PREVENTS_ABpvPH                3
NEG_TREATS_ABntPH              2
DISRUPTS_PHdsAB                2
STIMULATES_ABstPH              1
LOCATION_OF_PHloAB             1
NEG_COEXISTS_WITH_ABncwPH      1
Name: TYPE, dtype: int64


In [39]:
# Metanode pair #12: print its two labels, then the count of each edge type.
combo_num = 12
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Activities & Behaviors --- Physiology 

AFFECTS_ABafPS               1469
COEXISTS_WITH_PScwAB          836
PROCESS_OF_ABpro>PS           582
TREATS_ABtPS                  210
PROCESS_OF_PSpro>AB           195
NEG_AFFECTS_PSnafAB           177
PRECEDES_ABpc>PS              157
CAUSES_PScAB                  107
PRECEDES_PSpc>AB              100
NEG_PROCESS_OF_ABnproPS        52
NEG_COEXISTS_WITH_PSncwAB      51
DISRUPTS_ABdsPS                44
STIMULATES_ABstPS              34
PREVENTS_ABpvPS                32
ISA_PSi>AB                     29
ISA_ABi>PS                     25
PREDISPOSES_ABpsPS             22
NEG_TREATS_ABntPS              19
USES_PSuAB                     14
METHOD_OF_ABmoPS               11
MANIFESTATION_OF_ABmfo>PS      10
MANIFESTATION_OF_PSmfo>AB       7
NEG_CAUSES_PSncAB               5
DIAGNOSES_ABdgPS                4
NEG_DISRUPTS_ABndsPS            4
NEG_PRECEDES_ABnpcPS            3
NEG_PREDISPOSES_PSnpsAB         2
OCCURS_IN_ABoiPS                2
ASSOCIAT

In [41]:
# Metanode pair #13: print its two labels, then the count of each edge type.
combo_num = 13
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Activities & Behaviors --- Procedures 

METHOD_OF_ABmoPR             4368
COEXISTS_WITH_ABcwPR         2112
AFFECTS_PRafAB               1142
STIMULATES_ABstPR             994
USES_ABuPR                    797
ISA_PRi>AB                    663
TREATS_PRtAB                  300
LOCATION_OF_PRloAB            224
PRECEDES_PRpc>AB              202
NEG_AFFECTS_PRnafAB           164
DIAGNOSES_PRdgAB              163
compared_with_ABcpwPR         121
NEG_METHOD_OF_PRnmoAB         100
PREVENTS_PRpvAB                89
NEG_COEXISTS_WITH_PRncwAB      62
NEG_STIMULATES_PRnstAB         54
ISA_ABi>PR                     37
higher_than_PRhtAB             30
PRECEDES_ABpc>PR               30
NEG_USES_ABnuPR                21
PROCESS_OF_ABpro>PR            14
CAUSES_ABcPR                   14
COMPLICATES_PRcpAB             13
NEG_TREATS_PRntAB              11
lower_than_PRltAB               6
NEG_LOCATION_OF_ABnloPR         5
NEG_DIAGNOSES_PRndgAB           4
NEG_PREVENTS_PRnpvAB            4
NEG_high

In [42]:
# Metanode pair #14: print its two labels, then the count of each edge type.
combo_num = 14
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Anatomy --- Anatomy 

PART_OF_Apo>A              202994
LOCATION_OF_AloA            72116
NEG_PART_OF_AnpoA            3290
PRODUCES_Apd>A               1860
NEG_LOCATION_OF_AnloA        1827
AFFECTS_AafA                  142
COEXISTS_WITH_AcwA            138
NEG_PRODUCES_AnpdA             63
AUGMENTS_AagA                  36
INHIBITS_AinA                  20
ASSOCIATED_WITH_AawA           19
DISRUPTS_AdsA                  15
NEG_AFFECTS_AnafA               8
compared_with_AcpwA             7
INTERACTS_WITH_AiwA             5
STIMULATES_AstA                 4
higher_than_AhtA                3
CONVERTS_TO_Act>A               3
NEG_COEXISTS_WITH_AncwA         2
ADMINISTERED_TO_AatA            2
CAUSES_AcA                      2
lower_than_AltA                 1
PROCESS_OF_Apro>A               1
NEG_AUGMENTS_AnagA              1
same_as_AsaA                    1
different_from_AdfA             1
Name: TYPE, dtype: int64


In [44]:
# Fold PART_OF into LOCATION_OF for Anatomy-Anatomy edges.
merge_edge_types(
    from_list=['PART_OF_Apo>A', 'LOCATION_OF_AloA'],
    to_type='LOCATION_OF_AloA',
)

In [45]:
# Metanode pair #15: print its two labels, then the count of each edge type.
combo_num = 15
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Anatomy --- Chemicals & Drugs 

LOCATION_OF_AloCD             860360
PART_OF_CDpo>A                325076
AFFECTS_CDafA                 180888
AUGMENTS_CDagA                157053
PRODUCES_Apd>CD               108397
DISRUPTS_CDdsA                 76820
NEG_LOCATION_OF_AnloCD         21307
NEG_AFFECTS_CDnafA             15580
ADMINISTERED_TO_CDatA          12451
NEG_PART_OF_CDnpoA              6877
NEG_PRODUCES_AnpdCD             6827
NEG_AUGMENTS_CDnagA             5232
PRODUCES_CDpd>A                 3913
NEG_DISRUPTS_CDndsA             2937
COEXISTS_WITH_AcwCD              642
PART_OF_Apo>CD                   334
INTERACTS_WITH_AiwCD             324
TREATS_CDtA                      278
CAUSES_CDcA                      264
ASSOCIATED_WITH_AawCD            205
NEG_ADMINISTERED_TO_CDnatA       163
STIMULATES_CDstA                 128
USES_AuCD                        108
INHIBITS_AinCD                   102
compared_with_CDcpwA              50
higher_than_AhtCD                 12
NEG_IN

We will go for two distinct types.  

1. Location where CD naturally exists
2. Location where CD has an effect

In [47]:
# Change to a localization of the compound.
# Edge types written Anatomy -> Chemical are relabelled as they are.
to_merge = ['LOCATION_OF_AloCD', 'PRODUCES_Apd>CD', 'COEXISTS_WITH_AcwCD']
merge_edge_types(to_merge, 'LOCATION_OF_AloCD')

# Edge types written Chemical -> Anatomy are reversed while relabelling so
# they line up with the A -> CD target. PART_OF_CDpo>A was previously merged
# without the swap, unlike PART_OF_CIpo>A, PART_OF_DOpo>A and PART_OF_Gpo>A,
# which are all merged with swap=True elsewhere in this notebook.
# NOTE(review): assumes the abbreviation order reflects START/END order, as
# the other swap=True merges do; confirm against the edge file.
to_merge = ['PART_OF_CDpo>A', 'PRODUCES_CDpd>A']
merge_edge_types(to_merge, 'LOCATION_OF_AloCD', swap=True)

# Change to localization of the compound's effect.
to_merge = ['AFFECTS_CDafA', 'AUGMENTS_CDagA',  'DISRUPTS_CDdsA', 'ADMINISTERED_TO_CDatA',  'TREATS_CDtA']
merge_edge_types(to_merge, 'AFFECTS_CDafA')

In [48]:
# Metanode pair #16: print its two labels, then the count of each edge type.
combo_num = 16
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Anatomy --- Concepts & Ideas 

LOCATION_OF_AloCI             33058
NEG_LOCATION_OF_CInloA         1196
PART_OF_CIpo>A                 1074
PART_OF_Apo>CI                 1025
AFFECTS_CIafA                   193
ASSOCIATED_WITH_AawCI           146
PROCESS_OF_CIpro>A               69
AUGMENTS_CIagA                   47
PRODUCES_Apd>CI                  44
NEG_PART_OF_AnpoCI               26
CAUSES_AcCI                      24
DISRUPTS_CIdsA                   24
PRODUCES_CIpd>A                  22
NEG_AFFECTS_CInafA               14
OCCURS_IN_AoiCI                  13
COEXISTS_WITH_AcwCI              13
NEG_ASSOCIATED_WITH_AnawCI       12
PROCESS_OF_Apro>CI                9
ADMINISTERED_TO_CIatA             7
ISA_Ai>CI                         7
USES_CIuA                         6
TREATS_AtCI                       6
DIAGNOSES_CIdgA                   5
NEG_CAUSES_AncCI                  2
compared_with_AcpwCI              2
NEG_PRODUCES_AnpdCI               1
PRECEDES_Apc>CI                  

Only 1 true relation here:  Location_of

In [51]:
# Edge types written Anatomy -> Concept are relabelled as they are.
to_merge = [
    'LOCATION_OF_AloCI',
    'ASSOCIATED_WITH_AawCI',
    'PRODUCES_Apd>CI',
    'PART_OF_Apo>CI',
]
merge_edge_types(to_merge, to_type='LOCATION_OF_AloCI')

# Edge types written Concept -> Anatomy have their endpoints swapped.
to_merge = [
    'AFFECTS_CIafA',
    'PART_OF_CIpo>A',
    'PROCESS_OF_CIpro>A',
]
merge_edge_types(to_merge, to_type='LOCATION_OF_AloCI', swap=True)

In [52]:
# Metanode pair #17: print its two labels, then the count of each edge type.
combo_num = 17
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Anatomy --- Devices 

LOCATION_OF_AloDV         33917
NEG_LOCATION_OF_AnloDV      433
USES_AuDV                   126
PART_OF_Apo>DV               97
PART_OF_DVpo>A               71
TREATS_DVtA                  64
compared_with_DVcpwA         19
AFFECTS_DVafA                 8
CAUSES_DVcA                   7
DISRUPTS_DVdsA                4
AUGMENTS_DVagA                3
PRODUCES_Apd>DV               2
NEG_TREATS_DVntA              2
higher_than_DVhtA             2
NEG_PART_OF_AnpoDV            2
METHOD_OF_DVmoA               2
PREVENTS_DVpvA                1
ADMINISTERED_TO_DVatA         1
NEG_AUGMENTS_DVnagA           1
lower_than_DVltA              1
ASSOCIATED_WITH_AawDV         1
NEG_USES_AnuDV                1
COEXISTS_WITH_AcwDV           1
Name: TYPE, dtype: int64


`Location_of` is the only usable edge; no merges needed

In [53]:
# Metanode pair #18: print its two labels, then the count of each edge type.
combo_num = 18
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Anatomy --- Disorders 

LOCATION_OF_AloDO             500137
PART_OF_DOpo>A                 57619
PART_OF_Apo>DO                 20042
AFFECTS_DOafA                  15019
NEG_LOCATION_OF_AnloDO         10574
ASSOCIATED_WITH_AawDO           9586
CAUSES_AcDO                     5788
PRODUCES_DOpd>A                 3900
DISRUPTS_DOdsA                  1812
NEG_PART_OF_DOnpoA               738
COEXISTS_WITH_AcwDO              607
NEG_AFFECTS_AnafDO               574
PREDISPOSES_DOpsA                373
NEG_ASSOCIATED_WITH_AnawDO       279
NEG_CAUSES_AncDO                 200
TREATS_AtDO                      185
PROCESS_OF_DOpro>A               183
AUGMENTS_AagDO                   111
NEG_PRODUCES_DOnpdA              103
MANIFESTATION_OF_DOmfo>A          54
NEG_DISRUPTS_DOndsA               48
PRODUCES_Apd>DO                   43
DIAGNOSES_AdgDO                   40
PREVENTS_ApvDO                    23
PRECEDES_Apc>DO                   15
ISA_DOi>A                         12
NEG_PREDISPOSE

Several large count edges here, all will be merged to 'location_of'

In [55]:
# Edge types written Anatomy -> Disorder are relabelled as they are.
to_merge = [
    'LOCATION_OF_AloDO',
    'PART_OF_Apo>DO',
    'ASSOCIATED_WITH_AawDO',
    'PRODUCES_Apd>DO',
]
merge_edge_types(to_merge, to_type='LOCATION_OF_AloDO')

# Edge types written Disorder -> Anatomy have their endpoints swapped.
to_merge = [
    'PART_OF_DOpo>A',
    'AFFECTS_DOafA',
    'PROCESS_OF_DOpro>A',
]
merge_edge_types(to_merge, to_type='LOCATION_OF_AloDO', swap=True)

In [56]:
# Metanode pair #19: print its two labels, then the count of each edge type.
combo_num = 19
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Anatomy --- Genes & Molecular Sequences 

LOCATION_OF_AloG             419681
PART_OF_Gpo>A                234502
PRODUCES_Apd>G                72412
AFFECTS_GafA                  68662
AUGMENTS_GagA                 68194
DISRUPTS_GdsA                 28793
NEG_LOCATION_OF_AnloG         10738
NEG_PART_OF_GnpoA              4658
NEG_PRODUCES_AnpdG             4569
NEG_AFFECTS_GnafA              4437
PRODUCES_Gpd>A                 2772
ADMINISTERED_TO_GatA           2461
NEG_AUGMENTS_GnagA             1857
COEXISTS_WITH_GcwA             1798
NEG_DISRUPTS_GndsA              747
INTERACTS_WITH_AiwG             519
INHIBITS_GinA                   127
STIMULATES_GstA                 118
PART_OF_Apo>G                   110
CAUSES_GcA                      103
ISA_Ai>G                         72
ASSOCIATED_WITH_GawA             62
compared_with_GcpwA              28
TREATS_GtA                       27
NEG_INTERACTS_WITH_GniwA         25
USES_AuG                         22
NEG_COEXISTS_WITH_Ancw

Here we will try for 3 associations:

1. Associated With (or localizes to)
2. Positive association (upregulated in)
3. Negative Association (downregulated in)

In [58]:
# --- LOCATION_OF: Anatomy -> Gene types first, then Gene -> Anatomy types
# with their endpoints swapped.
to_merge = ['LOCATION_OF_AloG', 'PRODUCES_Apd>G']
merge_edge_types(to_merge, to_type='LOCATION_OF_AloG')
to_merge = ['ASSOCIATED_WITH_GawA', 'COEXISTS_WITH_GcwA', 'PART_OF_Gpo>A']
merge_edge_types(to_merge, to_type='LOCATION_OF_AloG', swap=True)

# --- AFFECTS: Gene -> Anatomy types, plus INTERACTS_WITH (written A -> G)
# with its endpoints swapped.
to_merge = [
    'AFFECTS_GafA',
    'AUGMENTS_GagA',
    'DISRUPTS_GdsA',
    'INHIBITS_GinA',
    'STIMULATES_GstA',
]
merge_edge_types(to_merge, to_type='AFFECTS_GafA')
change_edge_type(from_type='INTERACTS_WITH_AiwG', to_type='AFFECTS_GafA', swap=True)

# --- NEG_LOCATION_OF: the negated counterparts, handled the same way.
to_merge = ['NEG_LOCATION_OF_AnloG', 'NEG_PRODUCES_AnpdG', 'NEG_COEXISTS_WITH_AncwG']
merge_edge_types(to_merge, to_type='NEG_LOCATION_OF_AnloG')
to_merge = ['NEG_ASSOCIATED_WITH_GnawA', 'NEG_PART_OF_GnpoA']
merge_edge_types(to_merge, to_type='NEG_LOCATION_OF_AnloG', swap=True)

In [59]:
# Metanode pair #20: print its two labels, then the count of each edge type.
combo_num = 20
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Anatomy --- Geographic Areas 

LOCATION_OF_GAloA      26
AFFECTS_GAafA           2
PART_OF_Apo>GA          1
PART_OF_GApo>A          1
NEG_AUGMENTS_GAnagA     1
Name: TYPE, dtype: int64


Too Small

In [60]:
# Metanode pair #21: print its two labels, then the count of each edge type.
combo_num = 21
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Anatomy --- Living Beings 

PART_OF_Apo>LB                164385
LOCATION_OF_AloLB              40804
NEG_PART_OF_AnpoLB              1622
NEG_LOCATION_OF_AnloLB           891
PROCESS_OF_Apro>LB               678
PART_OF_LBpo>A                   400
AFFECTS_LBafA                    126
TREATS_AtLB                       99
ASSOCIATED_WITH_AawLB             64
ADMINISTERED_TO_LBatA             49
USES_LBuA                         34
PRODUCES_Apd>LB                   32
PRODUCES_LBpd>A                   27
AUGMENTS_LBagA                    27
NEG_PROCESS_OF_AnproLB            23
OCCURS_IN_AoiLB                   18
CAUSES_LBcA                       17
DIAGNOSES_LBdgA                   10
DISRUPTS_LBdsA                     8
INTERACTS_WITH_AiwLB               8
NEG_AFFECTS_AnafLB                 7
COEXISTS_WITH_LBcwA                3
ISA_Ai>LB                          2
NEG_PRODUCES_AnpdLB                2
NEG_TREATS_AntLB                   2
NEG_COEXISTS_WITH_LBncwA           1
NEG_AUGMEN

After spot-checking several instances of each of part_of and location_of:

`Part_of` shows that a particular anatomy belongs to a particular living being  
`Location_of` shows where a particular living being (usually a microorganism) occurs

We will change Part_of to occurs_in to remove the directionality of the edge, which is unimportant for this study.

In [63]:
# Fold PART_OF (Anatomy -> Living Being) into the OCCURS_IN edge.
# The comma between the two names is required: without it Python joins the
# adjacent string literals into the single name
# 'OCCURS_IN_AoiLBPART_OF_Apo>LB', which matches no edge type, so the merge
# silently does nothing.
to_merge = ['OCCURS_IN_AoiLB', 'PART_OF_Apo>LB']
merge_edge_types(to_merge, 'OCCURS_IN_AoiLB')

In [64]:
# Metanode pair #22: print its two labels, then the count of each edge type.
combo_num = 22
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Anatomy --- Objects 

LOCATION_OF_AloOB         3031
PART_OF_OBpo>A             332
AFFECTS_OBafA              242
AUGMENTS_OBagA             107
PART_OF_Apo>OB              60
DISRUPTS_OBdsA              58
NEG_LOCATION_OF_AnloOB      39
PRODUCES_Apd>OB             25
NEG_AFFECTS_OBnafA          25
USES_AuOB                   14
ISA_Ai>OB                    4
ADMINISTERED_TO_OBatA        3
TREATS_OBtA                  3
compared_with_OBcpwA         3
NEG_PART_OF_OBnpoA           3
INTERACTS_WITH_OBiwA         2
PRODUCES_OBpd>A              2
NEG_PRODUCES_AnpdOB          2
NEG_CAUSES_OBncA             1
NEG_AUGMENTS_OBnagA          1
CAUSES_OBcA                  1
higher_than_OBhtA            1
INHIBITS_OBinA               1
ASSOCIATED_WITH_OBawA        1
Name: TYPE, dtype: int64


Too small for any need

In [65]:
# Metanode pair #23: print its two labels, then the count of each edge type.
combo_num = 23
pair = combos[combo_num]

print(pair[0], '---', pair[1], '\n')
print(combo_counts[pair])

Anatomy --- Occupations 

LOCATION_OF_OCloA     59
METHOD_OF_AmoOC        7
PART_OF_Apo>OC         3
AFFECTS_OCafA          1
PROCESS_OF_OCpro>A     1
TREATS_OCtA            1
CAUSES_AcOC            1
Name: TYPE, dtype: int64


In [66]:
# Inspect the edge-type counts for the metanode pair stored at index 24
# of `combos` (the recorded output shows Anatomy --- Organizations).
combo_num = 24
pair = combos[combo_num]

# Header line: "<first label> --- <second label> " followed by a blank line.
print(f"{pair[0]} --- {pair[1]} \n")
# `pair` is the same key as combos[combo_num], so look the counts up directly.
print(combo_counts[pair])

Anatomy --- Organizations 

LOCATION_OF_AloOR    8
PART_OF_Apo>OR       6
PART_OF_ORpo>A       6
AUGMENTS_ORagA       4
NEG_USES_AnuOR       1
Name: TYPE, dtype: int64


In [67]:
# Inspect the edge-type counts for the metanode pair stored at index 25
# of `combos` (the recorded output shows Anatomy --- Phenomena).
combo_num = 25
pair = combos[combo_num]

# Header line: "<first label> --- <second label> " followed by a blank line.
print(f"{pair[0]} --- {pair[1]} \n")
# `pair` is the same key as combos[combo_num], so look the counts up directly.
print(combo_counts[pair])

Anatomy --- Phenomena 

LOCATION_OF_AloPH             6824
AFFECTS_AafPH                  400
PROCESS_OF_Apro>PH             158
ASSOCIATED_WITH_AawPH          136
NEG_LOCATION_OF_AnloPH         121
PART_OF_PHpo>A                 103
PART_OF_Apo>PH                  59
AUGMENTS_PHagA                  53
PRODUCES_PHpd>A                 42
PROCESS_OF_PHpro>A              34
NEG_AFFECTS_PHnafA              33
DISRUPTS_PHdsA                  28
PRODUCES_Apd>PH                 15
CAUSES_PHcA                      9
NEG_PROCESS_OF_AnproPH           5
COEXISTS_WITH_AcwPH              4
NEG_ASSOCIATED_WITH_AnawPH       3
DIAGNOSES_PHdgA                  1
NEG_DISRUPTS_PHndsA              1
ISA_PHi>A                        1
NEG_PRODUCES_PHnpdA              1
TREATS_AtPH                      1
MANIFESTATION_OF_PHmfo>A         1
NEG_COEXISTS_WITH_AncwPH         1
PREDISPOSES_ApsPH                1
Name: TYPE, dtype: int64


In [68]:
# Inspect the edge-type counts for the metanode pair stored at index 26
# of `combos` (the recorded output shows Anatomy --- Physiology).
combo_num = 26
pair = combos[combo_num]

# Header line: "<first label> --- <second label> " followed by a blank line.
print(f"{pair[0]} --- {pair[1]} \n")
# `pair` is the same key as combos[combo_num], so look the counts up directly.
print(combo_counts[pair])

Anatomy --- Physiology 

AFFECTS_AafPS                 18336
PROCESS_OF_PSpro>A             8893
LOCATION_OF_AloPS              3490
PRODUCES_PSpd>A                1301
NEG_AFFECTS_AnafPS             1265
PART_OF_PSpo>A                  459
PART_OF_Apo>PS                  452
ASSOCIATED_WITH_AawPS           191
NEG_PROCESS_OF_PSnproA          167
COEXISTS_WITH_AcwPS             145
CAUSES_PScA                      75
AUGMENTS_PSagA                   74
NEG_LOCATION_OF_AnloPS           70
PROCESS_OF_Apro>PS               67
DISRUPTS_PSdsA                   50
NEG_PRODUCES_PSnpdA              43
PRODUCES_Apd>PS                  38
PRECEDES_PSpc>A                  22
MANIFESTATION_OF_Amfo>PS         12
PRECEDES_Apc>PS                   9
TREATS_AtPS                       9
NEG_COEXISTS_WITH_AncwPS          8
NEG_ASSOCIATED_WITH_PSnawA        6
NEG_PART_OF_PSnpoA                6
MANIFESTATION_OF_PSmfo>A          4
NEG_CAUSES_AncPS                  2
NEG_DISRUPTS_PSndsA               2
ISA

In [69]:
# Edge types seen between Anatomy and Physiology, written out by hand.
# NOTE(review): the recorded output of this cell is a pandas Index, so the
# literal was presumably pasted from an earlier `.index` display -- confirm.
#
# Every entry must be followed by a comma: Python concatenates adjacent
# string literals, so a missing comma silently fuses two edge types into one
# bogus name (previously 'PROCESS_OF_PSpro>A' + 'PRODUCES_PSpd>A').
anatomy_physiology_types = [
    'AFFECTS_AafPS', 'LOCATION_OF_AloPS', 'PROCESS_OF_PSpro>A',
    'PRODUCES_PSpd>A', 'NEG_AFFECTS_AnafPS', 'PART_OF_PSpo>A',
    'PART_OF_Apo>PS', 'ASSOCIATED_WITH_AawPS', 'NEG_PROCESS_OF_PSnproA',
    'COEXISTS_WITH_AcwPS', 'CAUSES_PScA', 'AUGMENTS_PSagA',
    'NEG_LOCATION_OF_AnloPS', 'PROCESS_OF_Apro>PS', 'DISRUPTS_PSdsA',
    'NEG_PRODUCES_PSnpdA', 'PRODUCES_Apd>PS', 'PRECEDES_PSpc>A',
    'MANIFESTATION_OF_Amfo>PS', 'PRECEDES_Apc>PS', 'TREATS_AtPS',
    'NEG_COEXISTS_WITH_AncwPS', 'NEG_ASSOCIATED_WITH_PSnawA',
    'NEG_PART_OF_PSnpoA', 'MANIFESTATION_OF_PSmfo>A', 'NEG_CAUSES_AncPS',
    'NEG_DISRUPTS_PSndsA', 'ISA_Ai>PS', 'NEG_AUGMENTS_AnagPS',
    'ADMINISTERED_TO_PSatA', 'NEG_INHIBITS_AninPS', 'NEG_STIMULATES_AnstPS',
    'USES_AuPS', 'PREDISPOSES_PSpsA',
]

# Keep the list as the cell's last expression so it is still displayed.
anatomy_physiology_types

Index(['AFFECTS_AafPS', 'PROCESS_OF_PSpro>A', 'LOCATION_OF_AloPS',
       'PRODUCES_PSpd>A', 'NEG_AFFECTS_AnafPS', 'PART_OF_PSpo>A',
       'PART_OF_Apo>PS', 'ASSOCIATED_WITH_AawPS', 'NEG_PROCESS_OF_PSnproA',
       'COEXISTS_WITH_AcwPS', 'CAUSES_PScA', 'AUGMENTS_PSagA',
       'NEG_LOCATION_OF_AnloPS', 'PROCESS_OF_Apro>PS', 'DISRUPTS_PSdsA',
       'NEG_PRODUCES_PSnpdA', 'PRODUCES_Apd>PS', 'PRECEDES_PSpc>A',
       'MANIFESTATION_OF_Amfo>PS', 'PRECEDES_Apc>PS', 'TREATS_AtPS',
       'NEG_COEXISTS_WITH_AncwPS', 'NEG_ASSOCIATED_WITH_PSnawA',
       'NEG_PART_OF_PSnpoA', 'MANIFESTATION_OF_PSmfo>A', 'NEG_CAUSES_AncPS',
       'NEG_DISRUPTS_PSndsA', 'ISA_Ai>PS', 'NEG_AUGMENTS_AnagPS',
       'ADMINISTERED_TO_PSatA', 'NEG_INHIBITS_AninPS', 'NEG_STIMULATES_AnstPS',
       'USES_AuPS', 'PREDISPOSES_PSpsA'],
      dtype='object')

In [71]:
# Select every edge whose TYPE equals "AFFECTS_AafPS" with a boolean mask
# and leave the resulting frame as the cell's displayed value.
edges[edges['TYPE'] == "AFFECTS_AafPS"]

Unnamed: 0,START_ID,END_ID,TYPE
183392,C1179962,C1516334,AFFECTS_AafPS
183393,C0025979,C1160107,AFFECTS_AafPS
183394,C0014239,C1154402,AFFECTS_AafPS
183395,C0085262,C0015283,AFFECTS_AafPS
183396,C0037863,C0037864,AFFECTS_AafPS
183397,C1167250,C0301872,AFFECTS_AafPS
183398,C0040578,C0243144,AFFECTS_AafPS
183399,C0038250,C0007620,AFFECTS_AafPS
183400,C1518439,C0037083,AFFECTS_AafPS
183401,C0230931,C0016243,AFFECTS_AafPS
