In [138]:
import networkx as nx
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [84]:
# Load the training queries; the path is relative to the notebook's working directory.
df = pd.read_json('../train_with_graph.json')

In [85]:
df.head()

Unnamed: 0,answer,candidates,graph,id,query,relation,target
0,DB00072,"[DB00072, DB00294, DB00338, DB00341, DB00588, ...","[{'S73A': 97, 'P13232': 23, 'P11388': 52, 'P10...",MH_train_0,interacts_with DB00773?,interacts_with,DB00773
1,DB06822,"[DB00294, DB00313, DB00588, DB00755, DB00783, ...","[{'O43633': 2, 'Q16552': 12, 'P61073': 40, 'P1...",MH_train_1,interacts_with DB09079?,interacts_with,DB09079
10,DB01393,"[DB00035, DB00290, DB00294, DB00313, DB00322, ...","[{'P00519': 25, 'P29474': 55, 'P56279': 61, 'P...",MH_train_10,interacts_with DB01367?,interacts_with,DB01367
100,DB00622,"[DB00382, DB00477, DB00486, DB00588, DB00622, ...","[{'DB00171': 67, 'SL65': 18, 'P33261': 50, 'AM...",MH_train_100,interacts_with DB06480?,interacts_with,DB06480
1000,DB00834,"[DB00072, DB00293, DB00391, DB00682, DB00834, ...","[{'A1A1': 88, 'O00329': 5, 'P00519': 16, 'P070...",MH_train_1000,interacts_with DB00277?,interacts_with,DB00277


In [96]:
# Per-query coverage statistics. `row['graph'][0]` is the container that is
# tested for node membership (entity name `in` graph[0]).

# 1 when the query's answer / target entity appears in its graph, else 0.
df['has_answer'] = df.apply(lambda row: int(row['answer'] in row['graph'][0]), axis=1)
df['has_target'] = df.apply(lambda row: int(row['target'] in row['graph'][0]), axis=1)

# How many non-answer candidates appear in the graph.
df['has_candidates'] = df.apply(
    lambda row: sum(int(cand != row['answer'] and cand in row['graph'][0])
                    for cand in row['candidates']),
    axis=1)

# NOTE(review): the "- 1" assumes the answer occurs exactly once among the
# candidates -- confirm against the dataset.
df['total_candidates_exc_answer'] = df.apply(lambda row: len(row['candidates']) - 1, axis=1)

#### Total number of queries

In [91]:
len(df)

1620

#### Queries that have the target in the graph

In [90]:
df['has_target'].sum()

1376

#### Queries that have the answer in the graph

In [88]:
df['has_answer'].sum()

1056

#### Total number of candidates, excluding each query's answer

In [97]:
df['total_candidates_exc_answer'].sum()

12816

#### Number of non-answer candidates that appear in the graph

In [98]:
df['has_candidates'].sum()

8363

#### Number of answer-target pairs in the same connected component

In [112]:
def are_connected(graph_info, source, target):
    """Return 1 if a path joins `source` and `target` in the query graph, else 0.

    Parameters
    ----------
    graph_info : sequence
        ``graph_info[0]`` maps entity names to node indices and
        ``graph_info[1]`` is the adjacency matrix (anything ``np.array``
        accepts).
    source, target : hashable
        Entity names to look up in ``graph_info[0]``.

    Returns
    -------
    int
        1 when both entities are in the graph and ``nx.has_path`` finds a path
        between their nodes; 0 otherwise (including when either entity is
        missing from the graph).
    """
    node_index = graph_info[0]

    # Check membership first: building the graph is the expensive part and is
    # pointless when either endpoint is absent.
    if source not in node_index or target not in node_index:
        return 0

    # from_numpy_array is the supported constructor; from_numpy_matrix was
    # removed in networkx 3.0.
    G = nx.from_numpy_array(np.array(graph_info[1]))
    return 1 if nx.has_path(G, node_index[source], node_index[target]) else 0

In [113]:
# Flag (1/0) per query: are the answer and the target linked by a path in the graph?
df['answer_target_connected'] = [
    are_connected(graph, answer, target)
    for graph, answer, target in zip(df['graph'], df['answer'], df['target'])
]

In [114]:
df['answer_target_connected'].sum()

633

In [119]:
# Scratch cell: build the graph of the query labelled 0 and compute a
# Kamada-Kawai layout for it. plot_graph builds its own graph and layout, so
# none of the visible cells read these globals.
# from_numpy_array replaces from_numpy_matrix, which networkx 3.0 removed.
G = nx.from_numpy_array(np.array(df['graph'][0][1]))
pos = nx.kamada_kawai_layout(G)

In [211]:
def plot_graph(graph_info, answer, target, candidates):
    """Draw a query's knowledge-base graph as an interactive plotly figure.

    Nodes are coloured by role: the target is red, the answer green, any other
    candidate blue, and every remaining node grey. Hovering a node shows its
    name; hovering an edge endpoint shows that edge's text.

    Parameters
    ----------
    graph_info : sequence
        ``graph_info[1]`` is the adjacency matrix, ``graph_info[2]`` maps
        ``str((u, v))`` to the hover text of edge ``(u, v)``, and
        ``graph_info[3][node]`` is the name of node ``node``.
    answer, target : str
        Names of the answer and target entities.
    candidates : container
        Names of the candidate entities (tested with ``in``).

    Returns
    -------
    None
        The figure is displayed inline through ``iplot``.
    """
    # from_numpy_array is the supported constructor; from_numpy_matrix was
    # removed in networkx 3.0.
    G = nx.from_numpy_array(np.array(graph_info[1]))
    pos = nx.fruchterman_reingold_layout(G)

    # Collect coordinates in plain lists and hand them to the trace at
    # construction time: mutating trace properties in place (`+=`, `.append`)
    # fails on plotly versions that expose them as tuples.
    edge_x, edge_y, edge_text = [], [], []
    for u, v in G.edges():
        x0, y0 = pos[u][0], pos[u][1]
        x1, y1 = pos[v][0], pos[v][1]
        label = graph_info[2][str((u, v))]
        # Each edge contributes three points (start, end, a None gap), so the
        # hover text needs three entries too or labels drift onto other edges.
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]
        edge_text += [label, label, None]

    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        text=edge_text,
        line=dict(width=0.7, color='black'),
        hoverinfo='text',
        mode='lines')

    node_x, node_y, node_text, node_color = [], [], [], []
    for node in G.nodes():
        name = graph_info[3][node]
        node_x.append(pos[node][0])
        node_y.append(pos[node][1])
        node_text.append(name)

        # Priority matters: a target that is also a candidate stays red.
        if name == target:
            color = 'red'
        elif name == answer:
            color = 'green'
        elif name in candidates:
            color = 'blue'
        else:
            color = 'grey'
        node_color.append(color)

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        text=node_text,
        mode='markers',
        hoverinfo='text',
        marker={'color': node_color,
                'size': 14,
                'line': {
                         'color': 'black',
                         'width': 1
                        }
               })

    # NOTE(review): `titlefont` is deprecated in newer plotly releases in
    # favour of `title=dict(text=..., font=...)` -- confirm against the
    # installed version before changing it.
    fig = go.Figure(data=[edge_trace, node_trace],
                    layout=go.Layout(
                    title='<br>KB graph',
                    titlefont=dict(size=16),
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=20, l=5, r=5, t=40),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

    iplot(fig, filename='networkx')

In [232]:
# Index label of the query to visualise.
plot_idx = 0
plot_graph(graph_info=df['graph'][plot_idx],
           answer=df['answer'][plot_idx],
           target=df['target'][plot_idx],
           candidates=df['candidates'][plot_idx])

In [217]:
# Load the path dataset; the path is relative to the notebook's working directory.
paths_df = pd.read_json('../dummy_dataset.json')

In [218]:
paths_df.head()

Unnamed: 0,entity_paths,id,label,relation,relation_paths,source,target
0,"[[DB00072, P04626, MKN74, P16104, P11388, DB00...",MH_train_0,1,interacts_with,"[[[ent_2, -, containing, therapy, is, a, stand...",DB00072,DB00773
1,"[[DB00338, O75030, DB00133, P11388, DB00773]]",MH_train_0,0,interacts_with,"[[[ent_2, had, no, significant, inhibitory, ef...",DB00072,DB00773
2,"[[DB00588, P05112, P05231, P16104, P11388, DB0...",MH_train_0,0,interacts_with,"[[[ent_2, also, reversed, the, increase, in, b...",DB00072,DB00773
3,"[[DB00820, P29474, K373, P16104, P11388, DB007...",MH_train_0,0,interacts_with,"[[[Normalized, expression, of, ent_1, was, 82,...",DB00072,DB00773
4,"[[DB02546, P16104, P11388, DB00773]]",MH_train_0,0,interacts_with,"[[[Tubacin, enhances, DNA, damage, induced, by...",DB00072,DB00773


In [226]:
def pretty_print_path(entities, relations):
    """Print a path as alternating entities and relation sentences.

    Each hop is printed as the entity in parentheses, an ASCII arrow, the
    relation's tokens joined by spaces, and another arrow. When there are more
    entities than relations, the entity following the last relation is printed
    as well so the path ends on a node instead of a dangling arrow.

    Parameters
    ----------
    entities : iterable
        Entities along the path, in order.
    relations : iterable of iterable of str
        Token sequences; ``relations[i]`` is printed between ``entities[i]``
        and ``entities[i + 1]``.

    Returns
    -------
    None
    """
    entities = list(entities)
    relations = list(relations)
    n_hops = min(len(entities), len(relations))

    for entity, tokens in zip(entities, relations):
        print('({})\n|\nV\n{}\n|\nV\n'.format(entity, ' '.join(tokens)), end='')

    # zip stops at the shorter sequence, which would silently drop the final
    # node of a path; print it explicitly.
    if len(entities) > n_hops:
        print('({})'.format(entities[n_hops]))

In [231]:
# Row label in paths_df whose first path is printed.
idx = 4
pretty_print_path(entities=paths_df['entity_paths'][idx][0],
                  relations=paths_df['relation_paths'][idx][0])

(DB02546)
|
V
Tubacin enhances DNA damage induced by etoposide or ent_2 as indicated by increased accumulation of γ ent_1 and activation of the checkpoint kinase Chk2 .
|
V
(P16104)
|
V
The following studies in tissue culture models have suggested that acidic pH acts like a ent_1 poison to induce ent_1 - mediated DNA damage : ( i ) acidic pH induces ent_1 - dependent DNA damage signals as evidenced by up - regulation of p53 and ent_x - 139 phosphorylation of ent_2 [ a substrate for ataxia telangiectasia mutated ( ent_x ) ent_x and Rad3 - related ( ATR ) kinases ] ; ( ii ) acidic pH - induced cytotoxicity in tumor cells is reduced in ent_1 - deficient cells ; ( iii ) acidic pH increases the mutation frequency of the hypoxanthine phosphoribosyl transferase ( ent_x ) gene in a ent_1 - dependent manner ; and ( iv ) acidic pH induces reversible ent_1 - mediated DNA strand breaks in vitro .
|
V
(P11388)
|
V
RESULTS : Similar to the ent_1 - targeting drug , etoposide ( ent_2 ) , the NO - dono