In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
import os
import sys
import json
import torch
import pickle
import itertools

import networkx as nx
import numpy as np
import pandas as pd
import pickle as pkl
import matplotlib.pyplot as plt 
from tqdm import tqdm
from collections import Counter 
from copy import deepcopy
from torch.utils.data import TensorDataset

sys.path.insert(0, "../")

## Load PlaNet knowledge graph

In [None]:
import knowledge_graph
from knowledge_graph.kg import *

In [None]:
def load_kg_vocab(entities_path='../data/graph/entities.dict',
                  relations_path='../data/graph/relations.dict'):
    """Read the knowledge-graph vocabulary files into lookup dictionaries.

    Each non-blank line of the entities file is split on whitespace into
    ``<index> <kg id>``; each non-blank line of the relations file into
    ``<edge type> <relation name>``. When a relation name contains the marker
    ``rel-name-``, only the text after the first marker is kept; a name
    without the marker is kept unchanged.

    Args:
        entities_path: Path to the entities file.
        relations_path: Path to the relations file.

    Returns:
        A tuple ``(kgid2x, x2kgid, relname2etype, etype2relname)``:
        kg id -> int index, int index -> kg id, relation name -> int edge
        type, and int edge type -> relation name.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            fields, or its numeric field is not an integer.
        OSError: If either file cannot be opened.
    """
    marker = 'rel-name-'

    kgid2x = {}
    x2kgid = {}
    # `with` closes the handle deterministically; the bare open() used before leaked it.
    with open(entities_path) as entities_file:
        for line in entities_file:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            x, kgid = line.split()
            kgid2x[kgid] = int(x)
            x2kgid[int(x)] = kgid

    relname2etype = {}
    etype2relname = {}
    with open(relations_path) as relations_file:
        for line in relations_file:
            if not line.strip():
                continue
            etype, name = line.split()
            # partition() splits at the first marker, like the old find()-based slice,
            # but does not collapse the name to '' when the marker is missing.
            _, found, suffix = name.partition(marker)
            if found:
                name = suffix
            relname2etype[name] = int(etype)
            etype2relname[int(etype)] = name
    return kgid2x, x2kgid, relname2etype, etype2relname

# Build the vocabulary lookups once; later cells read kgid2x and relname2etype as globals.
kgid2x, x2kgid, relname2etype, etype2relname = load_kg_vocab()

## Run PlaNet models for new clinical trial

In [None]:
from utils.demo_utils import load_model, load_model_and_data, model_inference, prepare_runner
from gcn_models.utils import set_seed
from gcn_models.evaluator import Evaluator
set_seed(24)

In [None]:
# Load the BERT embedder used below to embed each trial's 'arm_text'.
# It is created on CPU; the trailing comment keeps 'cuda:1' as the GPU alternative.
from utils.text_bert_features import TextBertFeatures
bert_model = TextBertFeatures(
    bert_model='microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract-fulltext',
    device='cpu' #'cuda:1'
)

### Load the new clinical trial data

In [None]:
# Load the pickled data for the new trial. The `with` block closes the file
# handle, which the bare open() call used before never did.
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
with open('small_data/trial_data_NCT02370680.pkl', 'rb') as trial_file:
    new_trial_data = pkl.load(trial_file)

In [None]:
# The loaded object unpacks into exactly two trial entries; the bare tuple on
# the last line displays both as the cell output.
new_trial_1, new_trial_2 = new_trial_data
new_trial_1, new_trial_2

#### Get the features of the new trials

In [None]:
def get_trial_feature(new_trial):
    """Return the feature vector of one trial entry.

    The vector is the embedding of ``new_trial['arm_text']`` produced by the
    notebook-level ``bert_model`` followed by
    ``new_trial['trial_attribute_feats_vec']``, joined with ``np.concatenate``.
    """
    text_embedding = bert_model._embed(new_trial['arm_text'])
    attribute_vector = new_trial['trial_attribute_feats_vec']
    return np.concatenate((text_embedding, attribute_vector))

# Compute the feature vector of each trial entry; the printed shapes are a quick sanity check.
new_emb_1 = get_trial_feature(new_trial_1)
new_emb_2 = get_trial_feature(new_trial_2)
print(new_emb_1.shape, new_emb_2.shape)

#### Get the KG info of the new trials

In [None]:
def get_new_edges(new_trial, the_x, entity_index=None, relation_index=None,
                  reverse_offset=26, required_etypes=(21, 22, 23, 24, 25)):
    """Build the graph edges that connect one trial node to the knowledge graph.

    For every entry of ``new_trial['trial_arm_edges']`` a forward edge
    ``[the_x, target]`` with edge type ``r`` and a reverse edge
    ``[target, the_x]`` with edge type ``r + reverse_offset`` are emitted.
    Every edge type in ``required_etypes`` that the trial does not use gets a
    forward/reverse edge pair between ``the_x`` and node index 0
    (presumably a padding node -- TODO confirm against the graph builder).

    Args:
        new_trial: Mapping with a ``'trial_arm_edges'`` list; each item has a
            ``'kg_id'`` and a ``'relation'`` key.
        the_x: Node index of the trial in the graph.
        entity_index: Mapping kg id -> node index. Defaults to the
            notebook-level ``kgid2x``.
        relation_index: Mapping relation name -> edge type. Defaults to the
            notebook-level ``relname2etype``.
        reverse_offset: Value added to an edge type to get the type of its
            reverse edge (presumably the number of forward relation types --
            TODO confirm).
        required_etypes: Edge types that must appear at least once for the trial.

    Returns:
        ``(new_edges, new_etypes)``: a tensor of shape ``(2, n_edges)`` and a
        tensor of shape ``(n_edges,)``.

    Raises:
        KeyError: If a kg id or relation name is missing from the vocabularies.
    """
    # Fall back to the notebook-level vocabularies when none are supplied.
    if entity_index is None:
        entity_index = kgid2x
    if relation_index is None:
        relation_index = relname2etype

    new_edges = []
    new_etypes = []
    seen = set()
    # Bug fix: iterate the trial that was passed in. The previous code read the
    # global `new_trial_1` here, so every caller got the first trial's edges.
    for edge in new_trial['trial_arm_edges']:
        h = the_x
        t = entity_index[edge['kg_id']]
        r = relation_index[edge['relation']]
        new_edges.append([h, t])
        new_etypes.append(r)
        new_edges.append([t, h])
        new_etypes.append(r + reverse_offset)
        seen.add(r)

    # Guarantee one edge pair for each required edge type the trial does not use.
    for r in required_etypes:
        if r not in seen:
            new_edges.append([the_x, 0])
            new_etypes.append(r)
            new_edges.append([0, the_x])
            new_etypes.append(r + reverse_offset)

    # Transpose the list of [head, tail] rows into a (2, n_edges) tensor.
    new_edges = torch.tensor(new_edges).t()
    new_etypes = torch.tensor(new_etypes)
    print(new_edges.shape)
    print(new_etypes.shape)
    return new_edges, new_etypes

### Predict for the AE task

#### Load the AE data and model

In [None]:
# Load the AE lookup tables. The `with` blocks close the file handles, which
# the bare open() calls used before never did.
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
with open('small_data/ae1017_idx2aename.pkl', 'rb') as idx_file:
    aeidx2kgid = pkl.load(idx_file)
with open('small_data/ae_kgid2name.pkl', 'rb') as name_file:
    aekgid2name = pkl.load(name_file)
# Position i in the model output -> AE name, via the kg id stored at position i.
aeidx2name = [aekgid2name[kgid] for kgid in aeidx2kgid]

def predict_positive_ae(pred, threshold):
    """Return the names of the adverse events scored strictly above ``threshold``.

    ``pred`` is a sequence of scores (length 1017 per the original note); the
    score at position ``i`` is reported under the name ``aeidx2name[i]`` from
    the notebook-level table. Output order follows the order of ``pred``.
    """
    return [
        aeidx2name[position]
        for position, score in enumerate(pred)
        if score > threshold
    ]

In [None]:
# Load the AE checkpoint on CPU. The call is unpacked into a (dataset, tasks)
# pair followed by the encoder, BERT encoder, model, args and runner.
(ae_dataset, ae_tasks), \
    ae_encoder, ae_bert_encoder, \
    ae_model, ae_args, ae_runner \
    = load_model_and_data('../data/models/ae_model_shxo9bgw/ckpt.pt', device='cpu')


#### Add the new trial to the inference dataset

In [None]:
# Deep copies taken before the cells below modify ae_dataset / ae_args --
# presumably backups for restoring the originals without reloading (nothing
# visible in this notebook reads them).
ae_dataset_bk = deepcopy(ae_dataset)
ae_tasks_bk = deepcopy(ae_tasks)
ae_args_bk = deepcopy(ae_args)

In [None]:
def add_new_trial_to_ae_dataset(dataset):
    """Swap the new trial into the AE dataset in place of one test-split row.

    The first row of ``dataset.df`` whose ``split`` is ``'test'`` is used as a
    slot: its node embedding is replaced by the notebook-level ``new_emb_1``
    and every graph edge touching its node is replaced by the edges built
    from the notebook-level ``new_trial_1``. ``dataset`` is modified in place
    (``df``, ``node_feats``, ``graph``, ``datasets['test']``) and returned.
    """
    # Restrict the frame to the single test row that will host the new trial.
    test_rows = dataset.df[dataset.df['split']=='test']
    dataset.df = test_rows.head(1)
    slot = dataset.df.iloc[0]
    the_x, the_kgid = slot['x'], slot['kgid']
    print('the_x', the_x, 'the_kgid', the_kgid)

    # Overwrite the slot's embedding on a copy of the node-feature table.
    feats = deepcopy(dataset.node_feats)
    row_label = feats[feats['node_id'] == the_kgid].index[0]
    feats.at[row_label, 'emb'] = new_emb_1
    dataset.node_feats = feats

    # Rewire a copy of the graph: drop every edge touching the slot node,
    # then append the new trial's edges and edge types.
    graph = deepcopy(dataset.graph)
    new_edges, new_etypes = get_new_edges(new_trial_1, the_x)

    old_edge_index = graph.data.edge_index.clone()
    keep = ~old_edge_index.eq(the_x).any(dim=0)
    graph.data.edge_index = torch.cat([old_edge_index[:, keep], new_edges], dim=1)

    old_edge_type = graph.data.edge_type.clone()
    graph.data.edge_type = torch.cat([old_edge_type[keep], new_etypes], dim=0)

    dataset.graph = graph

    # Build the one-row 'test' TensorDataset. Labels and weight masks reuse
    # entry [0][0] of the existing tensors, repeated once per row.
    x = dataset._get_data_x(dataset.df)
    print('x', x)
    n_rows = len(x)
    labels = dataset.task_ys[0][0].repeat(n_rows, 1)
    weight_masks = dataset.sample_weight_masks[0][0].repeat(n_rows, 1)
    dataset.datasets['test'] = TensorDataset(x, labels, weight_masks)

    return dataset

#### Run inference for the new trial

In [None]:
# Swap the new trial into the AE dataset, then rebuild args / runner / encoder
# around the modified dataset.
ae_dataset = add_new_trial_to_ae_dataset(ae_dataset)
ae_args, ae_runner, ae_encoder \
    = prepare_runner(ae_args, ae_dataset, ae_encoder, ae_bert_encoder, ae_model, device='cpu')

# Run inference on the 'test' split; only the second returned value (the
# predictions) is kept.
_, y_test_pred, _ = model_inference(ae_runner, mode='test') 
print ('y_test_pred.size()', y_test_pred.size())

#### Predicted AEs:

In [None]:
# Names of the AEs whose score in prediction row 0 is above the 0.05 threshold.
print(predict_positive_ae(y_test_pred[0], threshold=0.05))

### Predict for the Safety task

#### Load the safety data and model

In [None]:
# Load the safety checkpoint on CPU. The call is unpacked into a (dataset,
# tasks) pair followed by the encoder, BERT encoder, model, args and runner.
(sf_dataset, sf_tasks), \
    sf_encoder, sf_bert_encoder, \
    sf_model, sf_args, sf_runner \
    = load_model_and_data('../data/models/safety_model_1xekl810/ckpt.pt', device='cpu')

In [None]:
# Deep copies taken before the cells below modify sf_dataset / sf_args --
# presumably backups for restoring the originals without reloading (nothing
# visible in this notebook reads them).
sf_dataset_bk = deepcopy(sf_dataset)
sf_tasks_bk = deepcopy(sf_tasks)
sf_args_bk = deepcopy(sf_args)

In [None]:
def add_new_trial_to_safety_dataset(dataset):
    """Swap the new trial into the safety dataset in place of one test-split row.

    Uses the first ``'test'`` row of ``dataset.df`` as the slot, writes the
    notebook-level ``new_emb_1`` as that node's embedding, replaces all graph
    edges touching the node with those built from the notebook-level
    ``new_trial_1``, and rebuilds ``dataset.datasets['test']`` with that
    single row. The same ``dataset`` object is modified and returned.
    """
    # Keep just the first test row; it is the slot the new trial takes over.
    is_test = dataset.df['split']=='test'
    dataset.df = dataset.df[is_test].head(1)
    host_row = dataset.df.iloc[0]
    the_x = host_row['x']
    the_kgid = host_row['kgid']
    print('the_x', the_x, 'the_kgid', the_kgid)

    # Replace the slot's embedding on a copy of the node-feature table.
    node_table = deepcopy(dataset.node_feats)
    matches = node_table[node_table['node_id'] == the_kgid]
    node_table.at[matches.index[0], 'emb'] = new_emb_1
    dataset.node_feats = node_table

    # Work on a copy of the graph so the edit is assigned back in one step.
    graph_copy = deepcopy(dataset.graph)
    new_edges, new_etypes = get_new_edges(new_trial_1, the_x)

    # One boolean per edge column: True when either endpoint is the slot node.
    edge_index = graph_copy.data.edge_index.clone()
    touches_slot = edge_index.eq(the_x).any(dim=0)
    surviving_edges = edge_index[:, ~touches_slot]
    graph_copy.data.edge_index = torch.cat([surviving_edges, new_edges], dim=1)

    # The same mask keeps edge types aligned with the surviving edge columns.
    edge_type = graph_copy.data.edge_type.clone()
    surviving_types = edge_type[~touches_slot]
    graph_copy.data.edge_type = torch.cat([surviving_types, new_etypes], dim=0)

    dataset.graph = graph_copy

    # Build the inference split; labels and weight masks reuse entry [0][0]
    # of the existing tensors, repeated once per row.
    x = dataset._get_data_x(dataset.df)
    print('x', x)
    row_count = len(x)
    tensors = [
        x,
        dataset.task_ys[0][0].repeat(row_count, 1),
        dataset.sample_weight_masks[0][0].repeat(row_count, 1),
    ]
    dataset.datasets['test'] = TensorDataset(*tensors)

    return dataset

#### Run inference for the new trial

In [None]:
# Swap the new trial into the safety dataset, then rebuild args / runner /
# encoder around the modified dataset.
sf_dataset = add_new_trial_to_safety_dataset(sf_dataset)
sf_args, sf_runner, sf_encoder \
    = prepare_runner(sf_args, sf_dataset, sf_encoder, sf_bert_encoder, sf_model, device='cpu')

# Run inference on the 'test' split. Note that y_test_pred is reassigned here,
# replacing the AE predictions stored under the same name.
_, y_test_pred, _ = model_inference(sf_runner, mode='test') 
print ('y_test_pred.size()', y_test_pred.size())

#### Predicted probability of safety concern:

In [None]:
# Row 0 of the safety predictions as a plain Python number.
print(y_test_pred[0].item())

### Predict for the Efficacy task

In [None]:
# Load the efficacy checkpoint. Unlike the AE and safety cells this uses
# load_model, which is unpacked without a runner and is called without a
# device argument; ef_runner is created later by prepare_runner.
(ef_dataset, ef_tasks), ef_encoder, ef_bert_encoder, ef_model, ef_args \
    = load_model('../data/models/efficacy_model_34l5ms9m/ckpt.pt')

In [None]:
# Deep copies taken before the cells below modify ef_dataset / ef_args --
# presumably backups for restoring the originals without reloading (nothing
# visible in this notebook reads them).
ef_dataset_bk = deepcopy(ef_dataset)
ef_tasks_bk = deepcopy(ef_tasks)
ef_args_bk = deepcopy(ef_args)

#### Add new trial to inference dataset

In [None]:
def add_new_trial_to_efficacy_dataset(dataset):
    """Swap the two new trial entries into the efficacy dataset.

    The first row of ``dataset.efficacy_df`` whose ``split`` is ``'test'``
    provides two node slots (``x1``/``kgid1`` and ``x2``/``kgid2``). Their
    embeddings are replaced by the notebook-level ``new_emb_1`` and
    ``new_emb_2``, and every graph edge touching either node is replaced by
    the edges returned by ``get_new_edges`` for ``new_trial_1`` and
    ``new_trial_2``. ``dataset`` is modified in place (``efficacy_df``,
    ``node_feats``, ``graph``, ``datasets['test']``) and returned.
    """
    # Update the df
    # Keep only the first test-split pair; it hosts the two new entries.
    _df = dataset.efficacy_df[dataset.efficacy_df['split']=='test'].head(1)
    dataset.efficacy_df = _df
    the_x1, the_x2 = _df.iloc[0]['x1'], _df.iloc[0]['x2']
    the_kgid1, the_kgid2 = _df.iloc[0]['kgid1'], _df.iloc[0]['kgid2']
    print('the_x1', the_x1, 'the_kgid1', the_kgid1, 'the_x2', the_x2, 'the_kgid2', the_kgid2)
    
    # Update the node features
    # Work on a copy; `pos` is an index label, which is what `.at` expects.
    _node_feats = deepcopy(dataset.node_feats)
    pos = _node_feats[_node_feats['node_id'] == the_kgid1].index[0]
    _node_feats.at[pos, 'emb'] = new_emb_1
    pos = _node_feats[_node_feats['node_id'] == the_kgid2].index[0]
    _node_feats.at[pos, 'emb'] = new_emb_2
    dataset.node_feats = _node_feats
    
    # Update the graph edges
    _graph = deepcopy(dataset.graph)
    new_edges_1, new_etypes_1 = get_new_edges(new_trial_1, the_x1)
    new_edges_2, new_etypes_2 = get_new_edges(new_trial_2, the_x2)
    
    # Drop every edge column touching either slot node, then append the new edges.
    # NOTE: the second mask is computed on the edge index that was ALREADY
    # filtered by the first mask, so its length is that of the filtered tensor.
    _edge_index = _graph.data.edge_index.clone()
    columns_with_the_x1 = _edge_index.eq(the_x1).any(dim=0)
    _edge_index = _edge_index[:, ~columns_with_the_x1]
    columns_with_the_x2 = _edge_index.eq(the_x2).any(dim=0)
    _edge_index = _edge_index[:, ~columns_with_the_x2]
    _edge_index = torch.cat([_edge_index, new_edges_1, new_edges_2], dim=1)
    _graph.data.edge_index = _edge_index
    
    # Apply the two masks to the edge types in the same order as above; the
    # second mask only lines up after the first one has been applied.
    _edge_type = _graph.data.edge_type.clone()
    _edge_type = _edge_type[~columns_with_the_x1]
    _edge_type = _edge_type[~columns_with_the_x2]
    _edge_type = torch.cat([_edge_type, new_etypes_1, new_etypes_2], dim=0)
    _graph.data.edge_type = _edge_type
    
    dataset.graph = _graph
    
    # Make the dataset ready for inference
    # Labels and weight masks reuse entry [0][0] of the existing tensors,
    # repeated once per row -- presumably placeholders, TODO confirm.
    x = dataset._get_data_x(_df)
    print('x', x)
    tensors = [x]
    tensors.extend([dataset.task_ys[0][0].repeat(len(x), 1)])
    tensors.extend([dataset.sample_weight_masks[0][0].repeat(len(x), 1)])
    dataset.datasets['test'] = TensorDataset(*tensors)
    
    return dataset

In [None]:
# Swap the two new trial entries into the efficacy dataset, then build args /
# runner / encoder around the modified dataset.
ef_dataset = add_new_trial_to_efficacy_dataset(ef_dataset)
ef_args, ef_runner, ef_encoder \
    = prepare_runner(ef_args, ef_dataset, ef_encoder, ef_bert_encoder, ef_model, device='cpu')

# Run inference on the 'test' split. Note that y_test_pred is reassigned here,
# replacing the predictions of the earlier tasks stored under the same name.
_, y_test_pred, _ = model_inference(ef_runner, mode='test') 
print ('y_test_pred.size()', y_test_pred.size())

#### Predicted probability that trial arm 1 has better efficacy than trial arm 2:

In [None]:
# Row 0 of the efficacy predictions as a plain Python number.
print(y_test_pred[0].item())