In [1]:
import pandas as pd
import torch_geometric as tg
from torch_geometric.transforms import LocalDegreeProfile
from torch_geometric.utils import train_test_split_edges
from torch_geometric.utils import negative_sampling
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, GINConv
from sklearn.metrics import roc_auc_score
from torch.nn.functional import binary_cross_entropy_with_logits
import matplotlib.pyplot as plt
from torch_geometric.transforms import LocalDegreeProfile
from torch_geometric.data import Data
import numpy as np
import torch
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [81]:
base_path = 'datasets/'

ddi_path = f'{base_path}DRUG_INTERACTION_graphFile_withClass.csv'
dc_path = f'{base_path}TwoComb.csv'
all_path = f'{base_path}AllComb.csv'

dt_path = f'{base_path}node_features/Drugs_Targets_Onehot.csv'
w2v_path = f'{base_path}node_features/word2vec.csv'
sev_path = f'{base_path}node_features/sideEffectVec.csv'
iv_path = f'{base_path}node_features/indicationsVec.csv'
fin_path = f'{base_path}node_features/Drug_finger.csv'
nv_path = f'{base_path}node_features/Node2Vec_DCC.csv'


dt_df = pd.read_csv(dt_path)
w2v_df = pd.read_csv(w2v_path)
all_df = pd.read_csv(all_path)
ddi_df = pd.read_csv(ddi_path)
se_df = pd.read_csv(sev_path)
in_df = pd.read_csv(iv_path)
fin_df = pd.read_csv(fin_path)
nv_df = pd.read_csv(nv_path)
all_df.columns = ['d1', 'd2']

## loading unique nodes list and dict

In [5]:
drug_list = list()
drug_dict = dict()

count = 0
for edge in all_df.values:
    n1 = edge[0]
    n2 = edge[1]
    if n1 not in drug_list:
        drug_list.append(n1)
        drug_dict[n1] = count
        count+=1
    if n2 not in drug_list:
        drug_list.append(n2)
        drug_dict[n2] = count
        count+=1
print(len(drug_dict))
print(len(drug_dict))

898
898


## loading nodes with w2v features

In [98]:
temp_drugs = []
temp_vector = []
temp_drugs_no = []
for each in w2v_df.values:
    drug = each[0]
    str_vector = [num for num in str(each[1]).replace('[', '').replace(']', '').replace('\n', '').split(' ')]
    vector = []
    for num in str_vector:
        if num != '':
            vector.append(float(num))
    temp_drugs.append(drug)
    temp_vector.append(vector)
    temp_drugs_no.append(drug_dict[drug])

t = pd.DataFrame(temp_vector)
t.columns = [f'c{i}' for i in range(1, len(temp_vector[0])+1)]
t['drugs'] = pd.DataFrame(temp_drugs)
t['drugsNo'] = pd.DataFrame(temp_drugs_no)
t = t.sort_values('drugsNo')
t.index = t.drugsNo
final_w2v_df = t.copy(deep=True)

## loading nodes with finger features

In [97]:
fin_df

temp_drugs = []
temp_vector = []
temp_drugs_no = []

for each in fin_df.values:
    drug = each[1]
    vector = [int(num) for num in str(each[2]).replace('[', '').replace(']', '').split(', ')]
    temp_drugs.append(drug)
    temp_vector.append(vector)
    temp_drugs_no.append(drug_dict[drug])

t = pd.DataFrame(temp_vector)
t.columns = [f'c{i}' for i in range(1, len(temp_vector[0])+1)]
t['drugs'] = pd.DataFrame(temp_drugs)
t['drugsNo'] = pd.DataFrame(temp_drugs_no)
t = t.sort_values('drugsNo')
t.index = t.drugsNo
final_fin_df = t.copy(deep=True)

## loading nodes with onehot target features

In [119]:
dt_df

temp_drugs = []
temp_vector = []
temp_drugs_no = []

t = dt_df.copy(deep=True)

for each in t.values:
    drug = each[0]
    if drug not in drug_list:
        continue
    temp_drugs_no.append(drug_dict[drug])
    temp_drugs.append(drug)


t = t.drop('DCC_ID', 1)
t['drugs'] = pd.DataFrame(temp_drugs)
t['drugsNo'] = pd.DataFrame(temp_drugs_no)
t = t.sort_values('drugsNo')
t.index = t.drugsNo

final_dt_df = t.copy(deep=True)

  t = t.drop('DCC_ID', 1)


## loading node with sideEffect features

In [120]:
temp_drugs = []
temp_vector = []
temp_drugs_no = []

t = se_df.copy(deep=True)

for each in t.values:
    drug = each[0]
    if drug not in drug_list:
        continue
    temp_drugs_no.append(drug_dict[drug])
    temp_drugs.append(drug)

t = t.drop('DCC_ID', 1)
t['drugs'] = pd.DataFrame(temp_drugs)
t['drugsNo'] = pd.DataFrame(temp_drugs_no)
t = t.sort_values('drugsNo')
t.index = t.drugsNo

final_se_df = t.copy(deep=True)

  t = t.drop('DCC_ID', 1)


## loading nodes with indications features

In [124]:
temp_drugs = []
temp_vector = []
temp_drugs_no = []

t = in_df.copy(deep=True)

for each in t.values:
    drug = each[0]
    if drug not in drug_list:
        continue
    temp_drugs_no.append(drug_dict[drug])
    temp_drugs.append(drug)

t = t.drop('DCC_ID', 1)
t['drugs'] = pd.DataFrame(temp_drugs)
t['drugsNo'] = pd.DataFrame(temp_drugs_no)
t = t.sort_values('drugsNo')
t.index = t.drugsNo

final_in_df = t.copy(deep=True)

  t = t.drop('DCC_ID', 1)


## temp