# Loading required dataframes

In [1]:
from splitters import k_fold_edge_split, train_test_split_edges
from dataset_loader import load_dataframes
from model import Net
from utils import train_model, device_finder, train_model_on_folds, predict_edges, plot_layers_curve
import warnings
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch_geometric.transforms import LocalDegreeProfile

import torch
from torch_geometric.data import Data
import numpy as np

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Ignore every FutureWarning for the rest of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)
# Frames returned by load_dataframes(), listed as rows * feature columns:
# final_dt_df -> onehot drug-target     : 760 * 790
# final_w2v_df -> word2vec              : 614 * 200
# final_nv_df -> node2vec               : 752 * 128
# final_fin_df -> drug fingerprint      : 627 * 167
# final_in_df -> drug indication        : 383 * 1513
# final_se_df -> drug side effect       : 389 * 3256
# all_df -> all drug combinations       : 2716 * 2
# NOTE: the widths above count feature columns only. Later cells read the node
# identifier from the column immediately after the features (e.g. column 128 of
# final_nv_df), so each feature frame is wider than the figure shown here.
final_dt_df, final_w2v_df, final_nv_df, final_fin_df, final_in_df, final_se_df, all_df = load_dataframes()

# Loading All Edges

In [12]:
# Build the symmetric edge list of the undirected graph.
# Every unordered pair found in `all_df` is emitted twice, as [a, b] and as
# [b, a]; later rows repeating a pair (in either direction) are skipped.
# Assumes each row of `all_df` holds exactly two hashable node ids (per the
# 2716 * 2 shape noted where the frames are loaded).
all_edges = []
seen_pairs = set()  # O(1) duplicate check instead of rescanning `all_edges`

for edge in all_df.values:
    pair = (edge[0], edge[1])
    # Both directions are always recorded together, so one lookup suffices.
    if pair in seen_pairs:
        continue
    seen_pairs.add(pair)
    seen_pairs.add((edge[1], edge[0]))
    all_edges.append(list(edge))
    all_edges.append([edge[1], edge[0]])

(2716, 2)
3732


# Node2Vec

In [33]:
# Build the Node2Vec graph: feature matrix, node-id -> row-index map, edge index.

# Columns 0..127 are used as features; column 128 is stringified and used as
# the node identifier that is matched against the entries of `all_edges`.
x_nv = np.array(final_nv_df.iloc[:, :128], dtype=np.float32)

nodes_nv_list = list()
nodes_nv_dict = dict()

for count, x in enumerate(final_nv_df.values[:, 128:129]):
    node_name = str(x.squeeze())
    nodes_nv_dict[node_name] = count
    nodes_nv_list.append(node_name)

# Keep only edges whose two endpoints both have a feature row, remapped to row
# indices. Membership is tested on the dict (O(1)) rather than the list (O(n)).
edges_nv = [
    [nodes_nv_dict[edge[0]], nodes_nv_dict[edge[1]]]
    for edge in all_edges
    if edge[0] in nodes_nv_dict and edge[1] in nodes_nv_dict
]

nodes_nv = torch.from_numpy(np.array(list(nodes_nv_dict.values())))
# Force int64 and an (E, 2) layout so the index stays valid on every platform
# and when no edge survives the filter.
edges_nv = torch.from_numpy(np.array(edges_nv, dtype=np.int64).reshape(-1, 2))
x_nv = torch.from_numpy(x_nv)

# PyG expects edge_index as a contiguous [2, E] tensor.
data_nv = Data(x=x_nv, edge_index=edges_nv.t().contiguous())
data_nv

# raw_data = Data(edge_index=edges_nv.T)
# ldp = LocalDegreeProfile()
# ldp(raw_data)
# raw_data

Data(x=[752, 128], edge_index=[2, 3406])

# Word2Vec

In [40]:
# Build the Word2Vec graph: feature matrix, node-id -> row-index map, edge index.

# Columns 0..199 are used as features; column 200 is stringified and used as
# the node identifier that is matched against the entries of `all_edges`.
x_w2v = np.array(final_w2v_df.iloc[:, :200], dtype=np.float32)

nodes_w2v_list = list()
nodes_w2v_dict = dict()

for count, x in enumerate(final_w2v_df.values[:, 200:201]):
    node_name = str(x.squeeze())
    nodes_w2v_dict[node_name] = count
    nodes_w2v_list.append(node_name)

# Keep only edges whose two endpoints both have a feature row, remapped to row
# indices. Membership is tested on the dict (O(1)) rather than the list (O(n)).
edges_w2v = [
    [nodes_w2v_dict[edge[0]], nodes_w2v_dict[edge[1]]]
    for edge in all_edges
    if edge[0] in nodes_w2v_dict and edge[1] in nodes_w2v_dict
]

nodes_w2v = torch.from_numpy(np.array(list(nodes_w2v_dict.values())))
# Force int64 and an (E, 2) layout so the index stays valid on every platform
# and when no edge survives the filter.
edges_w2v = torch.from_numpy(np.array(edges_w2v, dtype=np.int64).reshape(-1, 2))
x_w2v = torch.from_numpy(x_w2v)

# PyG expects edge_index as a contiguous [2, E] tensor.
data_w2v = Data(x=x_w2v, edge_index=edges_w2v.t().contiguous())
data_w2v

# raw_data = Data(edge_index=edges_w2v.T)
# ldp = LocalDegreeProfile()
# ldp(raw_data)
# raw_data

Data(x=[614, 200], edge_index=[2, 2468])

# Side Effect

In [44]:
# Build the side-effect graph: feature matrix, node-id -> row-index map, edge index.

# Columns 0..3255 are used as features; column 3256 is stringified and used as
# the node identifier that is matched against the entries of `all_edges`.
x_se = np.array(final_se_df.iloc[:, :3256], dtype=np.float32)

nodes_se_list = list()
nodes_se_dict = dict()

for count, x in enumerate(final_se_df.values[:, 3256:3257]):
    node_name = str(x.squeeze())
    nodes_se_dict[node_name] = count
    nodes_se_list.append(node_name)

# Keep only edges whose two endpoints both have a feature row, remapped to row
# indices. Membership is tested on the dict (O(1)) rather than the list (O(n)).
edges_se = [
    [nodes_se_dict[edge[0]], nodes_se_dict[edge[1]]]
    for edge in all_edges
    if edge[0] in nodes_se_dict and edge[1] in nodes_se_dict
]

nodes_se = torch.from_numpy(np.array(list(nodes_se_dict.values())))
# Force int64 and an (E, 2) layout so the index stays valid on every platform
# and when no edge survives the filter.
edges_se = torch.from_numpy(np.array(edges_se, dtype=np.int64).reshape(-1, 2))
x_se = torch.from_numpy(x_se)

# PyG expects edge_index as a contiguous [2, E] tensor.
data_se = Data(x=x_se, edge_index=edges_se.t().contiguous())
data_se

# raw_data = Data(edge_index=edges_se.T)
# ldp = LocalDegreeProfile()
# ldp(raw_data)
# raw_data

Data(x=[389, 3256], edge_index=[2, 1132])

# Indication

In [46]:
# Build the indication graph: feature matrix, node-id -> row-index map, edge index.

# Columns 0..1512 are used as features; column 1513 is stringified and used as
# the node identifier that is matched against the entries of `all_edges`.
x_in = np.array(final_in_df.iloc[:, :1513], dtype=np.float32)

nodes_in_list = list()
nodes_in_dict = dict()

for count, x in enumerate(final_in_df.values[:, 1513:1514]):
    node_name = str(x.squeeze())
    nodes_in_dict[node_name] = count
    nodes_in_list.append(node_name)

# Keep only edges whose two endpoints both have a feature row, remapped to row
# indices. Membership is tested on the dict (O(1)) rather than the list (O(n)).
edges_in = [
    [nodes_in_dict[edge[0]], nodes_in_dict[edge[1]]]
    for edge in all_edges
    if edge[0] in nodes_in_dict and edge[1] in nodes_in_dict
]

nodes_in = torch.from_numpy(np.array(list(nodes_in_dict.values())))
# Force int64 and an (E, 2) layout so the index stays valid on every platform
# and when no edge survives the filter.
edges_in = torch.from_numpy(np.array(edges_in, dtype=np.int64).reshape(-1, 2))
x_in = torch.from_numpy(x_in)

# PyG expects edge_index as a contiguous [2, E] tensor.
data_in = Data(x=x_in, edge_index=edges_in.t().contiguous())
data_in

# raw_data = Data(edge_index=edges_in.T)
# ldp = LocalDegreeProfile()
# ldp(raw_data)
# raw_data

Data(x=[383, 1513], edge_index=[2, 1120])

# Drug Fingerprint

In [48]:
# Build the fingerprint graph: feature matrix, node-id -> row-index map, edge index.

# Columns 0..166 are used as features; column 167 is stringified and used as
# the node identifier that is matched against the entries of `all_edges`.
x_fin = np.array(final_fin_df.iloc[:, :167], dtype=np.float32)

nodes_fin_list = list()
nodes_fin_dict = dict()

for count, x in enumerate(final_fin_df.values[:, 167:168]):
    node_name = str(x.squeeze())
    nodes_fin_dict[node_name] = count
    nodes_fin_list.append(node_name)

# Keep only edges whose two endpoints both have a feature row, remapped to row
# indices. Membership is tested on the dict (O(1)) rather than the list (O(n)).
edges_fin = [
    [nodes_fin_dict[edge[0]], nodes_fin_dict[edge[1]]]
    for edge in all_edges
    if edge[0] in nodes_fin_dict and edge[1] in nodes_fin_dict
]

nodes_fin = torch.from_numpy(np.array(list(nodes_fin_dict.values())))
# Force int64 and an (E, 2) layout so the index stays valid on every platform
# and when no edge survives the filter.
edges_fin = torch.from_numpy(np.array(edges_fin, dtype=np.int64).reshape(-1, 2))
x_fin = torch.from_numpy(x_fin)

# PyG expects edge_index as a contiguous [2, E] tensor.
data_fin = Data(x=x_fin, edge_index=edges_fin.t().contiguous())
data_fin

# raw_data = Data(edge_index=edges_fin.T)
# ldp = LocalDegreeProfile()
# ldp(raw_data)
# raw_data

Data(x=[627, 167], edge_index=[2, 2334])

# One-hot Drug Target

In [50]:
# Build the drug-target graph: feature matrix, node-id -> row-index map, edge index.

# Columns 0..789 are used as features; column 790 is stringified and used as
# the node identifier that is matched against the entries of `all_edges`.
x_dt = np.array(final_dt_df.iloc[:, :790], dtype=np.float32)

nodes_dt_list = list()
nodes_dt_dict = dict()

for count, x in enumerate(final_dt_df.values[:, 790:791]):
    node_name = str(x.squeeze())
    nodes_dt_dict[node_name] = count
    nodes_dt_list.append(node_name)

# Keep only edges whose two endpoints both have a feature row, remapped to row
# indices. Membership is tested on the dict (O(1)) rather than the list (O(n)).
edges_dt = [
    [nodes_dt_dict[edge[0]], nodes_dt_dict[edge[1]]]
    for edge in all_edges
    if edge[0] in nodes_dt_dict and edge[1] in nodes_dt_dict
]

nodes_dt = torch.from_numpy(np.array(list(nodes_dt_dict.values())))
# Force int64 and an (E, 2) layout so the index stays valid on every platform
# and when no edge survives the filter.
edges_dt = torch.from_numpy(np.array(edges_dt, dtype=np.int64).reshape(-1, 2))
x_dt = torch.from_numpy(x_dt)

# PyG expects edge_index as a contiguous [2, E] tensor.
data_dt = Data(x=x_dt, edge_index=edges_dt.t().contiguous())
data_dt

# raw_data = Data(edge_index=edges_dt.T)
# ldp = LocalDegreeProfile()
# ldp(raw_data)
# raw_data

Data(x=[760, 790], edge_index=[2, 3206])

# Creating graph data

In [76]:
def create_graph_data(df, edges=None):
    """Build a PyG ``Data`` graph from one feature frame.

    The frame layout is positional: every column except the last two is a
    feature, the second-to-last column is stringified and used as the node
    identifier, and the last column is ignored.

    Args:
        df: pandas DataFrame laid out as described above.
        edges: optional iterable of ``[source_id, target_id]`` pairs. Defaults
            to the notebook-level ``all_edges`` list.

    Returns:
        ``Data`` with ``x`` of shape ``[num_rows, num_features]`` (float32) and
        ``edge_index`` of shape ``[2, num_kept_edges]`` (int64, contiguous).
        Edges with an endpoint that has no row in ``df`` are dropped.
    """
    if edges is None:
        edges = all_edges

    name_col = df.shape[1] - 2
    features = np.array(df.iloc[:, :name_col], dtype=np.float32)

    # Map node identifier -> row position; a repeated identifier keeps its last row.
    node_index = {}
    for row_pos, raw_name in enumerate(df.values[:, name_col:name_col + 1]):
        node_index[str(raw_name.squeeze())] = row_pos

    # Dict membership keeps this linear in the number of edges.
    edge_pairs = [
        [node_index[edge[0]], node_index[edge[1]]]
        for edge in edges
        if edge[0] in node_index and edge[1] in node_index
    ]

    # Explicit int64 and (E, 2) layout: valid on every platform and for E == 0.
    edge_index = torch.from_numpy(np.array(edge_pairs, dtype=np.int64).reshape(-1, 2))

    # PyG expects edge_index as a contiguous [2, E] tensor.
    return Data(x=torch.from_numpy(features), edge_index=edge_index.t().contiguous())

In [78]:
df_list = [final_dt_df, final_w2v_df, final_nv_df, final_fin_df, final_in_df, final_se_df]

def create_graph_data_with_different_features(df_list):
    """Build one graph per feature frame, print each summary, and return them.

    Args:
        df_list: iterable of feature frames accepted by ``create_graph_data``.

    Returns:
        List of the graphs, in the same order as ``df_list``.
    """
    graphs = []
    for df in df_list:
        graph = create_graph_data(df)
        print(graph)
        graphs.append(graph)
    return graphs

# Assign the result so the cell shows only the printed summaries.
feature_graphs = create_graph_data_with_different_features(df_list)

Data(x=[760, 790], edge_index=[2, 3206])
Data(x=[614, 200], edge_index=[2, 2468])
Data(x=[752, 128], edge_index=[2, 3406])
Data(x=[627, 167], edge_index=[2, 2334])
Data(x=[383, 1513], edge_index=[2, 1120])
Data(x=[389, 3256], edge_index=[2, 1132])
