In [1]:
import numpy as np
import pandas as pd
import torch
import torch_geometric
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, Batch
from torch_geometric.transforms import LocalDegreeProfile
import torch.nn.functional as F
from torch_geometric.datasets import GemsecDeezer
from sklearn.metrics import accuracy_score, roc_auc_score
from pathlib import Path

In [2]:
# Dataset locations, resolved against the current working directory
base_dir = Path.cwd()
dataset_dir = f'{base_dir}/datasets'
se_df_path = f'{dataset_dir}/sideEffectsfillterd.csv'
g80_df_path = f'{dataset_dir}/graph80.csv'
g85_df_path = f'{dataset_dir}/graph85.csv'
# Read the three CSV files into DataFrames, in the same order as the paths above
se_df, g80_df, g85_df = map(pd.read_csv, (se_df_path, g80_df_path, g85_df_path))

In [3]:
# loading all edges and scores
# Columns 1 and 2 of each g80_df row are the two endpoints, column 3 is the score.
# Every node pair is kept only the first time it is seen (in either direction) and
# is then stored twice, u -> v and v -> u, each with the same score.
all_edges = []
all_edges_scores = []
# Hashed mirror of all_edges: gives O(1) duplicate checks instead of scanning the
# ever-growing list twice per row.
seen_edges = set()
for row in g80_df.values:
    source, target, score = row[1], row[2], row[3]
    # Both directions are always recorded together, so testing one direction is
    # enough to know whether the pair has been seen in either orientation.
    if (source, target) in seen_edges:
        continue
    seen_edges.update([(source, target), (target, source)])
    all_edges.extend([[source, target], [target, source]])
    all_edges_scores.extend([score, score])

In [None]:
# Dump every stored edge (both directions), one per line.
# The guard keeps the empty case silent, exactly like a loop over an empty list.
if all_edges:
    print(*all_edges, sep='\n')

In [50]:
# Graph data creator
def create_graph_data_from_nodes_with_features(df, all_edges, scores=None, with_edge_attr=False):
    """Build a PyG ``Data`` graph from a node-feature table and an edge list.

    Layout of ``df`` as consumed here: the first column is skipped, the last
    column holds each node's identifier (compared as ``str``), and every column
    in between is cast to ``float32`` and used as a node feature. A node's
    index in the graph is its row position in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Node table laid out as described above.
    all_edges : sequence of 2-item sequences
        Edges given as ``[source_id, target_id]`` pairs. An edge is kept only
        when both endpoints appear in the last column of ``df``.
    scores : sequence, optional
        One score per entry of ``all_edges`` (same order). Only the scores of
        kept edges are used. May be omitted when ``with_edge_attr`` is False.
    with_edge_attr : bool, default False
        When true, the kept scores are attached as ``edge_attr``, a 1-D
        ``float32`` tensor with one value per kept edge.

    Returns
    -------
    data : torch_geometric.data.Data
        ``x`` has shape ``[num_rows, num_feature_columns]`` and ``edge_index``
        is a ``long`` tensor of shape ``[2, num_kept_edges]``.
    reverse_node_data_dict : dict[int, str]
        Maps node index back to the node identifier.

    Raises
    ------
    ValueError
        If ``with_edge_attr`` is true but ``scores`` is not given.
    """
    if with_edge_attr and scores is None:
        raise ValueError("scores must be provided when with_edge_attr=True")

    # Feature matrix: everything except the first and the last column.
    x_data = torch.from_numpy(np.array(df.iloc[:, 1:-1], dtype=np.float32))

    # Identifier <-> row-position lookups built from the last column.
    nodes_data_dict = dict()
    reverse_node_data_dict = dict()
    for position, code in enumerate(df.iloc[:, -1]):
        code = str(code)
        nodes_data_dict[code] = position
        reverse_node_data_dict[position] = code

    # Keep only the edges whose two endpoints both have node features.
    edges_data = list()
    edge_scores = list()
    for i, edge in enumerate(all_edges):
        if edge[0] in nodes_data_dict and edge[1] in nodes_data_dict:
            edges_data.append([nodes_data_dict[edge[0]], nodes_data_dict[edge[1]]])
            if scores is not None:
                edge_scores.append(scores[i])

    # reshape(-1, 2) keeps the result 2-D even when no edge survived the filter,
    # so edge_index is always an int64 tensor of shape [2, num_edges].
    edge_pairs = np.array(edges_data, dtype=np.int64).reshape(-1, 2)
    edge_index = torch.from_numpy(edge_pairs).t().contiguous()

    if with_edge_attr:
        edge_attr = torch.from_numpy(np.asarray(edge_scores, dtype=np.float32))
        data = Data(x=x_data, edge_index=edge_index, edge_attr=edge_attr)
    else:
        data = Data(x=x_data, edge_index=edge_index)

    return data, reverse_node_data_dict

# Assemble the graph from the node table and the collected edges/scores.
# NOTE: the second returned value is a dict (node index -> node identifier),
# despite the `_list` suffix of the name it is bound to.
data, reverse_node_data_list = create_graph_data_from_nodes_with_features(
    df=se_df,
    all_edges=all_edges,
    scores=all_edges_scores,
)
print(data)

Data(x=[308, 1090], edge_index=[2, 204])


In [51]:
class GNN(torch.nn.Module):
    """Three stacked ``GCNConv`` layers with ReLU + dropout after the first two.

    Parameters
    ----------
    in_channels : int
        Input size of the first convolution.
    hid_channels : int
        Output size of the first and second convolutions.
    out_channels : int
        Output size of the third (last) convolution.
    dropout : float, default 0.5
        Dropout probability applied after each of the first two layers. It is
        only active while the module is in training mode.

    Raises
    ------
    ValueError
        If ``dropout`` is outside ``[0, 1]``.
    """

    def __init__(self, in_channels, hid_channels, out_channels, dropout=0.5):
        super().__init__()
        if not 0.0 <= dropout <= 1.0:
            raise ValueError(f"dropout must be in [0, 1], got {dropout}")
        # Re-seeds torch's *global* RNG on every construction so that the layer
        # initialisation below is repeatable.
        torch.manual_seed(0)
        self.dropout = dropout
        self.conv1 = GCNConv(in_channels=in_channels, out_channels=hid_channels)
        self.conv2 = GCNConv(in_channels=hid_channels, out_channels=hid_channels)
        self.conv3 = GCNConv(in_channels=hid_channels, out_channels=out_channels)

    def forward(self, x, edge_index, index=0):
        """Run the three convolutions over the graph.

        ``index`` is accepted for call-site compatibility but is not used.
        Returns the raw output of the last convolution (no activation applied).
        """
        for conv in (self.conv1, self.conv2):
            x = conv(x, edge_index)
            x = x.relu()
            x = F.dropout(x, p=self.dropout, training=self.training)
        return self.conv3(x, edge_index)

In [52]:
# Apply PyG's LocalDegreeProfile transform to the graph. According to the recorded
# outputs, x goes from [308, 1090] to [308, 1095], i.e. five columns are appended
# to the node features while edge_index is unchanged.
# NOTE(review): `data` is rebound to the transformed graph, so re-running this cell
# feeds the already-transformed graph back into the transform — presumably adding
# another five columns each time. Re-create `data` before re-running. TODO confirm.
ldp = LocalDegreeProfile()
data = ldp(data)
data

Data(x=[308, 1095], edge_index=[2, 204])