In [1]:
!pip install torch torchvision torchaudio torch-geometric scikit-learn numpy pandas matplotlib seaborn tqdm easydict -q

In [1]:
import torch, numpy as np, pandas as pd
from IPython.display import Image

In [None]:
# loaded_data = torch.load('/kaggle/input/gremi-ori/BRCA/data.pt')
# print(loaded_data.keys())

# for key in loaded_data.keys():
#     print(key)
#     print(loaded_data[key].shape)
#     print(loaded_data[key])

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch_geometric.nn import global_mean_pool as gap
from torch.nn import LayerNorm, Parameter
from torch.nn import init, Parameter
import torch.optim.lr_scheduler as lr_scheduler
from typing import Dict

from utils import *


def xavier_init(m):
    if type(m) == nn.Linear:
        nn.init.xavier_normal_(m.weight)
        if m.bias is not None:
            m.bias.data.fill_(0.0)

class LinearLayer(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.clf = nn.Sequential(nn.Linear(in_dim, out_dim))
        self.clf.apply(xavier_init)

    def forward(self, x):
        x = self.clf(x)
        return x

class Fusion(nn.Module):
    def __init__(self, num_class, num_views, hidden_dim, dropout, in_dim, dim1, dim2, dim3, alpha=0.5):
        super().__init__()
        self.gat1 = GAT(dropout=0.5, alpha=alpha, dim=dim1)
        self.gat2 = GAT(dropout=0.5, alpha=alpha, dim=dim2)
        self.gat3 = GAT(dropout=0.5, alpha=alpha, dim=dim3)

        self.views = len(in_dim)
        self.classes = num_class
        self.dropout = dropout
        self.hidden_dim = hidden_dim

        self.FeatureInforEncoder = nn.ModuleList([LinearLayer(in_dim[view], in_dim[view]) for view in range(self.views)])
        self.TCPConfidenceLayer = nn.ModuleList([LinearLayer(hidden_dim[0], 1) for _ in range(self.views)])
        self.TCPClassifierLayer = nn.ModuleList([LinearLayer(hidden_dim[0], num_class) for _ in range(self.views)])

        self.MMClasifier = []
        for layer in range(1, len(hidden_dim) - 1):
            self.MMClasifier.append(LinearLayer(self.views * hidden_dim[0], hidden_dim[layer]))
            self.MMClasifier.append(nn.ReLU())
            self.MMClasifier.append(nn.Dropout(p=dropout))
        if len(self.MMClasifier):
            self.MMClasifier.append(LinearLayer(hidden_dim[-1], num_class))
        else:
            self.MMClasifier.append(LinearLayer(self.views * hidden_dim[-1], num_class))
        self.MMClasifier = nn.Sequential(*self.MMClasifier)


    def forward(self, omic1, omic2, omic3, adj1, adj2, adj3, label=None, infer=False):
        output1, gat_output1 = self.gat1(omic1, adj1)
        output2, gat_output2 = self.gat2(omic2, adj2)
        output3, gat_output3 = self.gat3(omic3, adj3)
        #
        feature = dict()
        feature[0], feature[1], feature[2] = output1, output2, output3
        #
        criterion = torch.nn.CrossEntropyLoss(reduction='none')
        loss_function = nn.CrossEntropyLoss()
        #
        FeatureInfo, TCPLogit, TCPConfidence = dict(), dict(), dict()
        for view in range(self.views):
            feature[view] = F.relu(feature[view])
            feature[view] = F.dropout(feature[view], self.dropout, training=self.training)
            TCPLogit[view] = self.TCPClassifierLayer[view](feature[view])
            TCPConfidence[view] = self.TCPConfidenceLayer[view](feature[view])
            feature[view] = feature[view] * TCPConfidence[view]

        MMfeature = torch.cat([i for i in feature.values()], dim=1)
        MMlogit = self.MMClasifier(MMfeature)
        if infer:
            return MMlogit
        MMLoss = torch.mean(criterion(MMlogit, label))
        print(torch.unique(gat_output1))
        loss_gat1 = loss_function(gat_output1,label)
        loss_gat2 = loss_function(gat_output2,label)
        loss_gat3 = loss_function(gat_output3,label)
        gat_loss = dict()
        gat_loss[0], gat_loss[1], gat_loss[2] = loss_gat1, loss_gat2, loss_gat3
        for view in range(self.views):
            MMLoss = MMLoss + gat_loss[view]
            pred = F.softmax(TCPLogit[view], dim=1)
            p_target = torch.gather(input=pred, dim=1, index=label.unsqueeze(dim=1)).view(-1)
            confidence_loss = torch.mean(
                F.mse_loss(TCPConfidence[view].view(-1), p_target) + criterion(TCPLogit[view], label))
            MMLoss = MMLoss + confidence_loss
        return MMLoss, MMlogit, gat_output1, gat_output2, gat_output3, output1, output2, output3

    def infer(self, omic1, omic2, omic3, adj1, adj2, adj3):
        MMlogit = self.forward(omic1, omic2, omic3, adj1, adj2, adj3, infer=True)
        return MMlogit


class GAT(nn.Module):
    def __init__(self, dropout, alpha, dim):

        super(GAT, self).__init__()
        self.dropout = dropout
        self.act = define_act_layer(act_type='none')
        self.dim = dim
        self.nhids = [8, 16, 12]
        self.nheads = [4, 3, 4]
        self.fc_dim = [600, 256, 64, 32]

        self.attentions1 = [GraphAttentionLayer(
            1, self.nhids[0], dropout=dropout, alpha=alpha, concat=True) for _ in range(self.nheads[0])]
        for i, attention1 in enumerate(self.attentions1):
            self.add_module('attention1_{}'.format(i), attention1)

        self.attentions2 = [GraphAttentionLayer(
            self.nhids[0] * self.nheads[0], self.nhids[1], dropout=dropout, alpha=alpha, concat=True) for _ in
            range(self.nheads[1])]
        for i, attention2 in enumerate(self.attentions2):
            self.add_module('attention2_{}'.format(i), attention2)

        self.attentions3 = [GraphAttentionLayer(
            self.nhids[1] * self.nheads[1], self.nhids[2], dropout=dropout, alpha=alpha, concat=True) for _ in
            range(self.nheads[2])]
        for i, attention3 in enumerate(self.attentions3):
            self.add_module('attention3_{}'.format(i), attention3)

        self.dropout_layer = nn.Dropout(p=self.dropout)


        self.pool1 = torch.nn.Linear(self.nhids[0] * self.nheads[0], 1)
        self.pool2 = torch.nn.Linear(self.nhids[1] * self.nheads[1], 1)
        self.pool3 = torch.nn.Linear(self.nhids[2] * self.nheads[2], 1)

        lin_input_dim = 3 * self.dim
        self.fc1 = nn.Sequential(
            nn.Linear(lin_input_dim, self.fc_dim[0]),
            nn.ELU(),
            nn.AlphaDropout(p=self.dropout, inplace=False))
        self.fc1.apply(xavier_init)

        self.fc2 = nn.Sequential(
            nn.Linear(self.fc_dim[0], self.fc_dim[1]),
            nn.ELU(),
            nn.AlphaDropout(p=self.dropout, inplace=False))
        self.fc2.apply(xavier_init)

        self.fc3 = nn.Sequential(
            nn.Linear(self.fc_dim[1], self.fc_dim[2]),
            nn.ELU(),
            nn.AlphaDropout(p=self.dropout, inplace=False))
        self.fc3.apply(xavier_init)

        self.fc4 = nn.Sequential(
            nn.Linear(self.fc_dim[2], self.fc_dim[3]),
            nn.ELU(),
            nn.AlphaDropout(p=self.dropout, inplace=False))
        self.fc4.apply(xavier_init)

        self.fc5 = nn.Sequential(
            nn.Linear(self.fc_dim[3], 2))
        self.fc5.apply(xavier_init)

    def forward(self, x, adj):


        x0 = torch.mean(x, dim=-1)
        x = self.dropout_layer(x)
        x = torch.cat([att(x, adj) for att in self.attentions1], dim=-1)

        x1 = self.pool1(x).squeeze(-1)
        x = self.dropout_layer(x)
        x = torch.cat([att(x, adj) for att in self.attentions2], dim=-1)

        x2 = self.pool2(x).squeeze(-1)
        x = torch.cat([x0, x1, x2], dim=1)

        x = self.fc1(x)
        x = self.fc2(x)
        x1 = self.fc3(x)
        x = self.fc4(x1)
        x = self.fc5(x)

        output = x1
        gat_output = x

        return output, gat_output


class GraphAttentionLayer(nn.Module):

    def __init__(self, in_features, out_features, dropout, alpha, concat=True):
        super(GraphAttentionLayer, self).__init__()
        self.dropout = dropout
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        self.concat = concat

        self.W = nn.Parameter(torch.zeros(size=(in_features, out_features)))
        nn.init.xavier_uniform_(self.W.data, gain=1.414)
        self.a = nn.Parameter(torch.zeros(size=(2 * out_features, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

        self.leakyrelu = nn.LeakyReLU(self.alpha)
        self.dropout_layer = nn.Dropout(p=self.dropout)

    def forward(self, input, adj):
        """
        input: mini-batch input. size: [batch_size, num_nodes, node_feature_dim]
        adj:   adjacency matrix. size: [num_nodes, num_nodes].  need to be expanded to batch_adj later.
        """
        h = torch.matmul(input, self.W)
        bs, N, _ = h.size()

        a_input = torch.cat([h.repeat(1, 1, N).view(bs, N * N, -1), h.repeat(1, N, 1)], dim=-1).view(bs, N, -1,
                                                                                                     2 * self.out_features)

        e = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(3))

        batch_adj = torch.unsqueeze(adj, 0).repeat(bs, 1, 1)


        zero_vec = -9e15 * torch.ones_like(e)
        attention = torch.where(batch_adj > 0, e, zero_vec)
        attention = self.dropout_layer(F.softmax(attention, dim=-1))  # [bs, N, N]
        # print("attention shape:", attention.shape)
        h_prime = torch.bmm(attention, h)  # [bs, N, F]
        # print("h_prime:", h_prime.shape)

        if self.concat:
            return F.elu(h_prime)
        else:
            return h_prime

    def __repr__(self):
        return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')'


In [5]:
import os
import logging
import time
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import pickle
import copy
import time
import torch
# from model_GBM import *

# Env
from utils import *
import gc


main_gremi = './train_test_1.py'
data_dir = f'./TCGA_GBM_GExCNVxMETH_2000_MinMaxScaler/1'
model_save_dir = f'./model-gbm1.pth'
patience = 200
tr_label='tr'
te_label='te'
batch_size=32

# DATA
# loaded_data = torch.load(data_dir)
#
data_tr = pd.concat(
    [
        pd.read_csv(f"{data_dir}/1_{tr_label}.csv").iloc[:, :200],
        pd.read_csv(f"{data_dir}/2_{tr_label}.csv").iloc[:, :200],
        pd.read_csv(f"{data_dir}/3_{tr_label}.csv").iloc[:, :200],
        pd.read_csv(f"{data_dir}/labels_{tr_label}.csv").iloc[:, :200],
    ],
    axis=1,
)
tr_omic = torch.tensor(data_tr.iloc[:, :-1].values, dtype=torch.float32)
tr_labels = torch.tensor(data_tr.iloc[:, -1].values, dtype=torch.long)
data_te = pd.concat(
    [
        pd.read_csv(f"{data_dir}/1_{te_label}.csv").iloc[:, :200],
        pd.read_csv(f"{data_dir}/2_{te_label}.csv").iloc[:, :200],
        pd.read_csv(f"{data_dir}/3_{te_label}.csv").iloc[:, :200],
        pd.read_csv(f"{data_dir}/labels_{te_label}.csv").iloc[:, :200],
    ],
    axis=1,
)
te_omic = torch.tensor(data_te.iloc[:, :-1].values, dtype=torch.float32)
te_labels = torch.tensor(data_te.iloc[:, -1].values, dtype=torch.long)
exp_adj1 = torch.tensor(
    pd.read_csv(f"{data_dir}/adj1.csv", header=0, index_col=0).iloc[:200, :200].values, dtype=torch.float32
)
exp_adj2 = torch.tensor(
    pd.read_csv(f"{data_dir}/adj2.csv", header=0, index_col=0).iloc[:200, :200].values, dtype=torch.float32
)
exp_adj3 = torch.tensor(
    pd.read_csv(f"{data_dir}/adj3.csv", header=0, index_col=0).iloc[:200, :200].values, dtype=torch.float32
)
print(data_tr.shape)
# print(data_tr.head())
print(data_te.shape)
# print(data_te.head())
print(tr_omic.shape)
# print(tr_omic)
print(tr_labels.shape)
# print(tr_labels)
print(te_omic.shape)
# print(te_omic)
print(te_labels.shape)
# print(te_labels)
print(exp_adj1.shape)
# print(exp_adj1)
print(exp_adj2.shape)
# print(exp_adj2)
print(exp_adj3.shape)
# print(exp_adj3)

# DATA LOADRE
tr_dataset = torch.utils.data.TensorDataset(tr_omic, tr_labels)
tr_data_loader = torch.utils.data.DataLoader(
    dataset=tr_dataset, batch_size=batch_size, shuffle=True
)
te_dataset = torch.utils.data.TensorDataset(te_omic, te_labels)
te_data_loader = torch.utils.data.DataLoader(
    dataset=te_dataset, batch_size=batch_size, shuffle=False
)


num_epochs = 3000
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


loss_function = nn.CrossEntropyLoss()
input_in_dim = [exp_adj1.shape[0], exp_adj2.shape[0], exp_adj3.shape[0]]
input_hidden_dim = [64]
network = Fusion(
    num_class=4,
    num_views=3,
    hidden_dim=input_hidden_dim,
    dropout=0.1,
    in_dim=input_in_dim,
    dim1=input_in_dim[0],
    dim2=input_in_dim[1],
    dim3=input_in_dim[2],
)
network.to(device)

optimizer = torch.optim.Adam(network.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=500, gamma=0.2)

best_model_wts = copy.deepcopy(network.state_dict())
best_acc = 0.0
best_epoch = 0
train_loss_all = []
train_acc_all = []
test_loss_all = []
test_acc_all = []

# Early Stopping
early_stopping_counter = 0

for epoch in range(0, num_epochs):
    print(" Epoch {}/{}".format(epoch, num_epochs - 1))
    print(patience, num_epochs)
    print("-" * 10)
    isPrint = epoch % 100 == 0
    # Print epoch
    if isPrint:
        print(" Epoch {}/{}".format(epoch, num_epochs - 1))
        print("-" * 10)
    # Set current loss value
    network.train()
    current_loss = 0.0
    train_loss = 0.0
    train_corrects = 0
    train_num = 0

    for i, data in enumerate(tr_data_loader, 0):

        batch_x, targets = data
        print(batch_x.shape)
        batch_x1 = batch_x[:, : input_in_dim[0]].reshape(-1, input_in_dim[0], 1)
        batch_x2 = batch_x[:, input_in_dim[0] : -input_in_dim[2]].reshape(
            -1, input_in_dim[1], 1
        )
        batch_x3 = batch_x[:, -input_in_dim[2] :].reshape(-1, input_in_dim[2], 1)

        batch_x1 = batch_x1.to(torch.float32)
        batch_x2 = batch_x2.to(torch.float32)
        batch_x3 = batch_x3.to(torch.float32)
        targets = targets.long()
        batch_x1 = batch_x1.to(device)
        batch_x2 = batch_x2.to(device)
        batch_x3 = batch_x3.to(device)
        targets = targets.to(device)
        exp_adj1 = exp_adj1.to(device)
        exp_adj2 = exp_adj2.to(device)
        exp_adj3 = exp_adj3.to(device)
        print(targets)

        optimizer.zero_grad()
        (
            loss_fusion,
            tr_logits,
            gat_output1,
            gat_output2,
            gat_output3,
            output1,
            output2,
            output3,
        ) = network(batch_x1, batch_x2, batch_x3, exp_adj1, exp_adj2, exp_adj3, targets)
        tr_prob = F.softmax(tr_logits, dim=1)
        tr_pre_lab = torch.argmax(tr_prob, 1)

        loss = loss_fusion
        loss.backward()
        optimizer.step()
        print("3")

        train_loss += loss.item() * batch_x1.size(0)
        train_corrects += torch.sum(tr_pre_lab == targets.data)
        train_num += batch_x1.size(0)
    # Evaluation for this fold
    print("Trained")
    network.eval()
    test_loss = 0.0
    test_corrects = 0
    test_num = 0
    for i, data in enumerate(te_data_loader, 0):
        batch_x, targets = data
        batch_x1 = batch_x[:, : input_in_dim[0]].reshape(-1, input_in_dim[0], 1)
        batch_x2 = batch_x[:, input_in_dim[0] : -input_in_dim[2]].reshape(
            -1, input_in_dim[1], 1
        )
        batch_x3 = batch_x[:, -input_in_dim[2] :].reshape(-1, input_in_dim[2], 1)
        batch_x1 = batch_x1.to(torch.float32)
        batch_x2 = batch_x2.to(torch.float32)
        batch_x3 = batch_x3.to(torch.float32)
        targets = targets.long()
        batch_x1 = batch_x1.to(device)
        batch_x2 = batch_x2.to(device)
        batch_x3 = batch_x3.to(device)
        targets = targets.to(device)
        exp_adj1 = exp_adj1.to(device)
        exp_adj2 = exp_adj2.to(device)
        exp_adj3 = exp_adj3.to(device)

        te_logits = network.infer(
            batch_x1, batch_x2, batch_x3, exp_adj1, exp_adj2, exp_adj3
        )
        te_prob = F.softmax(te_logits, dim=1)
        te_pre_lab = torch.argmax(te_prob, 1)

        test_corrects += torch.sum(te_pre_lab == targets.data)
        test_num += batch_x1.size(0)

    train_loss_all.append(train_loss / train_num)
    train_acc_all.append(train_corrects.double().item() / train_num)
    test_acc_all.append(test_corrects.double().item() / test_num)
    print("Tested")
    if isPrint:
        print(
            "{} Train Loss : {:.8f} Train ACC : {:.8f}".format(
                epoch, train_loss_all[-1], train_acc_all[-1]
            )
        )
        print("{}  Test ACC : {:.8f}".format(epoch, test_acc_all[-1]))

    if test_acc_all[-1] > best_acc:
        best_acc = test_acc_all[-1]
        best_epoch = epoch + 1
        best_model_wts = copy.deepcopy(network.state_dict())
        early_stopping_counter = 0
        # Saving the model
        save_path = model_save_dir
        state = {
            "net": best_model_wts,
        }
        torch.save(state, save_path)
    else:
        early_stopping_counter += 1

    if early_stopping_counter >= patience:
        print(f"Early stopping triggered at epoch {epoch + 1}")
        print(f"Best test accuracy: {best_acc}")
        print(f"Best test epoch: {best_epoch}")
        break
print("end")

plt.figure(figsize=(30, 15))
plt.subplot(1, 2, 1)
plt.plot(train_loss_all, "ro-", label="Train loss")
plt.legend()
plt.xlabel("epoch")
plt.ylabel("Loss")
plt.title("Best test epoch: {0}".format(best_epoch - 1))
plt.subplot(1, 2, 2)
plt.plot(train_acc_all, "ro-", label="Train acc")
plt.plot(test_acc_all, "bs-", label="Test acc")
plt.xlabel("epoch")
plt.ylabel("acc")
plt.title("Best test Acc: {0}".format(best_acc))
plt.legend()
plt.savefig("/kaggle/working/total_loss.png")
plt.show()


(161, 601)
(53, 601)
torch.Size([161, 600])
torch.Size([161])
torch.Size([53, 600])
torch.Size([53])
torch.Size([200, 200])
torch.Size([200, 200])
torch.Size([200, 200])
 Epoch 0/2999
200 3000
----------
 Epoch 0/2999
----------
torch.Size([32, 600])
tensor([3, 1, 2, 1, 3, 1, 0, 0, 3, 2, 1, 1, 0, 3, 3, 1, 0, 2, 1, 0, 2, 3, 2, 0,
        2, 0, 3, 0, 3, 1, 3, 3])


IndexError: Target 3 is out of bounds.

In [None]:
for i in range(1, 5):
    print(i)
    main_gremi = './train_test_1.py'
    data_dir = f'./TCGA_GBM_GExCNVxMETH_2000_MinMaxScaler/{i}'
    model_save_dir = f'./model-gbm{i}.pth'
    patience = 200
    tr_label='tr'
    te_label='te'
    batch_size=32
    !python {main_gremi} '{data_dir}' '{model_save_dir}' '{patience}' '{tr_label}' '{te_label}' '{batch_size}'
    
    # display(Image("/kaggle/working/total_loss.png"))

1
 Epoch 0/2999
200 3000
----------
 Epoch 0/2999
----------
2
 Epoch 0/2999
200 3000
----------
 Epoch 0/2999
----------
3


UnboundLocalError: cannot access local variable 'child' where it is not associated with a value

In [None]:
# %cd /kaggle/input/gremi-ori/ROSMAP
# !python model_test_rosmap.py

main_gremi = '/kaggle/input/gremi-ori/train_test_0.py'
data_dir = '/kaggle/input/gremi-ori/ROSMAP/data.pt'
model_save_dir = '/kaggle/working/model-rosmap1.pth'
patience = 100
!python {main_gremi} '{data_dir}' '{model_save_dir}' '{patience}'

display(Image("/kaggle/working/total_loss.png"))

  loaded_data = torch.load(data_dir)
 Epoch 0/2999
----------
0 Train Loss : 8.88440137 Train ACC : 0.55918367
0  Test ACC : 0.51886792


In [None]:
%cd /kaggle/input/gremi-ori/BRCA
!python model_test_0.py

# main_gremi = '/kaggle/input/gremi-ori/train_test_0.py'
# data_dir = '/kaggle/input/gremi-ori/BRCA/data.pt'
# model_save_dir = '/kaggle/working/model-brca1.pth'
# patience = 100
# !python {main_gremi} '{data_dir}' '{model_save_dir}' '{patience}'

# display(Image("/kaggle/working/total_loss.png"))

In [None]:
# %cd /kaggle/input/gremi-ori/in-house data/LUAD
# !python train_test_0.py
main_gremi = '/kaggle/input/gremi-ori/train_test_0.py'
data_dir = '/kaggle/input/gremi-ori/in-house data/LUAD/data.pt'
model_save_dir = '/kaggle/working/model-luad1.pth'
patience = 100
!python {main_gremi} '{data_dir}' '{model_save_dir}' '{patience}'

display(Image("/kaggle/working/total_loss.png"))

In [None]:
main_gremi = '/kaggle/input/gremi-ori/train_test_0.py'
data_dir = '/kaggle/input/gremi-ori/in-house data/THCA/data.pt'
model_save_dir = '/kaggle/working/model-thca1.pth'
patience = 100
!python {main_gremi} '{data_dir}' '{model_save_dir}' '{patience}'

display(Image("/kaggle/working/total_loss.png"))

In [None]:
main_gremi = '/kaggle/input/gremi-ori/train_test_0.py'
data_dir = '/kaggle/input/gremi-ori/in-house data/UCEC/data.pt'
model_save_dir = '/kaggle/working/model-ucec1.pth'
patience = 100
!python {main_gremi} '{data_dir}' '{model_save_dir}' '{patience}'

display(Image("/kaggle/working/total_loss.png"))

In [None]:
# %cd /kaggle/input/gremi-ori/explain
# !python mutag_sub_demo.py