# Data Cleaning


In [65]:
# Mount Google Drive at /content/drive so the dataset CSV stored under MyDrive can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import warnings
# Silence every Python warning for the rest of the session (deprecation notices included).
warnings.filterwarnings("ignore")

In [None]:
import pandas as pd

# Change the directory path to point where the data set file is located
dataset_csv = "/content/drive/MyDrive/PubMed_records_for_covid-19_labelled&unlabelled.xlsx - Sheet1 (1).csv"

# The three text fields that make up a document, plus the 'Contextual' label column.
selected_columns = ['Article title', 'Article keywords', 'Article abstract', 'Contextual']

# Keep an independent copy of just those columns.
df = pd.read_csv(dataset_csv)[selected_columns].copy()

df.head()

In [None]:
import nltk
# Fetch the NLTK 'stopwords' corpus so that nltk.corpus.stopwords can be loaded.
nltk.download('stopwords')


In [None]:
from nltk.corpus import stopwords
# English stop-word list; the text-cleaning step drops every token found in it.
stop = stopwords.words('english')

print(stop)

In [None]:
import re
import string

# Character class matching every ASCII punctuation mark except '-', which is kept so
# hyphenated terms (e.g. "covid-19") survive. re.escape keeps ']', '\' and '^' literal
# inside the class.
punctuation_pattern = '[{}]'.format(re.escape(string.punctuation.replace('-', '')))

# Set membership is O(1); the original `stop` list is left untouched.
stop_words = set(stop)


def clean_text_column(column, separator=None):
    """Lower-case a text column, strip punctuation and drop English stop words.

    Parameters
    ----------
    column : pandas.Series
        Raw text column. Missing values are treated as empty strings, so they
        neither crash the tokenisation nor end up as the literal token 'nan'.
    separator : str, optional
        Literal delimiter that is turned into a space *before* punctuation is
        removed. Needed for ';'-separated keywords: stripping ';' first would
        fuse neighbouring keywords into a single token.

    Returns
    -------
    pandas.Series
        Cleaned text with the surviving tokens joined by single spaces.
    """
    text = column.fillna('').astype(str).str.lower()
    if separator is not None:
        text = text.str.replace(separator, ' ', regex=False)
    # regex=True must be explicit: since pandas 2.0 the pattern is a literal by default,
    # which would silently leave all punctuation in place.
    text = text.str.replace(punctuation_pattern, '', regex=True)
    return text.apply(
        lambda value: ' '.join(token for token in value.split() if token not in stop_words)
    )


df['Article title'] = clean_text_column(df['Article title'])
df['Article keywords'] = clean_text_column(df['Article keywords'], separator=';')
df['Article abstract'] = clean_text_column(df['Article abstract'])




In [None]:
from sklearn.model_selection import train_test_split

# Rows with a 'Contextual' value are labelled; all remaining rows are unlabelled.
is_labeled = df['Contextual'].notnull()
df_labeled = df[is_labeled]
df_unlabeled = df[~is_labeled]

# Unshuffled splits: 20% of the labelled rows go to test, then 6% of the rest to validation.
df_train, df_test = train_test_split(df_labeled, test_size=0.2, shuffle=False)
df_train, df_validation = train_test_split(df_train, test_size=0.06, shuffle=False)

# DataFrame index labels of each subset, captured before df_train is rebuilt below.
labeled_index_in_train, labeled_index_in_validation, labeled_index_in_test, unlabeled_index = (
    list(frame.index.values) for frame in (df_train, df_validation, df_test, df_unlabeled)
)

# Despite its name this is the total number of documents (train + validation + test + unlabelled).
train_size = sum(len(frame) for frame in (df_train, df_validation, df_test, df_unlabeled))

# Test rows join the corpus without their label column.
text_columns = ['Article title', 'Article keywords', 'Article abstract']
df_test_without_contextual = df_test[text_columns]

# From here on df_train holds every document used to build the graph.
df_train = pd.concat([df_train, df_validation, df_test_without_contextual, df_unlabeled])

for index_list in (labeled_index_in_train, labeled_index_in_validation, labeled_index_in_test):
    print(index_list)

for frame in (df_train, df_validation, df_test):
    print(frame.info())



# Build Graph


## Build list of training documents

In [None]:
# COO triplets (row, col, weight) of the graph adjacency matrix; the following cells append to them.
row = []
col = []
weight = []

# One entry per document, formatted as '<DataFrame index>:=:<title> <keywords> <abstract>'.
# The three fields are joined with spaces: plain concatenation fused the last token of one
# field with the first token of the next, creating bogus vocabulary entries.
train_list = [
    str(index) + ':=:' + ' '.join((title, keywords, abstract))
    for index, title, keywords, abstract in zip(
        df_train.index,
        df_train['Article title'],
        df_train['Article keywords'],
        df_train['Article abstract'],
    )
]



## TF-IDF for document-word weight in graph

In [None]:
from math import log
# doc word frequency
# TF is simple raw frequency unlike (1+log(tf)), we might want to change that if accuracy is not good


def _tokens_of(document):
    """Return the whitespace tokens that follow the '<index>:=:' prefix of a train_list entry."""
    return document.split(':=:')[1].split()


# build vocab
word_set = set()
for document in train_list:
    word_set.update(_tokens_of(document))

vocab = list(word_set)
vocab_size = len(vocab)

# word -> column id inside the vocabulary
word_id_map = {term: term_id for term_id, term in enumerate(vocab)}

# '<doc position>,<word id>' -> raw term count in that document
doc_word_freq = {}
for doc_id in range(train_size):
    for term in _tokens_of(train_list[doc_id]):
        pair_key = str(doc_id) + ',' + str(word_id_map[term])
        doc_word_freq[pair_key] = doc_word_freq.get(pair_key, 0) + 1

# word -> positions of the documents containing it (each document listed once)
word_doc_list = {}
for doc_id in range(train_size):
    # dict.fromkeys de-duplicates while keeping first-occurrence order
    for term in dict.fromkeys(_tokens_of(train_list[doc_id])):
        word_doc_list.setdefault(term, []).append(doc_id)

# word -> document frequency
word_doc_freq = {term: len(doc_ids) for term, doc_ids in word_doc_list.items()}

# Emit one document->word edge per distinct word, weighted by tf * idf.
for doc_id in range(train_size):
    document = train_list[doc_id]
    # The graph row of a document is the DataFrame index stored in its prefix.
    node_id = int(document.split(':=:')[0])
    for term in dict.fromkeys(_tokens_of(document)):
        term_id = word_id_map[term]
        term_count = doc_word_freq[str(doc_id) + ',' + str(term_id)]
        inverse_doc_freq = log(1.0 * len(train_list) /
                               word_doc_freq[vocab[term_id]])
        row.append(node_id)
        # Word nodes are placed after the train_size document nodes.
        col.append(train_size + term_id)
        weight.append(term_count * inverse_doc_freq)

for triplet_part in (row, col, weight):
    print(len(triplet_part))


## PMI Calculation for word-word edge weight in graph

In [None]:
# Slide a fixed-size window over every document; a document that fits in one window is a single window.
window_size = 20
windows = []

for document in train_list:
    tokens = document.split(':=:')[1].split()
    if len(tokens) <= window_size:
        windows.append(tokens)
        continue
    windows.extend(
        tokens[start: start + window_size]
        for start in range(len(tokens) - window_size + 1)
    )

# word -> number of windows containing it (counted once per window)
word_window_freq = {}
for window in windows:
    for term in dict.fromkeys(window):
        word_window_freq[term] = word_window_freq.get(term, 0) + 1

# '<word id>,<word id>' -> co-occurrence count; every pair is recorded in both orders
word_pair_count = {}
for window in windows:
    window_ids = [word_id_map[term] for term in window]
    for later_pos in range(1, len(window_ids)):
        later_id = window_ids[later_pos]
        for earlier_id in window_ids[:later_pos]:
            if later_id == earlier_id:
                continue
            forward_key = str(later_id) + ',' + str(earlier_id)
            backward_key = str(earlier_id) + ',' + str(later_id)
            for pair_key in (forward_key, backward_key):
                word_pair_count[pair_key] = word_pair_count.get(pair_key, 0) + 1

# pmi as weights
num_window = len(windows)

for pair_key, count in word_pair_count.items():
    left_id, right_id = (int(part) for part in pair_key.split(','))
    word_freq_i = word_window_freq[vocab[left_id]]
    word_freq_j = word_window_freq[vocab[right_id]]
    pmi = log((1.0 * count / num_window) /
              (1.0 * word_freq_i * word_freq_j/(num_window * num_window)))
    # Only positively associated pairs become edges.
    if pmi > 0:
        # Adjust the position of pmi weights in final adjacency matrix:
        # word nodes come after the train_size document nodes.
        row.append(train_size + left_id)
        col.append(train_size + right_id)
        weight.append(pmi)

for triplet_part in (row, col, weight):
    print(len(triplet_part))

## Diagonal edge weight initialized to 1 in the adjacency matrix

In [None]:
# Total number of graph nodes: every document plus every vocabulary word.
node_size = train_size + vocab_size

# Self-loop of weight 1 on every node.
diagonal_ids = range(node_size)
row.extend(diagonal_ids)
col.extend(diagonal_ids)
weight.extend([1] * node_size)


## Adjacency Matrix : A

In [None]:
import scipy.sparse as sp

#Train size contains all data (train + test both labeled and unlabeled)
for size in (len(row), len(col), len(weight), node_size):
    print(size)

adj = sp.csr_matrix((weight, (row, col)), shape=(node_size, node_size))

# Symmetrise: wherever adj[j, i] > adj[i, j] take the transposed value, i.e. keep the
# larger of the two directed weights for every node pair.
transposed = adj.T
transposed_is_larger = transposed > adj
adj = adj + transposed.multiply(transposed_is_larger) - adj.multiply(transposed_is_larger)
print(adj)

## Build Feature Matrix: X

In [None]:
import math
import numpy as np

# One hot vector for X as per text GCN: node i gets a single 1 in column i.
row_x = list(range(node_size))
col_x = list(range(node_size))
weight_x = [1] * node_size

x = sp.csr_matrix((weight_x, (row_x, col_x)), shape=(node_size, node_size))

print(x.shape)



In [None]:
!pip install transformers
!pip install pytorch-ignite

# BertGCN Model

## Build label matrix: Y

In [None]:
label_list = [0, 1]
nb_label = len(label_list)
# One row per graph node: the train_size documents followed by the vocab_size words.
nb_rows = train_size + vocab_size

# Row i of each matrix belongs to graph node i. Document nodes are addressed by their
# DataFrame index everywhere else (adjacency rows, masks, index loaders), so the labels
# are written at that index instead of being appended in concatenation order, which only
# lined up when the two orders happened to coincide.
train_y = np.zeros((nb_rows, nb_label), dtype=int)
validation_y = np.zeros((nb_rows, nb_label), dtype=int)
test_y = np.zeros((nb_rows, nb_label), dtype=int)
# Unlabelled documents and word nodes keep all-zero rows in every matrix.
unlabeled_y = np.zeros((nb_rows, nb_label), dtype=int)


def one_hot_label(raw_label):
    """Return the one-hot encoding of `raw_label` over `label_list`.

    Raises ValueError when the label is not one of `label_list`.
    """
    encoded = [0] * nb_label
    encoded[label_list.index(int(raw_label))] = 1
    return encoded


# The test rows inside df_train were concatenated without their 'Contextual' column,
# so test labels are read from df_test.
for indices, frame, target in (
    (labeled_index_in_train, df_train, train_y),
    (labeled_index_in_validation, df_train, validation_y),
    (labeled_index_in_test, df_test, test_y),
):
    for index in indices:
        target[index] = one_hot_label(frame.at[index, 'Contextual'])

def sample_mask(idx, l):
    """Create a boolean mask of length `l` that is True at the positions in `idx`.

    Parameters
    ----------
    idx : sequence of int
        Positions to switch on (may be empty).
    l : int
        Length of the mask.

    Returns
    -------
    numpy.ndarray
        1-D array of dtype bool.
    """
    # The builtin `bool` is used as dtype: the `np.bool` alias was removed in NumPy 1.24.
    mask = np.zeros(l, dtype=bool)
    mask[idx] = True
    return mask

# Featureless input: an identity matrix with one row per graph node. (The former
# `sp.vstack((x)).tolil()` iterated over every row of x only to be overwritten.)
features = sp.identity(x.shape[0])
print(features.shape)

# Node ids (DataFrame index labels) of each document subset.
idx_train = labeled_index_in_train
idx_val = labeled_index_in_validation
idx_test = labeled_index_in_test
idx_unlabeled = unlabeled_index

print(idx_train)
print(idx_val)
print(idx_test)
print(idx_unlabeled)

# Boolean node masks, one entry per graph node.
train_mask = sample_mask(idx_train, train_y.shape[0])
val_mask = sample_mask(idx_val, validation_y.shape[0])
test_mask = sample_mask(idx_test, test_y.shape[0])
unlabeled_mask = sample_mask(idx_unlabeled, unlabeled_y.shape[0])

print(train_mask)
print(val_mask)
print(test_mask)
print(unlabeled_mask)

# Float label matrices that are zero everywhere except on the rows of their own subset.
y_train = np.zeros(train_y.shape)
y_val = np.zeros(validation_y.shape)
y_test = np.zeros(test_y.shape)

y_train[train_mask, :] = train_y[train_mask, :]
y_val[val_mask, :] = validation_y[val_mask, :]
y_test[test_mask, :] = test_y[test_mask, :]

print(y_train.shape)
print(y_val.shape)
print(y_test.shape)

In [None]:
import torch as th

# Tokenisation and training schedule
max_length = 128          # token length every document is padded/truncated to
batch_size = 80
nb_epochs = 15
dataset = 'pubmed'
bert_init = 'roberta-base'

# Model
m = 0.7                   # weight of the GCN prediction; the classifier head gets 1 - m
gcn_layers = 2
n_hidden = 200
heads = 8
dropout = 0.5

# Learning rates. bert_lr used to be assigned twice (1e-4, then 1e-5); only the
# second, effective value is kept.
gcn_lr = 1e-3
bert_lr = 1e-5


cpu = th.device('cpu')
# Fall back to the CPU when no CUDA device is present instead of failing on the first .to(gpu).
gpu = th.device('cuda:0' if th.cuda.is_available() else 'cpu')


In [None]:
nb_node = adj.shape[0]

# Number of documents in each labelled subset (the masks are boolean, so sum() counts them).
nb_train = train_mask.sum()
nb_val = val_mask.sum()
nb_test = test_mask.sum()

# Every node that is not a document is a word node.
nb_word = nb_node - nb_train - nb_val - nb_test - len(unlabeled_index)
nb_class = y_train.shape[1]

## Graph Convolutional Layer, GCN, BertGCN Model class

In [None]:
!pip install  dgl -f https://data.dgl.ai/wheels/cu118/repo.html
!pip install  dglgo -f https://data.dgl.ai/wheels-test/repo.html

In [None]:
from dgl.nn.pytorch import GraphConv
from dgl import function as fn
from dgl.base import DGLError
from dgl.utils import expand_as_pair

class GraphConvEdgeWeight(GraphConv):
    """GraphConv whose messages can be scaled by a per-edge weight.

    `forward` follows the usual GraphConv steps (optional source normalisation,
    linear projection, neighbourhood sum, optional destination normalisation,
    bias and activation); when `edge_weight` is given each message is multiplied
    by the weight of the edge it travels along.
    """

    def _aggregate(self, graph, feat_src, edge_weight):
        """Sum source features over incoming edges, scaled by `edge_weight` when it is given."""
        graph.srcdata['h'] = feat_src
        if edge_weight is None:
            # copy_u is the current name of the builtin formerly called copy_src,
            # which no longer exists in DGL 1.x.
            message_func = fn.copy_u('h', 'm')
        else:
            graph.edata['a'] = edge_weight
            message_func = fn.u_mul_e('h', 'a', 'm')
        graph.update_all(message_func, fn.sum(msg='m', out='h'))
        return graph.dstdata['h']

    def forward(self, graph, feat,  weight=None, edge_weight=None):
        """Apply the graph convolution.

        Parameters
        ----------
        graph : DGLGraph
            Graph to convolve over.
        feat : torch.Tensor or pair of torch.Tensor
            Node features (expanded into source/destination features).
        weight : torch.Tensor, optional
            External projection matrix; only allowed when the module owns none.
        edge_weight : torch.Tensor, optional
            One scalar per edge that multiplies the message sent along it.

        Returns
        -------
        torch.Tensor
            Output features of the destination nodes.

        Raises
        ------
        DGLError
            If zero-in-degree nodes exist and are not allowed, or if an external
            `weight` is passed while the module defines its own.
        """
        with graph.local_scope():
            if not self._allow_zero_in_degree:
                if (graph.in_degrees() == 0).any():
                    raise DGLError('There are 0-in-degree nodes in the graph, '
                                   'output for those nodes will be invalid. '
                                   'This is harmful for some applications, '
                                   'causing silent performance regression. '
                                   'Adding self-loop on the input graph by '
                                   'calling `g = dgl.add_self_loop(g)` will resolve '
                                   'the issue. Setting ``allow_zero_in_degree`` '
                                   'to be `True` when constructing this module will '
                                   'suppress the check and let the code run.')

            # (BarclayII) For RGCN on heterogeneous graphs we need to support GCN on bipartite.
            feat_src, feat_dst = expand_as_pair(feat, graph)
            if self._norm == 'both':
                # Scale the source features by out_degree^-0.5 (degree clamped to >= 1).
                degs = graph.out_degrees().float().clamp(min=1)
                norm = th.pow(degs, -0.5)
                shp = norm.shape + (1,) * (feat_src.dim() - 1)
                norm = th.reshape(norm, shp)
                feat_src = feat_src * norm

            if weight is not None:
                if self.weight is not None:
                    raise DGLError('External weight is provided while at the same time the'
                                   ' module has defined its own weight parameter. Please'
                                   ' create the module with flag weight=False.')
            else:
                weight = self.weight

            if self._in_feats > self._out_feats:
                # mult W first to reduce the feature size for aggregation.
                if weight is not None:
                    feat_src = th.matmul(feat_src, weight)
                rst = self._aggregate(graph, feat_src, edge_weight)
            else:
                # aggregate first then mult W
                rst = self._aggregate(graph, feat_src, edge_weight)
                if weight is not None:
                    rst = th.matmul(rst, weight)

            if self._norm != 'none':
                # Scale the aggregated features by in_degree^-0.5 ('both') or 1/in_degree.
                degs = graph.in_degrees().float().clamp(min=1)
                if self._norm == 'both':
                    norm = th.pow(degs, -0.5)
                else:
                    norm = 1.0 / degs
                shp = norm.shape + (1,) * (feat_dst.dim() - 1)
                norm = th.reshape(norm, shp)
                rst = rst * norm

            if self.bias is not None:
                rst = rst + self.bias

            if self._activation is not None:
                rst = self._activation(rst)

            return rst


In [None]:
import torch.nn as nn
class GCN(nn.Module):
    """Stack of GraphConv layers with dropout between consecutive layers.

    The stack holds one input layer (in_feats -> n_hidden), `n_layers - 1`
    hidden layers (n_hidden -> n_hidden) and one output layer
    (n_hidden -> n_classes). `activation` is applied by every layer except the
    output layer, and `normalization` is forwarded to each GraphConv as `norm`.
    """

    def __init__(self,
                 in_feats,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout,
                 normalization='none'):
        super(GCN, self).__init__()
        # Layers are created in input -> hidden -> output order.
        convolutions = [GraphConv(in_feats, n_hidden, activation=activation, norm=normalization)]
        convolutions.extend(
            GraphConv(n_hidden, n_hidden, activation=activation, norm=normalization)
            for _ in range(n_layers - 1)
        )
        convolutions.append(GraphConv(n_hidden, n_classes, norm=normalization))
        self.layers = nn.ModuleList(convolutions)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, features, g, edge_weight):
        """Run `features` through every layer over graph `g`, passing `edge_weight` to each."""
        h = features
        for depth, convolution in enumerate(self.layers):
            # Dropout is applied to the input of every layer except the first.
            layer_input = h if depth == 0 else self.dropout(h)
            h = convolution(g, layer_input, edge_weight=edge_weight)
        return h


In [None]:
import torch as th
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

class BertGCN(th.nn.Module):
    """Transformer encoder and GCN whose class probabilities are linearly interpolated.

    Parameters
    ----------
    pretrained_model : str
        Model id or path handed to `AutoTokenizer` / `AutoModel.from_pretrained`.
        The default is the valid hub id 'roberta-base' (the former default
        'roberta_base' does not name a model and could never be loaded).
    nb_class : int
        Number of output classes.
    m : float
        Weight of the GCN prediction; the classifier head receives `1 - m`.
    gcn_layers : int
        Forwarded to `GCN` as `n_layers = gcn_layers - 1`.
    n_hidden : int
        Hidden width of the GCN.
    dropout : float
        Dropout probability used inside the GCN.
    """

    def __init__(self, pretrained_model='roberta-base', nb_class=20, m=0.7, gcn_layers=2, n_hidden=200, dropout=0.5):
        super(BertGCN, self).__init__()
        self.m = m
        self.nb_class = nb_class
        self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model)
        self.bert_model = AutoModel.from_pretrained(pretrained_model)
        # Width of the encoder's hidden states, read from the model config rather than
        # from `modules()[-2]`, which only works when a pooler is the last submodule.
        self.feat_dim = self.bert_model.config.hidden_size
        self.classifier = th.nn.Linear(self.feat_dim, nb_class)
        self.gcn = GCN(
            in_feats=self.feat_dim,
            n_hidden=n_hidden,
            n_classes=nb_class,
            n_layers=gcn_layers-1,
            activation=F.elu,
            dropout=dropout
        )

    def forward(self, g, idx):
        """Return log-probabilities for the nodes `idx` of graph `g`.

        `g.ndata` must provide 'input_ids', 'attention_mask' and 'cls_feats', and
        `g.edata` must provide 'edge_weight'. In training mode the first-token
        features of the batch are recomputed by the encoder and written back into
        `g.ndata['cls_feats']`; in eval mode the stored features are used as is.
        """
        input_ids, attention_mask = g.ndata['input_ids'][idx], g.ndata['attention_mask'][idx]
        if self.training:
            # First-token embedding of the last hidden state.
            cls_feats = self.bert_model(input_ids, attention_mask)[0][:, 0]
            g.ndata['cls_feats'][idx] = cls_feats
        else:
            cls_feats = g.ndata['cls_feats'][idx]
        cls_logit = self.classifier(cls_feats)
        cls_pred = th.nn.Softmax(dim=1)(cls_logit)
        # The GCN runs over the whole graph; only the rows of the batch are kept.
        gcn_logit = self.gcn(g.ndata['cls_feats'], g, g.edata['edge_weight'])[idx]
        gcn_pred = th.nn.Softmax(dim=1)(gcn_logit)
        # Interpolate the two distributions; the epsilon keeps the log finite.
        pred = (gcn_pred+1e-10) * self.m + cls_pred * (1 - self.m)
        pred = th.log(pred)
        return pred
    

In [None]:
# instantiate model according to class number
# (BertGCN.__init__ loads the tokenizer and encoder named by `bert_init` via from_pretrained)
model = BertGCN(nb_class=nb_class, pretrained_model=bert_init, m=m, gcn_layers=gcn_layers,
                    n_hidden=n_hidden, dropout=dropout)

In [None]:
import torch

def encode_input(text, tokenizer):
    """Tokenise `text` to a fixed length and return `(input_ids, attention_mask)`.

    Sequences are truncated and padded to the global `max_length`, and the
    tokenizer is asked for PyTorch tensors.
    """
    encoded = tokenizer(
        text,
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_tensors='pt',
    )
    return encoded.input_ids, encoded.attention_mask

# Every train_list entry is '<DataFrame index>:=:<text>'. Only the text is tokenised;
# the prefix is bookkeeping and must not reach the encoder.
doc_node_ids = [int(entry.split(':=:', 1)[0]) for entry in train_list]
doc_texts = [entry.split(':=:', 1)[1] for entry in train_list]

input_ids_, attention_mask_ = encode_input(doc_texts, model.tokenizer)
print(input_ids_.shape)

# Graph node i is the document whose DataFrame index is i: that is the row used for its
# adjacency edges and the id stored in the index loaders. The token rows are therefore
# written at that same id. The previous train+unlabeled / val / test regrouping handed
# validation, test and unlabelled nodes the tokens of other documents.
# Word nodes (the remaining rows) keep all-zero ids and masks.
node_index = th.tensor(doc_node_ids, dtype=th.long)
input_ids = th.zeros((nb_node, max_length), dtype=th.long)
attention_mask = th.zeros((nb_node, max_length), dtype=th.long)
input_ids[node_index] = input_ids_
attention_mask[node_index] = attention_mask_

# transform one-hot label to class ID for pytorch computation
# (rows without a label are all zero and therefore map to class 0)
y = y_train + y_val + y_test
y_train = y_train.argmax(axis=1)
y = y.argmax(axis=1)

# document mask used for update feature: True on every document node, False on word nodes
doc_mask = train_mask + val_mask + test_mask + unlabeled_mask


In [None]:

import dgl

def normalize_adj(adj):
    """Symmetrically normalize adjacency matrix.

    Computes (A · D^-1/2)^T · D^-1/2, where D holds the row sums of A, and
    returns the result in COO format. Rows whose sum is zero get a scaling
    factor of 0 instead of infinity.
    """
    coo = sp.coo_matrix(adj)
    degree = np.array(coo.sum(1))
    inv_sqrt_degree = np.power(degree, -0.5).flatten()
    # Isolated rows would otherwise carry an infinite scaling factor.
    inv_sqrt_degree[np.isinf(inv_sqrt_degree)] = 0.
    scaling = sp.diags(inv_sqrt_degree)
    column_scaled = coo.dot(scaling)
    return column_scaled.transpose().dot(scaling).tocoo()


# build DGL Graph
# The graph-building cells already put a self-loop of weight 1 on every node, so adding
# the identity unconditionally doubled every self-loop before normalisation. The identity
# is only added when some node still lacks a self-loop.
if (adj.diagonal() != 0).all():
    adj_with_self_loops = adj
else:
    adj_with_self_loops = adj + sp.eye(adj.shape[0])
adj_norm = normalize_adj(adj_with_self_loops)
print(adj_norm.shape)
print(dgl.__version__)
# The normalised weights are stored on the edges under 'edge_weight'.
g = dgl.from_scipy(adj_norm.astype('float32'), eweight_name='edge_weight')
print(input_ids.shape)
print(attention_mask.shape)
# Per-node data: token ids/masks, class ids and split masks.
g.ndata['input_ids'], g.ndata['attention_mask'] = input_ids, attention_mask
g.ndata['label'], g.ndata['train'], g.ndata['val'], g.ndata['test'] = \
    th.LongTensor(y), th.FloatTensor(train_mask), th.FloatTensor(val_mask), th.FloatTensor(test_mask)
g.ndata['label_train'] = th.LongTensor(y_train)
# Cache of encoder features, zero until update_feature() fills the document rows.
g.ndata['cls_feats'] = th.zeros((nb_node, model.feat_dim))

In [None]:
import torch.utils.data as Data

# create index loader
def _index_dataset(node_ids):
    """Wrap a list of node ids as a dataset holding a single LongTensor."""
    return Data.TensorDataset(torch.LongTensor(node_ids))


train_idx = _index_dataset(labeled_index_in_train)
val_idx = _index_dataset(labeled_index_in_validation)
test_idx = _index_dataset(labeled_index_in_test)
unlabeled_idx = _index_dataset(unlabeled_index)
# Every document node: labelled (train/val/test) and unlabelled.
doc_idx = Data.ConcatDataset([train_idx, val_idx, test_idx, unlabeled_idx])

# Only the training-side loaders are shuffled.
idx_loader_train = Data.DataLoader(train_idx, batch_size=batch_size, shuffle=True)
idx_loader_val = Data.DataLoader(val_idx, batch_size=batch_size)
idx_loader_test = Data.DataLoader(test_idx, batch_size=batch_size)
idx_loader = Data.DataLoader(doc_idx, batch_size=batch_size, shuffle=True)


## Train BertGCN Model

In [None]:

from torch.optim import lr_scheduler
from sklearn.metrics import accuracy_score
from ignite.engine import Events, create_supervised_evaluator, create_supervised_trainer, Engine
from ignite.metrics import Accuracy, Loss

# Training
def update_feature():
    """Recompute the cached first-token features of every document node.

    Runs the encoder in eval mode without gradients over the rows selected by
    `doc_mask`, moves the global graph `g` to the CPU, stores the new features
    in `g.ndata['cls_feats']` and returns the graph.
    """
    global model, g, doc_mask
    doc_tokens = Data.TensorDataset(
        g.ndata['input_ids'][doc_mask], g.ndata['attention_mask'][doc_mask]
    )
    # no gradient needed, uses a large batchsize to speed up the process
    loader = Data.DataLoader(doc_tokens, batch_size=256) #1024
    with th.no_grad():
        model = model.to(gpu)
        model.eval()
        cls_chunks = []
        for token_ids, token_mask in loader:
            token_ids, token_mask = token_ids.to(gpu), token_mask.to(gpu)
            hidden_states = model.bert_model(input_ids=token_ids, attention_mask=token_mask)[0]
            cls_chunks.append(hidden_states[:, 0].cpu())
        cls_feat = th.cat(cls_chunks, axis=0)
    g = g.to(cpu)
    g.ndata['cls_feats'][doc_mask] = cls_feat
    return g


# The encoder and its classifier head share bert_lr; the GCN trains with gcn_lr.
parameter_groups = [
    {'params': model.bert_model.parameters(), 'lr': bert_lr},
    {'params': model.classifier.parameters(), 'lr': bert_lr},
    {'params': model.gcn.parameters(), 'lr': gcn_lr},
]
optimizer = th.optim.Adam(parameter_groups, lr=1e-3)

# Multiply every learning rate by 0.1 once epoch 30 is reached.
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[30], gamma=0.1)


def train_step(engine, batch):
    """Run one optimisation step on a batch of document node ids.

    Parameters
    ----------
    engine : ignite.engine.Engine
        The calling engine (unused).
    batch : sequence holding one LongTensor
        Node ids of the documents in the batch.

    Returns
    -------
    tuple
        `(train_loss, train_acc)`. Both are computed on the labelled training
        nodes of the batch only; the accuracy is reported as 1 when the batch
        contains none.
    """
    global model, g, optimizer
    model.train()
    model = model.to(gpu)
    g = g.to(gpu)
    # Gradients are cleared once per step (the call used to be duplicated).
    optimizer.zero_grad()
    (idx, ) = [x.to(gpu) for x in batch]
    # .bool() keeps the mask on the device of the graph data; .type(th.BoolTensor)
    # copied it to the CPU on every step.
    train_mask = g.ndata['train'][idx].bool()
    # The forward pass covers the whole batch; the loss only sees its training nodes.
    y_pred = model(g, idx)[train_mask]
    y_true = g.ndata['label_train'][idx][train_mask]
    loss = F.nll_loss(y_pred, y_true)
    loss.backward()
    optimizer.step()
    # Cut the cached features off from this step's autograd graph.
    g.ndata['cls_feats'].detach_()
    train_loss = loss.item()
    with th.no_grad():
        if train_mask.sum() > 0:
            y_true = y_true.detach().cpu()
            y_pred = y_pred.argmax(axis=1).detach().cpu()
            train_acc = accuracy_score(y_true, y_pred)
        else:
            train_acc = 1
    return train_loss, train_acc


trainer = Engine(train_step)


In [None]:
@trainer.on(Events.EPOCH_COMPLETED)
def reset_graph(trainer):
    """End-of-epoch hook: advance the LR schedule and refresh the cached document features."""
    # Step the learning-rate scheduler once per epoch.
    scheduler.step()
    # update_feature() rebinds the global graph `g` itself, so its return value is not needed.
    update_feature()
    # Release cached CUDA memory once the feature refresh is done.
    th.cuda.empty_cache()


## Test BertGCN Model

In [None]:

def test_step(engine, batch):
    """Evaluate one batch of node ids and return `(y_pred, y_true)`.

    The model runs in eval mode without gradients; `y_pred` holds its output
    for the batch and `y_true` the class ids stored in `g.ndata['label']`.
    """
    global model, g
    with th.no_grad():
        model.eval()
        model = model.to(gpu)
        g = g.to(gpu)
        # The batch carries exactly one tensor: the node ids.
        (node_ids,) = (tensor.to(gpu) for tensor in batch)
        predictions = model(g, node_ids)
        targets = g.ndata['label'][node_ids]
        return predictions, targets


evaluator = Engine(test_step)

# Metrics computed over an evaluation run: accuracy and mean negative log-likelihood.
metrics = {'acc': Accuracy(), 'nll': Loss(th.nn.NLLLoss())}
for metric_name, metric in metrics.items():
    metric.attach(evaluator, metric_name)



In [None]:

@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(trainer):
    """End-of-epoch hook: evaluate on the train and validation loaders and print the scores.

    The best validation accuracy seen so far is kept in the function attribute
    `log_training_results.best_val_acc`, which must be initialised before training starts.
    """
    def evaluate(loader):
        # Run the evaluator over `loader` and read back its two attached metrics.
        evaluator.run(loader)
        scores = evaluator.state.metrics
        return scores["acc"], scores["nll"]

    train_acc, train_nll = evaluate(idx_loader_train)
    val_acc, val_nll = evaluate(idx_loader_val)
    print(
        "Epoch: {} Train acc: {:.4f} loss: {:.4f} Val acc: {:.4f} loss: {:.4f} "
        .format(trainer.state.epoch, train_acc, train_nll, val_acc, val_nll)
    )
    if val_acc > log_training_results.best_val_acc:
        log_training_results.best_val_acc = val_acc



In [None]:
# Initialise the best-validation-accuracy tracker read by log_training_results.
log_training_results.best_val_acc = 0
# Fill the cached document features before the first epoch.
g = update_feature()
# Train over every document node (labelled and unlabelled) for nb_epochs epochs.
trainer.run(idx_loader, max_epochs=nb_epochs)

In [None]:
# Final evaluation on the held-out test nodes.
evaluator.run(idx_loader_test)
metrics = evaluator.state.metrics
test_acc = metrics["acc"]
test_nll = metrics["nll"]

print("Test acc: {:.4f} loss: {:.4f}".format(test_acc, test_nll))