 Li Zefeng 
 G2204688A

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')


Mounted at /content/gdrive


In [None]:
import torch

!pip uninstall torch-scatter torch-sparse torch-geometric torch-cluster  --y
!pip install torch-scatter -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install torch-cluster -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install git+https://github.com/pyg-team/pytorch_geometric.git

In [None]:
!pip install stanfordcorenlp

In [None]:
import nltk
nltk.download('stopwords')

In [None]:
import argparse
import os
# from pickletools import optimize
import random
import string
import time
import json
from math import log
from pathlib import Path
from datetime import datetime
import pytz

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # del
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

import numpy as np
import scipy.sparse as sp
from nltk.corpus import stopwords
from stanfordcorenlp import StanfordCoreNLP
from torch import Tensor, nn
from tqdm import tqdm
import torch
from torch.optim import AdamW
from torch.utils.data import TensorDataset, RandomSampler, SequentialSampler, DataLoader



class LSTM_classifier(nn.Module):
    """Unidirectional LSTM text classifier that also exposes its per-token states.

    Args:
        vocab_size: Number of rows in the embedding table.
        emb_size: Dimension of each token embedding.
        hidden_size: Dimension of the LSTM hidden state.
        num_labels: Number of output classes.
        dropout: Dropout probability applied between stacked LSTM layers.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, emb_size, hidden_size, num_labels, dropout, num_layers=1) -> None:
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size)
        # nn.LSTM only applies dropout *between* stacked layers. With a single
        # layer a non-zero value has no effect and triggers a UserWarning, so it
        # is only forwarded when there is more than one layer.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size=emb_size, hidden_size=hidden_size, num_layers=num_layers,
                            batch_first=True, dropout=inter_layer_dropout, bidirectional=False)
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, inputs):
        """Run the classifier.

        Args:
            inputs: Integer tensor of token ids, batch-first.

        Returns:
            A pair ``(output, res)``: ``output`` is the LSTM output for every
            position and ``res`` is the classifier output computed from the
            mean of ``output`` over dimension 1 (all positions are averaged,
            no padding mask is applied).
        """
        emb = self.embedding(inputs)
        output, _ = self.lstm(emb)
        inter_output = torch.mean(output, dim=1)
        res = self.classifier(inter_output)
        return output, res



def gen_syn(corpus, nlp:StanfordCoreNLP, row_tfidf, col_tfidf, weight_tfidf, word_id_map, node_size, train_size):
    '''
    Build the syntactic graph: word-word edges weighted by how often two words
    are linked by a dependency relation, merged with the given doc-word TF-IDF edges.

    input:
        corpus: a list that contains sentences/documents (strings)
        nlp: StanfordCoreNLP client used to request dependency parses
        row_tfidf, col_tfidf, weight_tfidf: lists holding the row indices,
            column indices and weights of the doc-word TF-IDF edges
        word_id_map: dict mapping a word to its vocabulary index
        node_size: side length of the square adjacency matrix
        train_size: offset added to a vocabulary index to obtain the word's node index
    output:
        scipy.sparse.csr_matrix of shape (node_size, node_size)
    '''
    t = time.time()
    stop_words = set(stopwords.words('english'))

    # Collect dependency-relation pairs. Keys are (word, word) tuples so that
    # tokens containing a comma cannot corrupt the key.
    pair_counts = {}
    for doc_id in tqdm(range(len(corpus))):
        for window in corpus[doc_id].split("\n"):
            if not window.strip():
                continue
            # NOTE(review): str.replace searches for the *entire* punctuation
            # string as one substring, so this call effectively leaves the text
            # untouched. Kept as-is so the parser input does not change.
            window = window.replace(string.punctuation, ' ')
            try:
                r_dict = nlp._request('depparse', window)
            except json.decoder.JSONDecodeError:
                # Skip text the server could not return a parse for.
                continue
            # Count only the pairs of the window that was just parsed. The
            # previous version kept appending to a per-document list and
            # re-counted earlier windows for every later window.
            for sentence in r_dict['sentences']:
                for dep in sentence['basicDependencies']:
                    governor = dep['governorGloss']
                    dependent = str(dep['dependentGloss'])
                    if governor == 'ROOT' or dependent == 'ROOT':
                        continue
                    if governor == dependent:
                        continue
                    if governor in string.punctuation or dependent in string.punctuation:
                        continue
                    if governor in stop_words or dependent in stop_words:
                        continue
                    # Register both directions.
                    for key in ((governor, dependent), (dependent, governor)):
                        pair_counts[key] = pair_counts.get(key, 0) + 1

    if pair_counts:
        min_count = min(pair_counts.values())
        max_count = max(pair_counts.values())
    else:
        min_count = max_count = 0
    count_range = max_count - min_count

    graph = []
    row, col = [], []
    for (word_a, word_b), count in pair_counts.items():
        if word_a not in word_id_map or word_b not in word_id_map:
            continue
        row.append(train_size + word_id_map[word_a])
        col.append(train_size + word_id_map[word_b])
        # Min-max normalisation. When every pair has the same count the range is
        # zero; fall back to a uniform weight of 1.0 instead of dividing by zero.
        graph.append((count - min_count) / count_range if count_range else 1.0)

    weight = graph + weight_tfidf
    num_edges = len(row)
    row = row + row_tfidf
    col = col + col_tfidf
    adj = sp.csr_matrix(
        (weight, (row, col)), shape=(node_size, node_size))
    logger.info("Syntactic graph finish! Time spent {:2f} number of edges {}".format(time.time()-t, num_edges))
    return adj

def trans_corpus_to_ids(corpus, word_id_map, max_len):
    """Turn whitespace-tokenised texts into a fixed-width matrix of token ids.

    Each word is mapped to ``word_id_map[word] + 1`` so that id 0 stays free
    for padding. Texts longer than ``max_len`` are truncated, shorter ones are
    right-padded with 0.

    Returns:
        ``np.int32`` array with one row per text.
    """
    padded_rows = []
    for text in corpus:
        token_ids = [word_id_map[token] + 1 for token in text.split()[:max_len]]
        # A non-positive multiplier yields an empty list, i.e. no padding.
        token_ids.extend([0] * (max_len - len(token_ids)))
        padded_rows.append(token_ids)
    return np.asarray(padded_rows, dtype=np.int32)

def lstm_eval(model, dataloader, device):
    """Evaluate ``model`` on ``dataloader`` and collect its per-token outputs.

    The model is expected to return ``(output, pred)`` for a batch of inputs.
    It is switched to eval mode for the pass and back to train mode afterwards.

    Returns:
        ``(acc, all_outs)``: the fraction of correctly predicted labels and the
        first model output of every batch concatenated along axis 0 (NumPy array).
    """
    model.eval()
    predicted, gold, hidden_chunks = [], [], []
    with torch.no_grad():
        for batch in dataloader:
            x, y = (tensor.to(device) for tensor in batch)
            hidden, logits = model(x)
            hidden_chunks.append(hidden.cpu().numpy())
            predicted.extend(torch.argmax(logits, dim=-1).tolist())
            gold.extend(y.tolist())

    acc = np.mean(np.asarray(predicted) == np.asarray(gold))
    all_outs = np.concatenate(hidden_chunks, axis=0)

    model.train()
    return acc, all_outs

def train_lstm(corpus, word_id_map, train_size, valid_size, labels, emb_size, hidden_size, dropout, batch_size, epochs, lr, weight_decay, num_labels,device,max_len, dataset, graphs_saved_path, num_layers):
    """Train the LSTM classifier and return its per-token outputs for every document.

    The corpus is split positionally: the first ``train_size`` documents are
    used for training, the next ``valid_size`` for validation and the rest for
    testing.

    Args:
        corpus: list of whitespace-tokenised documents.
        word_id_map: dict mapping a word to its vocabulary index.
        train_size, valid_size: sizes of the training and validation splits.
        labels: list of integer labels aligned with ``corpus``.
        emb_size, hidden_size, dropout, num_layers: LSTM_classifier hyper-parameters.
        batch_size, epochs, lr, weight_decay: optimisation settings.
        num_labels: number of classes.
        device: torch device to run on.
        max_len: documents are truncated/padded to this many tokens.
        dataset: dataset name, used in the checkpoint file name.
        graphs_saved_path: directory the best checkpoint is written to.

    Returns:
        ``(model, all_outs, corpus_ids)`` where ``all_outs`` holds the LSTM
        outputs of the train, validation and test documents concatenated in
        that order, and ``corpus_ids`` is the padded id matrix.
    """
    vocab_size = len(word_id_map) + 1  # id 0 is reserved for padding
    corpus_ids = trans_corpus_to_ids(corpus, word_id_map, max_len)
    model = LSTM_classifier(vocab_size, emb_size, hidden_size, num_labels, dropout, num_layers=num_layers)
    model.to(device)

    def build_dataset(start, stop):
        # Slice documents and labels with the same bounds so they stay aligned.
        x = torch.tensor(corpus_ids[start:stop, :], dtype=torch.long)
        y = torch.tensor(labels[start:stop], dtype=torch.long)
        return TensorDataset(x, y)

    dev_end = train_size + valid_size
    train_dataset = build_dataset(0, train_size)
    dev_dataset = build_dataset(train_size, dev_end)
    test_dataset = build_dataset(dev_end, None)

    # Shuffled loader for optimisation; sequential loaders for evaluation so
    # that the collected outputs keep the corpus order.
    train_dataloader = DataLoader(train_dataset, batch_size, sampler=RandomSampler(train_dataset))
    train_dev_dataloader = DataLoader(train_dataset, batch_size, sampler=SequentialSampler(train_dataset))
    dev_dataloader = DataLoader(dev_dataset, batch_size, sampler=SequentialSampler(dev_dataset))
    test_dataloader = DataLoader(test_dataset, batch_size, sampler=SequentialSampler(test_dataset))

    optimizer = AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    model.train()
    loss_func = nn.CrossEntropyLoss(reduction='mean')
    best_acc = 0.0

    # training LSTM
    if epochs > 0:
        for ep in range(epochs):
            logger.info("Starting epochs [{}/{}]".format(ep+1, epochs))
            for batch in tqdm(train_dataloader):
                x, y = [one.to(device) for one in batch]
                _, pred = model(x)
                loss = loss_func(pred, y)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            acc, _ = lstm_eval(model, dev_dataloader, device)
            if acc > best_acc:
                best_acc = acc
                logger.info("Saving semantic model into {}_lstm.bin".format(dataset))
                torch.save(model.state_dict(), os.path.join(graphs_saved_path, '{}_lstm.bin'.format(dataset)))
                logger.info("current best acc={:4f}".format(acc))
        # NOTE(review): the outputs collected below come from the model after
        # the last epoch, not from the best checkpoint saved above.
    else:
        # NOTE(review): the checkpoint is written as '<dataset>_lstm.bin' under
        # graphs_saved_path but is read back from 'lstm.bin' in the working
        # directory; the intended location should be confirmed.
        logger.info("loading lstm model")
        # map_location lets a checkpoint saved on GPU be restored on a CPU-only machine.
        model.load_state_dict(torch.load('lstm.bin', map_location=device))

    _, all_outs_train = lstm_eval(model, train_dev_dataloader, device)
    _, all_outs_dev = lstm_eval(model, dev_dataloader, device)
    _, all_outs_test = lstm_eval(model, test_dataloader, device)
    all_outs = np.concatenate([all_outs_train, all_outs_dev, all_outs_test], axis=0)
    return model, all_outs, corpus_ids

def gen_sem(args, corpus, word_id_map, row_tfidf, col_tfidf, weight_tfidf, thres, train_size, valid_size, labels, num_labels, node_size, device, graphs_saved_path):
    """Build the semantic graph from LSTM token representations.

    An LSTM classifier is trained on the corpus; for every document the cosine
    similarity between the LSTM outputs of each pair of positions is computed,
    and word pairs whose similarity exceeds ``thres`` are counted. The counts
    are min-max normalised into word-word edge weights and merged with the
    given doc-word TF-IDF edges.

    Args:
        args: namespace providing embed_size, hidden_size, dropout, batch_size,
            epochs, lr, weight_decay, max_len, dataset and num_layers.
        corpus: list of whitespace-tokenised documents.
        word_id_map: dict mapping a word to its vocabulary index.
        row_tfidf, col_tfidf, weight_tfidf: lists holding the row indices,
            column indices and weights of the doc-word TF-IDF edges.
        thres: similarity threshold above which a word pair is counted.
        train_size: number of training documents *excluding* the validation split.
        valid_size: number of validation documents.
        labels: integer labels aligned with ``corpus``.
        num_labels: number of classes.
        node_size: side length of the square adjacency matrix.
        device: torch device used for the LSTM.
        graphs_saved_path: directory the LSTM checkpoint is written to.

    Returns:
        scipy.sparse.csr_matrix of shape (node_size, node_size).
    """
    t = time.time()

    # training LSTM
    _model, all_outs, _corpus_ids = train_lstm(corpus, word_id_map, train_size, valid_size, labels, args.embed_size, args.hidden_size, args.dropout, args.batch_size, args.epochs, args.lr, args.weight_decay, num_labels,device, args.max_len, args.dataset, graphs_saved_path, args.num_layers)
    logger.info("Training LSTM completed")

    logger.info("all_outs:\n {}".format(all_outs))

    num_docs = all_outs.shape[0]
    pair_counts = {}
    for i in tqdm(range(num_docs)):
        word_list = corpus[i].split()
        seq_len = min(len(word_list), args.max_len)
        if seq_len < 2:
            continue  # no word pair to compare
        word_ids = [word_id_map[w] for w in word_list[:seq_len]]
        # Only the real (non-padded) positions are ever read, so restrict the
        # similarity matrix to them instead of the full padded length.
        x = all_outs[i, :seq_len, :]
        x_norm = np.linalg.norm(x, ord=2, axis=-1, keepdims=True)
        simi_mat = np.dot(x, x.T) / np.dot(x_norm, x_norm.T) # L * L
        for k in range(seq_len):
            for j in range(k+1, seq_len):
                word_k_id = word_ids[k]
                word_j_id = word_ids[j]
                if word_k_id == word_j_id:
                    continue
                if simi_mat[k, j] > thres:
                    # two orders
                    for key in ((word_k_id, word_j_id), (word_j_id, word_k_id)):
                        pair_counts[key] = pair_counts.get(key, 0) + 1

    if pair_counts:
        min_count = min(pair_counts.values())
        max_count = max(pair_counts.values())
    else:
        min_count = max_count = 0
    count_range = max_count - min_count

    # Word nodes are laid out after *all* training documents, validation split
    # included (the TF-IDF columns and node_size passed in by main use that
    # layout). train_size here excludes the validation documents, so they have
    # to be added back to obtain the word-node offset.
    word_offset = train_size + valid_size

    row, col, graph = [], [], []
    for (i, j), count in pair_counts.items():
        row.append(word_offset + i)
        col.append(word_offset + j)
        # Min-max normalisation. When every pair has the same count the range is
        # zero; fall back to a uniform weight of 1.0 instead of dividing by zero.
        graph.append((count - min_count) / count_range if count_range else 1.0)

    weight = graph + weight_tfidf
    num_edges = len(row)
    row = row + row_tfidf
    col = col + col_tfidf
    adj = sp.csr_matrix(
        (weight, (row, col)), shape=(node_size, node_size))
    logger.info("Semantic graph finish! Time spent {:2f} number of edges {}".format(time.time()-t, num_edges))
    return adj

def gen_seq(corpus, train_size, test_size, window_size, word_id_map, row_tfidf, col_tfidf, weight_tfidf, vocab):
    """Build the sequential graph from sliding-window co-occurrence (PMI).

    Every document is cut into windows of ``window_size`` consecutive words
    (a document that is not longer than the window forms a single window).
    Word pairs with positive point-wise mutual information become word-word
    edges, which are merged with the given doc-word TF-IDF edges.

    Returns:
        ``(pmi_dict, adj, row, col)``: ``pmi_dict`` maps ``"i,j"`` vocabulary
        index pairs to their PMI, ``adj`` is the CSR adjacency matrix, and
        ``row``/``col`` are the edge indices (PMI edges followed by TF-IDF edges).
    """
    t = time.time()
    vocab_size = len(vocab)
    logger.info("Generating sequential graph...")
    logger.info("windows generating...")
    windows = []
    for doc_words in corpus:
        tokens = doc_words.split()
        if len(tokens) <= window_size:
            windows.append(tokens)
            continue
        last_start = len(tokens) - window_size
        windows.extend(tokens[start: start + window_size] for start in range(last_start + 1))

    logger.info("calculating word frequency...")
    # Number of windows each word occurs in (a word counts once per window).
    word_window_freq = {}
    for window in tqdm(windows):
        for token in set(window):
            word_window_freq[token] = word_window_freq.get(token, 0) + 1

    logger.info("calculating word pair frequency...")
    word_pair_count = {}
    for window in windows:
        for later in range(1, len(window)):
            for earlier in range(later):
                later_id = word_id_map[window[later]]
                earlier_id = word_id_map[window[earlier]]
                if later_id == earlier_id:
                    continue
                # Register the pair in both directions.
                forward_key = str(later_id) + ',' + str(earlier_id)
                backward_key = str(earlier_id) + ',' + str(later_id)
                for pair_key in (forward_key, backward_key):
                    word_pair_count[pair_key] = word_pair_count.get(pair_key, 0) + 1

    num_window = len(windows)
    pmi_dict = {}
    row, col, weight = [], [], []
    logger.info("calculating pmi...")
    for key, count in word_pair_count.items():
        left, right = key.split(',')
        i, j = int(left), int(right)
        word_freq_i = word_window_freq[vocab[i]]
        word_freq_j = word_window_freq[vocab[j]]
        pmi = log((1.0 * count / num_window) /
                (1.0 * word_freq_i * word_freq_j / (num_window * num_window)))
        if pmi > 0:
            row.append(train_size + i)
            col.append(train_size + j)
            weight.append(pmi)
            pmi_dict[key] = pmi

    logger.info("create pmi graph...")
    num_edges = len(row)
    weight = weight + weight_tfidf
    row = row + row_tfidf
    col = col + col_tfidf
    node_size = train_size + vocab_size + test_size
    adj = sp.csr_matrix(
        (weight, (row, col)), shape=(node_size, node_size))
    logger.info("Sequential graph finish! Time spent {:2f} number of edges {}".format(time.time()-t, num_edges))
    return pmi_dict, adj, row, col

def gen_tfidf(corpus, word_id_map, word_doc_freq, vocab, train_size):
    """Compute the doc-word TF-IDF edges of the document/word graph.

    Documents with index below ``train_size`` keep their index as row; later
    documents are shifted by the vocabulary size. Word ``j`` is placed at
    column ``train_size + j``.

    Returns:
        ``(row, col, weight_tfidf)``: three parallel lists with one entry per
        distinct (document, word) pair, in order of first appearance.
    """
    vocab_size = len(vocab)
    num_docs = len(corpus)
    row, col, weight_tfidf = [], [], []

    for doc_idx, text in enumerate(corpus):
        tokens = text.split()

        # Term frequency within this document, keyed by word id.
        term_counts = {}
        for token in tokens:
            token_id = word_id_map[token]
            term_counts[token_id] = term_counts.get(token_id, 0) + 1

        doc_node = doc_idx if doc_idx < train_size else doc_idx + vocab_size
        emitted = set()
        for token in tokens:
            if token in emitted:
                continue
            emitted.add(token)
            j = word_id_map[token]
            idf = log(1.0 * num_docs /
                    word_doc_freq[vocab[j]])
            row.append(doc_node)
            col.append(train_size + j)
            weight_tfidf.append(term_counts[j] * idf)

    return row, col, weight_tfidf

def gen_corpus(dataset):
    """Load a dataset, shuffle its train/test documents and build the vocabulary.

    Reads ``data/<dataset>.txt`` (one tab-separated line per document: name,
    split, label) and ``data/<dataset>.clean.txt`` (the document texts, one per
    line, aligned with the first file).

    Args:
        dataset: dataset name used to build both file names.

    Returns:
        A 10-tuple:
        ``shuffle_doc_name_list`` - metadata lines, shuffled train docs first, then shuffled test docs;
        ``shuffle_doc_words_list`` - document texts in the same order;
        ``train_ids`` / ``test_ids`` - shuffled original line indices of each split;
        ``word_doc_freq`` - dict word -> number of documents containing it;
        ``word_id_map`` / ``id_word_map`` - vocabulary index lookups;
        ``vocab`` - sorted list of distinct words;
        ``labels`` - integer label per document (index into ``label_list``);
        ``label_list`` - sorted list of distinct label strings.
    """
    input1 = os.sep.join(['data', dataset])

    # Record each document's line index while reading, instead of searching the
    # name list afterwards (quadratic, and wrong for duplicated lines).
    doc_name_list = []
    train_ids = []
    test_ids = []
    with open(input1 + '.txt', 'r', encoding='latin1') as f:
        for doc_id, line in enumerate(f):
            doc_name_list.append(line.strip())
            split_name = line.split("\t")[1]
            if 'test' in split_name:
                test_ids.append(doc_id)
            elif 'train' in split_name:
                train_ids.append(doc_id)

    with open(input1 + '.clean.txt', 'r') as f:
        doc_content_list = [line.strip() for line in f]

    # Shuffle order (train first, then test) is kept so a seeded RNG yields the
    # same permutations as before.
    random.shuffle(train_ids)
    random.shuffle(test_ids)
    ids = train_ids + test_ids

    shuffle_doc_name_list = [doc_name_list[doc_id] for doc_id in ids]
    shuffle_doc_words_list = [doc_content_list[doc_id] for doc_id in ids]

    # The label is the third tab-separated field. Sorting makes the label ids
    # independent of set iteration order, which varies between interpreter runs.
    doc_labels = [doc_meta.split('\t')[2] for doc_meta in shuffle_doc_name_list]
    label_list = sorted(set(doc_labels))
    label_index = {label: idx for idx, label in enumerate(label_list)}
    labels = [label_index[label] for label in doc_labels]

    # Document frequency of every word (each word counted once per document).
    word_doc_freq = {}
    for doc_words in shuffle_doc_words_list:
        for word in dict.fromkeys(doc_words.split()):
            word_doc_freq[word] = word_doc_freq.get(word, 0) + 1

    # Sorted vocabulary for reproducible word ids across runs.
    vocab = sorted(word_doc_freq)

    word_id_map = {word: idx for idx, word in enumerate(vocab)}
    id_word_map = {idx: word for idx, word in enumerate(vocab)}

    return shuffle_doc_name_list, shuffle_doc_words_list, train_ids, test_ids, word_doc_freq, word_id_map, id_word_map, vocab, labels, label_list

def main(args, timestamp):
    """Build and pickle the sequential, syntactic and semantic graphs.

    Args:
        args: parsed options (see ``parse_args``); ``gen_seq``, ``gen_syn`` and
            ``gen_sem`` select which graphs are generated.
        timestamp: run identifier used in the output directory name
            ``saved_graphs/run_<timestamp>``.
    """
    # The CoreNLP client is only needed for the syntactic graph. It is created
    # up front so a bad path fails early, and closed in the finally-clause below.
    nlp = StanfordCoreNLP(args.corenlp, lang='en') if args.gen_syn else None
    try:
        # generate seed for reproducability
        # NOTE(review): the seed is hard-coded; args.seed is not used here.
        set_torch_seed(seed=148)

        # set gpu or cpu
        if torch.cuda.is_available():
            device = torch.device("cuda")
            logger.info("Training is running on {}".format(device))
        else:
            device = torch.device("cpu")
            logger.info("Training is running on CPU")

        # load corpus
        name, corpus, train_ids, test_ids, word_doc_freq, word_id_map, id_word_map, vocab, labels, label_list = gen_corpus(args.dataset)
        data = [train_ids, test_ids, corpus, labels, vocab, word_id_map, id_word_map, label_list]

        with open('./data/{}_data.json'.format(args.dataset), 'w') as fdata:
            json.dump(data, fdata)
        num_labels = len(label_list)

        num_train_docs = len(train_ids)
        node_size = num_train_docs + len(vocab) + len(test_ids)
        row_tfidf, col_tfidf, weight_tfidf = gen_tfidf(corpus, word_id_map, word_doc_freq, vocab, num_train_docs)

        # create directory for saving graphs and the LSTM checkpoint; the run
        # configuration is written only when the directory is newly created
        graphs_saved_path = "saved_graphs/run_{}".format(timestamp)
        argparse_dict = vars(args)
        if not os.path.exists(graphs_saved_path):
            os.makedirs(graphs_saved_path)
            with open(os.path.join(graphs_saved_path, 'graph_config.json'), 'w') as fjson:
                json.dump(argparse_dict, fjson)

        # generate sequential graph if true
        if args.gen_seq:
            pmi_dict, seq_adj, row, col = gen_seq(corpus, num_train_docs, len(test_ids), args.window_size, word_id_map, row_tfidf, col_tfidf, weight_tfidf, vocab)

            # pickle graph object
            pickle_graph(
                graph_type='sequential',
                dataset=args.dataset,
                graph_adj=seq_adj,
                graph_saved_path=graphs_saved_path)

        # generate syntatic graph if true
        if args.gen_syn:
            syn_adj = gen_syn(corpus, nlp, row_tfidf, col_tfidf, weight_tfidf, word_id_map, node_size, num_train_docs)

            # pickle graph object
            pickle_graph(
                graph_type='syntactic',
                dataset=args.dataset,
                graph_adj=syn_adj,
                graph_saved_path=graphs_saved_path)

        # generate semantic graph if true
        if args.gen_sem:
            # hold out the last 10% of the training documents for validation
            valid_size = int(0.1*num_train_docs)
            train_size = num_train_docs - valid_size
            sem_adj = gen_sem(args, corpus, word_id_map, row_tfidf, col_tfidf, weight_tfidf, args.thres, train_size, valid_size, labels, num_labels, node_size, device, graphs_saved_path)

            # pickle graph object
            pickle_graph(
                graph_type='semantic',
                dataset=args.dataset,
                graph_adj=sem_adj,
                graph_saved_path=graphs_saved_path)
    finally:
        # Shut the CoreNLP backend down so its server process does not linger.
        if nlp is not None:
            nlp.close()

def _str2bool(value):
    """Convert a command-line token such as 'true', '0' or 'no' into a bool."""
    if isinstance(value, bool):
        return value
    lowered = str(value).strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))


def parse_args(args=None):
    """Parse the graph-generation options.

    Args:
        args: optional list of argument strings; ``None`` reads ``sys.argv``.
            Unknown arguments are ignored (``parse_known_args``), which keeps
            this usable inside a notebook kernel.

    Returns:
        ``argparse.Namespace`` with the parsed options.
    """
    parser = argparse.ArgumentParser(
        description='Training and Testing Knowledge Graph Embedding Models',
        usage='train.py [<args>] [-h | --help]'
    )
    # Without a type, "--gen_syn False" would be stored as the non-empty (and
    # therefore truthy) string 'False'; _str2bool turns it into a real bool.
    parser.add_argument('--gen_syn', type=_str2bool, default=True)
    parser.add_argument('--gen_sem', type=_str2bool, default=True)
    parser.add_argument('--gen_seq', type=_str2bool, default=True)
    parser.add_argument('--dataset', type=str, default='mr')
    parser.add_argument('--window_size', type=int, default=7)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--embed_size", type=int, default=200)
    parser.add_argument("--max_len", default=512, type=int)
    parser.add_argument("--hidden_size", type=int, default=200)
    parser.add_argument("--dropout", default=0, type=float)
    parser.add_argument("--weight_decay", default=1e-6, type=float)
    parser.add_argument("--epochs", default=20, type=int)
    parser.add_argument("--seed", default=32, type=int)
    parser.add_argument("--corenlp", default='./stanford-corenlp-4.5.6')
    parser.add_argument('--thres', default=0.05, type=float, help="the threshold of semantic graph")
    parser.add_argument('--num_layers', default=1, type=int, help="number of layers in LSTM")
    return parser.parse_known_args(args)[0]


# Driver for the graph-generation cell: build a run timestamp, configure
# logging, then generate the graphs.
# NOTE(review): setup_logging (used below) and set_torch_seed / pickle_graph
# (used by main) are not defined in the code visible above this point; they
# must already exist in the kernel when this cell runs - confirm cell order.

# retrieve execution timestamp for logs (Singapore local time)
sgt = pytz.timezone('Asia/Singapore')
timestamp = datetime.now(sgt).strftime("%Y-%m-%d_%H-%M-%S")

# set up logging
# "__file__" is a string literal here, not the __file__ variable: its dirname is
# empty, so the log directory resolves to '../logs' relative to the current
# working directory.
log_path = os.path.join(Path(os.path.abspath(os.path.dirname("__file__")), '../logs'))
logger = setup_logging(log_path=log_path, log_name='graph_log', log_filename='graph', timestamp=timestamp)

# parse_args() is called without arguments, so options come from sys.argv and
# unknown ones are ignored.
main(parse_args(), timestamp)

In [None]:
%cd ../

/content/gdrive/MyDrive/dl


In [None]:
%ls
%cd ./stanford-corenlp-4.5.6/

R8_data.json  R8_lstm.bin  [0m[01;34mstanford-corenlp-4.5.6[0m/
/content/gdrive/MyDrive/dl/stanford-corenlp-4.5.6


In [None]:
%ls

stanford-corenlp-4.5.6.jar


MODEL_PYTORCH


In [None]:
import torch
import torch.nn as nn
from torch_geometric.nn import GCNConv
from torch_sparse import SparseTensor




class TGCN(nn.Module):
    """Multi-graph GCN: one node-embedding table per graph followed by a stack
    of ``GraphConv_fix`` layers; the per-graph outputs are averaged.

    Args:
        in_dim: number of rows of every embedding table.
        hidden_dim: embedding size and width of the hidden layers.
        out_dim: width of the last layer.
        num_graphs: number of graphs (and embedding tables).
        dropout: dropout probability handed to every layer.
        n_layers: requested depth; a first and a last layer are always created,
            with ``n_layers - 2`` hidden layers in between.
        bias, featureless: accepted but not used; the layers are built with
            fixed values for both.
        act: activation selector forwarded to every layer.
    """

    def __init__(self, in_dim, hidden_dim, out_dim, num_graphs, dropout=0.1, n_layers=2, bias=False, featureless=True, act='relu'):
        super().__init__()
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.dropout = dropout
        print(num_graphs)

        # Create all tables first, then re-initialise them, so the random
        # number generator is consumed in a fixed order.
        self.embedding_list = nn.ModuleList()
        for _ in range(num_graphs):
            self.embedding_list.append(nn.Embedding(in_dim, hidden_dim))
        for table in self.embedding_list:
            nn.init.xavier_uniform_(table.weight)

        # (output width, featureless flag) of each layer, in order.
        layer_specs = [(hidden_dim, True)] + [(hidden_dim, False)] * (n_layers - 2) + [(out_dim, False)]
        self.layers = nn.ModuleList([
            GraphConv_fix(in_dim=hidden_dim, out_dim=layer_out, num_graphs=num_graphs, dropout=dropout,
                          featureless=is_featureless, bias=False, act=act)
            for layer_out, is_featureless in layer_specs
        ])
        self.num_graphs = num_graphs

    def word_dropout(self, inputs, keepprob):
        """Embed ``inputs`` with every table and zero out whole rows at random.

        A row is dropped when a uniform sample exceeds ``keepprob``; the same
        rows are dropped in every table and the result is scaled by
        ``1 / keepprob``.
        """
        embedded = [table(inputs) for table in self.embedding_list]
        reference = embedded[0]
        drop_mask = torch.rand((reference.size(0), 1), device=reference.device) > keepprob
        return [feat.masked_fill(drop_mask, 0) * (1.0 / keepprob) for feat in embedded]

    def forward(self, inputs, edge_indexs, edge_weights, keepprob):
        """Return the mean over graphs of the last layer's per-graph outputs."""
        per_graph = self.word_dropout(inputs, keepprob)
        for layer in self.layers:
            per_graph = layer(per_graph, edge_indexs, edge_weights, keepprob)
        return torch.mean(torch.stack(per_graph, dim=0), dim=0)

class GraphConv_fix(nn.Module):
    """One multi-graph convolution layer.

    Each graph has its own ``GCNConv`` (no self-loops, no normalisation) and
    its own square ``inter_convs`` weight. ``forward`` convolves every graph's
    features over that graph and then combines them in ``atten``.

    Args:
        in_dim, out_dim: input and output feature widths.
        num_graphs: number of graphs handled by the layer.
        dropout: dropout probability applied to the inputs unless ``featureless``.
        featureless: when True the input dropout is skipped.
        bias: stored on the instance; no bias parameter is created.
        act: ``'relu'`` selects LeakyReLU(0.2); any other value selects Tanh.
    """

    def __init__(self, in_dim, out_dim, num_graphs, dropout=0.1, featureless=False, bias=False, act='relu'):
        super().__init__()
        self.intra_convs = nn.ModuleList(
            [GCNConv(in_dim, out_dim, add_self_loops=False, normalize=False) for _ in range(num_graphs)]
        )

        self.inter_convs = nn.ParameterList()
        for _ in range(num_graphs):
            weight = nn.Parameter(torch.zeros((out_dim, out_dim), dtype=torch.float), requires_grad=True)
            nn.init.xavier_uniform_(weight)
            self.inter_convs.append(weight)

        self.act = nn.LeakyReLU(negative_slope=0.2) if act == 'relu' else nn.Tanh()
        self.bias = bias
        self.dropout = nn.Dropout(dropout)
        self.featureless = featureless

    def atten(self, supports):
        """Combine the per-graph features using the graph at index 2 only.

        Graph order is 0: sequential, 1: semantic, 2: syntactic. The features
        of graph 2 are projected with its ``inter_convs`` weight (the entry in
        ``supports`` is replaced by the projection), and every graph's output
        is ``act(projection - support)``.
        """
        graph_idx = 2
        projected = torch.matmul(supports[graph_idx], self.inter_convs[graph_idx])
        supports[graph_idx] = projected
        return [self.act(projected - support) for support in supports]

    def forward(self, inputs, edge_indexs, edge_weights, keepprob):
        """Convolve each graph's features and combine them.

        Args:
            inputs: list with one feature tensor per graph; when the layer is
                not featureless the entries are replaced in place by their
                dropped-out version.
            edge_indexs: per-graph tensors whose rows 0 and 1 hold the row and
                column indices of the edges.
            edge_weights: per-graph edge weight tensors.
            keepprob: accepted for interface compatibility; not used here.

        Returns:
            List with one output tensor per graph. The mean over graphs is
            additionally stored in ``self.embedding``.
        """
        num_nodes = inputs[0].size(0)

        if not self.featureless:
            for idx, feat in enumerate(inputs):
                inputs[idx] = self.dropout(feat)

        convolved = []
        for graph_idx, conv in enumerate(self.intra_convs):
            edges = edge_indexs[graph_idx]
            adj = SparseTensor(row=edges[0], col=edges[1], value=edge_weights[graph_idx],
                               sparse_sizes=(num_nodes, num_nodes))
            convolved.append(self.act(conv(inputs[graph_idx], adj.t())))

        outputs = self.atten(convolved)

        self.embedding = torch.mean(torch.stack(outputs, dim=0), dim=0)

        return outputs

UTIL

In [None]:
import numpy as np
import pickle as pkl
import scipy.sparse as sp
import os
import torch
import json
import random
import time
import logging
from logging.handlers import RotatingFileHandler
import logging

logger = logging.getLogger(__name__)


def parse_index_file(filename):
    """Parse index file.

    Reads one integer per line from ``filename`` and returns them as a list.
    Blank lines are skipped; any other non-integer line raises ``ValueError``.
    """
    index = []
    # The context manager closes the handle even when a line fails to parse.
    with open(filename) as index_file:
        for line in index_file:
            stripped = line.strip()
            if stripped:
                index.append(int(stripped))
    return index


def sample_mask(idx, l):
    """Create mask.

    Returns a boolean NumPy array of length ``l`` that is True at the
    positions listed in ``idx`` and False elsewhere.
    """
    # The builtin bool is used as dtype: the np.bool alias was removed in NumPy 1.24.
    mask = np.zeros(l, dtype=bool)
    mask[idx] = True
    return mask

def load_corpus_torch(args, device):
    """
    Load the pickled graphs and the corpus metadata, torch tensor version.

    For each of the 'seq', 'sem' and 'syn' graphs the adjacency matrix is read
    from ``./saved_graphs/run_<args.run_id>/<dataset>.<graph>_adj`` when
    ``args.run_id`` is set (a missing file is logged and skipped), otherwise
    from ``./data/<dataset>.<graph>_adj``. Metadata comes from
    ``./data/<dataset>_data.json``.

    Node layout of the label/mask vectors: training documents (the last 10% of
    them form the validation split), then one node per vocabulary word, then
    the test documents.

    :param args: namespace providing ``dataset`` and ``run_id``
    :param device: torch device the returned tensors are moved to
    Returns:
        adjs_new: list of symmetrised adjacency matrices in the order
            sequential, semantic, syntactic (minus any that were skipped)
        y_train, y_val, y_test: long tensors holding the labels of the
            respective split and 0 everywhere else
        train_mask, val_mask, test_mask: float tensors, 1 on the split's nodes
        train_size: number of training documents including the validation split
        val_size, test_size: sizes of the validation and test splits
        num_labels: number of classes
    """

    adjs = []
    for adj in ['seq','sem','syn']:
        logger.info("Loading {} graph".format(adj))
        # NOTE(review): pickle.load executes code embedded in the file; only
        # load graph files produced by this project.
        if args.run_id is not None:
            graph_path = './saved_graphs/run_{}/{}.{}_adj'.format(args.run_id,args.dataset,adj)
            try:
                with open(graph_path, 'rb') as fgraph:
                    adjs.append(pkl.load(fgraph))
                logger.info("Successfully loaded {} graph".format(adj))
            except FileNotFoundError:
                # Only a missing file is tolerated; a corrupt or unreadable
                # file now surfaces instead of being reported as "not found".
                logger.info('Unable to locate run_{}/{}.{}_adj in the directory'.format(args.run_id,args.dataset,adj))
        else:
            with open('./data/{}.{}_adj'.format(args.dataset,adj),'rb') as fgraph:
                adjs.append(pkl.load(fgraph))

    with open('./data/{}_data.json'.format(args.dataset),'r') as fdata:
        data = json.load(fdata)
    train_ids, test_ids, corpus, labels, vocab, word_id_map, id_word_map, label_list = data

    num_labels = len(label_list)
    train_size = len(train_ids)

    val_size = int(0.1*len(train_ids))
    test_size = len(test_ids)

    # Word nodes sit between the training and the test documents and get a
    # placeholder label of 0.
    labels = np.asarray(labels[:train_size]+[0]*len(vocab)+labels[train_size:])
    logger.info("Total number of nodes: {}".format(len(labels)))

    idx_train = range(train_size-val_size)
    idx_val = range(train_size-val_size, train_size)
    idx_test = range(train_size+len(vocab), train_size+len(vocab)+test_size)

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask] = labels[train_mask]
    y_val[val_mask] = labels[val_mask]
    y_test[test_mask] = labels[test_mask]

    # Symmetrise every graph by keeping, for each pair (i, j), the larger of
    # the two directed weights.
    adjs_new = []
    for adj in adjs:
        transpose_larger = adj.T > adj
        adj = adj + adj.T.multiply(transpose_larger) - adj.multiply(transpose_larger)
        adjs_new.append(adj)

    y_train = torch.tensor(y_train, dtype=torch.long).to(device)
    y_val = torch.tensor(y_val, dtype=torch.long).to(device)
    y_test = torch.tensor(y_test, dtype=torch.long).to(device)
    train_mask = torch.tensor(train_mask, dtype=torch.float).to(device)
    val_mask = torch.tensor(val_mask, dtype=torch.float).to(device)
    test_mask = torch.tensor(test_mask, dtype=torch.float).to(device)

    return adjs_new, y_train, y_val, y_test, train_mask, val_mask, test_mask, train_size, val_size,test_size, num_labels

def get_edge_tensor_list(adj_list, device):
    """
    Convert a list of COO adjacency matrices into edge-index / edge-weight tensors.

    Args:
        adj_list [list]: adjacency objects exposing ``row``, ``col`` and ``data``
            (e.g. ``scipy.sparse.coo_matrix``)
        device: target passed to ``Tensor.to`` for every tensor created

    Returns:
        (indice_list, data_list): for each adjacency, a ``(2, nnz)`` long tensor
        stacking rows over columns, and a float tensor of the matching values.
    """
    indice_list, data_list = [], []
    for adj in adj_list:
        # Build the two endpoint vectors first, then stack them as [rows; cols].
        endpoints = [
            torch.tensor(axis_ids, dtype=torch.long).to(device)
            for axis_ids in (adj.row, adj.col)
        ]
        indice_list.append(torch.stack(endpoints, dim=0))
        data_list.append(torch.tensor(adj.data, dtype=torch.float).to(device))
    return indice_list, data_list

def get_edge_tensor(adj):
    """
    Turn one COO adjacency into an edge-index tensor and an edge-weight tensor.

    Args:
        adj: object exposing ``row``, ``col`` and ``data`` (e.g. ``scipy.sparse.coo_matrix``)

    Returns:
        (indice, data): ``indice`` is a ``(2, nnz)`` long tensor with rows on the
        first axis entry and columns on the second; ``data`` is a float tensor.
        Both are left on the default (CPU) device.
    """
    indice = torch.stack(
        (
            torch.tensor(adj.row, dtype=torch.long),
            torch.tensor(adj.col, dtype=torch.long),
        ),
        dim=0,
    )
    return indice, torch.tensor(adj.data, dtype=torch.float)

def sparse_to_tuple(sparse_mx):
    """
    Convert a sparse matrix (or a list of them) to ``(coords, values, shape)`` tuples.

    ``coords`` is an ``(nnz, 2)`` array of (row, col) pairs. When a list is
    passed, its elements are replaced in place and the same list is returned.
    """
    def _as_triplet(matrix):
        # Non-COO inputs are converted so that .row/.col are available.
        coo = matrix if sp.isspmatrix_coo(matrix) else matrix.tocoo()
        return np.vstack((coo.row, coo.col)).transpose(), coo.data, coo.shape

    if not isinstance(sparse_mx, list):
        return _as_triplet(sparse_mx)

    for position, matrix in enumerate(sparse_mx):
        sparse_mx[position] = _as_triplet(matrix)
    return sparse_mx


def preprocess_features(features):
    """
    Row-normalize feature matrix and convert to tuple representation.

    Each row is divided by its sum; rows that sum to zero are left as zeros.

    Args:
        features: sparse feature matrix (anything supporting ``.sum(1)`` and
            left-multiplication by a SciPy diagonal matrix)

    Returns:
        Whatever ``sparse_to_tuple`` returns for the normalized matrix.
    """
    rowsum = np.array(features.sum(1)).flatten()
    # Integer (e.g. raw count) matrices would make np.power raise
    # "Integers to negative integer powers are not allowed"; promote them first.
    # Float inputs keep their dtype.
    if not np.issubdtype(rowsum.dtype, np.inexact):
        rowsum = rowsum.astype(float)
    # Zero rows produce inf here on purpose; they are zeroed on the next line,
    # so the divide-by-zero warning is only noise.
    with np.errstate(divide='ignore'):
        r_inv = np.power(rowsum, -1)
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    return sparse_to_tuple(features)

def preprocess_features_origin(features):
    """
    Row-normalize feature matrix.

    Each row is divided by its sum; rows that sum to zero are left as zeros.

    Args:
        features: sparse feature matrix (anything supporting ``.sum(1)`` and
            left-multiplication by a SciPy diagonal matrix)

    Returns:
        The normalized matrix (product of the inverse-row-sum diagonal and ``features``).
    """
    rowsum = np.array(features.sum(1)).flatten()
    # Integer (e.g. raw count) matrices would make np.power raise
    # "Integers to negative integer powers are not allowed"; promote them first.
    # Float inputs keep their dtype.
    if not np.issubdtype(rowsum.dtype, np.inexact):
        rowsum = rowsum.astype(float)
    # Zero rows produce inf here on purpose; they are zeroed on the next line,
    # so the divide-by-zero warning is only noise.
    with np.errstate(divide='ignore'):
        r_inv = np.power(rowsum, -1)
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    return features

def normalize_adj(adj):
    """
    Symmetrically normalize adjacency matrix.

    Scales the matrix on both sides by the inverse square root of its row sums
    (rows summing to zero get a scale of 0) and returns the result in COO format.
    """
    coo = sp.coo_matrix(adj)
    degree = np.array(coo.sum(1))
    inv_sqrt_degree = np.power(degree, -0.5).flatten()
    # Isolated nodes (row sum 0) yield inf; replace so they contribute nothing.
    inv_sqrt_degree[np.isinf(inv_sqrt_degree)] = 0.
    scaling = sp.diags(inv_sqrt_degree)
    column_scaled = coo.dot(scaling)
    return column_scaled.transpose().dot(scaling).tocoo()


def preprocess_adj(adj):
    """
    Preprocessing of adjacency matrix for simple GCN model and conversion to tuple representation.

    Adds the identity (self-loops) to ``adj``, normalizes the sum with
    ``normalize_adj`` and hands the result to ``sparse_to_tuple``.
    """
    with_self_loops = adj + sp.eye(adj.shape[0])
    return sparse_to_tuple(normalize_adj(with_self_loops))

def preprocess_adj_mix(adj):
    """
    Convert an adjacency matrix to tuple representation without normalizing it.

    Args:
        adj: sparse adjacency matrix

    Returns:
        ``sparse_to_tuple(adj)`` - the matrix is passed through unchanged.
    """
    # NOTE(review): this function used to compute ``adj + sp.eye(adj.shape[0])``
    # into a local named ``adj_normalized`` and then ignore it. The returned value
    # has always been built from the raw ``adj``, so the dead allocation is dropped
    # here. If self-loops were actually intended, that is a modelling change and
    # should be made deliberately - confirm with the author.
    return sparse_to_tuple(adj)

def preprocess_adj_tensor(adj, device):
    """
    Preprocessing of adjacency matrix for simple GCN model, returned as a torch sparse tensor.

    Adds the identity to ``adj``, normalizes it with ``normalize_adj`` and builds
    a float sparse COO tensor from the result, moved to ``device``.
    """
    normalized = normalize_adj(adj + sp.eye(adj.shape[0]))
    edge_index = np.stack([normalized.row, normalized.col], axis=0)
    sparse_tensor = torch.sparse_coo_tensor(
        edge_index, normalized.data, normalized.shape, dtype=torch.float
    )
    return sparse_tensor.to(device)

def preprocess_adj_mix_tensor(adj, device):
    """
    Convert a sparse adjacency matrix to a dense float tensor on ``device``.

    Args:
        adj: SciPy sparse matrix
        device: target passed to ``Tensor.to``

    Returns:
        Dense ``torch.float`` tensor holding the values of ``adj`` unchanged.
    """
    # NOTE(review): this function used to compute ``adj + sp.eye(adj.shape[0])``
    # into an unused local; the returned tensor has always been built from the raw
    # ``adj``. The dead allocation is dropped. Confirm with the author whether
    # self-loops were meant to be included.
    # toarray() yields a plain ndarray (todense() would give the legacy np.matrix).
    return torch.tensor(adj.toarray(), dtype=torch.float).to(device)

def pickle_graph(graph_type:str, dataset, graph_adj, graph_saved_path):
    """
    Function to pickle graph using context manager

    The file is written to ``<graph_saved_path>/<dataset>.<graph_type[:3]>_adj``.

    Args:
        graph_type [str]: name of graph to be serialised; only its first three
            characters appear in the file name
        dataset [str]: name of dataset, used as the file-name prefix
        graph_adj: object handed to ``pkl.dump`` unchanged
        graph_saved_path [str]: directory to write into (not created here)
    """
    logger = logging.getLogger(__name__)

    start_time = time.time()
    logger.info(f"Persisting {graph_type} graph")

    # Format only the file name. Formatting the joined path (as before) breaks
    # whenever the directory itself contains '{' or '}'.
    file_name = '{}.{}_adj'.format(dataset, graph_type[:3])
    with open(os.path.join(graph_saved_path, file_name), 'wb') as f:
        pkl.dump(graph_adj, f)

    # The 2 belongs to round(); it was previously passed to format() by mistake,
    # which silently rounded the duration to whole seconds.
    elapsed = round(time.time() - start_time, 2)
    logger.info("Successfully persisted {} graph. Serialisation took {} seconds".format(graph_type, elapsed))

def set_torch_seed (seed:int=148):
    """
    Seed Python's ``random``, NumPy and PyTorch with the same value so runs are reproducible.

    Args:
        seed [int]: seed number (defaults to 148)
    """
    # The argument used to be overwritten with a hard-coded 148 here, so callers
    # could never choose a different seed; the parameter is now honoured.
    print(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

def setup_logging(log_path, log_name, timestamp, log_filename='model', max_bytes=1048576, backup_count=3):
    """
    Set up logging with RotatingFileHandler.

    Calling this again for the same ``log_name`` (e.g. re-running the notebook
    cell) replaces the handlers installed by the previous call instead of
    stacking new ones, so each record is emitted once per handler.

    Args:
    - log_path (str): Path to the directory where logs will be stored (created if missing).
    - log_name (str): Name of the logger to configure (passed to ``logging.getLogger``).
    - timestamp (str): Suffix appended to the log file name.
    - log_filename (str): Base name of the log file (timestamp will be appended).
    - max_bytes (int): Maximum size of each log file before it is rotated.
    - backup_count (int): Number of backup log files to keep.

    Returns:
    - logging.Logger: the configured logger, at DEBUG level, with one file and
      one console handler.
    """
    # Create the log directory if it doesn't exist
    os.makedirs(log_path, exist_ok=True)

    # Set up logging
    logger = logging.getLogger(log_name)
    logger.setLevel(logging.DEBUG)

    # Drop (and close) handlers left by an earlier call. Only handlers tagged by
    # this function are touched, so handlers attached elsewhere survive.
    for stale in [h for h in logger.handlers if getattr(h, '_installed_by_setup_logging', False)]:
        logger.removeHandler(stale)
        stale.close()

    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

    # Rotating file handler
    log_file = os.path.join(log_path, f"{log_filename}_{timestamp}.log")
    handler = RotatingFileHandler(log_file, maxBytes=max_bytes, backupCount=backup_count)
    handler.setFormatter(formatter)

    # Stream handler to print logs to the terminal
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.DEBUG)  # Adjust log level if needed
    console_handler.setFormatter(formatter)

    for fresh in (handler, console_handler):
        fresh._installed_by_setup_logging = True
        logger.addHandler(fresh)

    # NOTE(review): the logger still propagates to the root logger, so an
    # environment that configures root handlers will print each record a second
    # time in the root format. Set ``logger.propagate = False`` at the call site
    # if that is unwanted.
    return logger


main

In [None]:
!pip install deepdish

In [None]:
!pip uninstall numpy
!pip install numpy==1.23.1

In [None]:
from __future__ import division, print_function

import argparse
import json
import os
import random
import time
from typing import List, Dict
from pathlib import Path
from datetime import datetime
import pytz

# import tensorflow as tf
import torch
import torch.nn as nn
from sklearn import metrics
import deepdish as dd
from tqdm import trange

os.environ['CUDA_VISIBLE_DEVICES'] = "0"

def parse_args(args=None):
    """
    Build the command-line parser for training/evaluation and parse ``args``.

    Unknown arguments are ignored (``parse_known_args``), so the function also
    works when extra launcher arguments are present in ``sys.argv``.

    Args:
        args [list | None]: argument strings to parse; ``None`` means ``sys.argv[1:]``

    Returns:
        argparse.Namespace with the recognised options.
    """
    def _str2bool(value):
        # Without an explicit converter argparse stores the raw string, and
        # "--do_train False" becomes the truthy string 'False'.
        if isinstance(value, bool):
            return value
        lowered = str(value).strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        if lowered in ('false', 'f', 'no', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(
        description='Training and Testing Knowledge Graph Embedding Models',
        usage='train.py [<args>] [-h | --help]'
    )
    parser.add_argument('--do_train', default = True, type=_str2bool)
    parser.add_argument('--do_valid', default = True, type=_str2bool)
    parser.add_argument('--do_test', default = True, type=_str2bool)
    parser.add_argument('--no_sparse', action='store_true')
    parser.add_argument("--load_ckpt", action='store_true')
    parser.add_argument('--featureless', action='store_true')
    parser.add_argument('--use_sem', action='store_true')
    parser.add_argument('--use_syn', action='store_true')
    parser.add_argument('--use_seq', action='store_true')
    parser.add_argument("--save_path", type=str, default='./saved_model', help="the path of saved model")
    parser.add_argument('--dataset', type=str, default='mr', help='dataset name, default to mr')
    parser.add_argument('--model', type=str, default='gcn', help='model name, default to gcn')
    parser.add_argument('--lr', '--learning_rate', default=0.00002, type=float)   # 0.002/0.0002
    parser.add_argument("--epochs", default=300, type=int)
    parser.add_argument("--hidden", default=200, type=int)
    parser.add_argument("--layers", default=2, type=int)
    parser.add_argument("--dropout", default=0.8, type=float)   # 0.5/0.3/0.1
    parser.add_argument("--weight_decay", default=0.000001, type=float)
    parser.add_argument("--early_stop", default=300, type=int)
    parser.add_argument("--max_degree", default=3, type=int)
    parser.add_argument("--model_name", default='model', type=str)
    parser.add_argument("--run_id", default='2024-04-20_01-33-29', type=str)
    return parser.parse_known_args(args)[0]

def save_model(model, optimizer, args, timestamp):
    '''
    Persist one training checkpoint under ``<args.save_path>/run_<timestamp>/``.

    Writes ``<args.model_name>_config.json`` (the parsed arguments) and
    ``<args.model_name>.bin`` (model and optimizer state dicts), creating the run
    directory on first use. Logs through the module-level ``logger``.

    Returns:
        True once both files have been written.
    '''
    run_dir = os.path.join(args.save_path, 'run_{}'.format(timestamp))
    if not os.path.exists(run_dir):
        print(run_dir)
        os.makedirs(run_dir)

    config = vars(args)
    config_file = os.path.join(args.save_path, 'run_{}/{}_config.json'.format(timestamp, args.model_name))
    with open(config_file, 'w') as fjson:
        json.dump(config, fjson)
    logger.info("Configurations for training:")
    logger.debug(config)

    checkpoint = dict(
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
    )
    checkpoint_file = os.path.join(args.save_path, 'run_{}/{}.bin'.format(timestamp, args.model_name))
    torch.save(checkpoint, checkpoint_file)

    return True

def train(args, features, train_label, train_mask, val_label, val_mask, test_label, test_mask, model, indice_list, weight_list)-> List[Dict]:
    """
    Full-batch training loop with per-epoch validation/test evaluation.

    Each epoch runs one optimizer step on the masked training loss, evaluates
    on the validation and test masks, and saves a checkpoint whenever the
    validation loss reaches a new minimum. The checkpoint on disk after this
    function returns is therefore the best-validation model; a fallback save
    happens only if no epoch ever produced a checkpoint.

    Relies on the module-level names ``logger`` and ``timestamp`` (the latter is
    forwarded to ``save_model``), plus the ``evaluate`` and ``save_model`` functions.

    Args:
        args: namespace providing ``lr``, ``weight_decay``, ``epochs``, ``dropout``
            and ``early_stop`` (also forwarded to ``save_model``/``evaluate``)
        features, indice_list, weight_list: inputs forwarded to ``model``
        train_label, val_label, test_label: label tensors fed to CrossEntropyLoss
        train_mask, val_mask, test_mask: float masks weighting the per-node losses
        model: module called as ``model(features, indice_list, weight_list, keep)``

    Returns:
        ``[loss_results, acc_results]`` - two dicts of per-epoch lists keyed
        ``train_/valid_/test_loss`` and ``train_/valid_/test_acc``.
    """
    cost_train, cost_valid, cost_test = [], [], []
    acc_train, acc_valid, acc_test = [], [], []

    # Start from +inf so the first finite validation loss is always checkpointed
    # (a fixed cap of 10.0 would silently skip checkpoints for larger losses).
    min_cost = float('inf')
    # Tracks whether ANY checkpoint was written. The previous flag was reset on
    # every non-improving epoch, so the end-of-training save overwrote the best
    # checkpoint with the last-epoch weights, and it was undefined for epochs == 0.
    checkpoint_written = False

    optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    loss_fct = nn.CrossEntropyLoss(reduction='none')
    model.train()
    for epoch in range(args.epochs):

        t = time.time()

        # Training step; the 4th argument is 1 - dropout here and 1 in evaluate()
        outs = model(features, indice_list, weight_list, 1-args.dropout)
        pre_loss = loss_fct(outs, train_label)
        train_pred = torch.argmax(outs, dim=-1)
        # Dividing by mask.mean() turns the mean over all nodes into a mean over masked nodes
        ce_loss = (pre_loss * train_mask/train_mask.mean()).mean()
        train_acc = ((train_pred == train_label).float() * train_mask/train_mask.mean()).mean()
        loss = ce_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        model.eval()
        # Validation
        valid_cost, valid_acc, _, _, _ = evaluate(args,
            features, val_label, val_mask, model, indice_list, weight_list)

        # Testing (logged only; not used for checkpoint selection)
        test_cost, test_acc, _, _, _ = evaluate(args,
            features, test_label, test_mask, model, indice_list, weight_list)
        model.train()

        cost_valid.append(valid_cost)

        cost_train.append(loss.item())
        cost_test.append(test_cost)
        acc_train.append(train_acc.item())
        acc_valid.append(valid_acc)
        acc_test.append(test_acc)

        logger.info("\n Epoch: {:04d} train_loss= {:.5f} train_acc= {:.5f} val_loss= {:.5f} val_acc= {:.5f} test_loss= {:.5f} test_acc= {:.5f} time= {:.5f}".format(
        epoch + 1, loss.item(), train_acc.item(), valid_cost, valid_acc, test_cost, test_acc, time.time() - t))

        # save model on every new best validation loss
        if cost_valid[-1] < min_cost:
            save_model(model, optimizer, args, timestamp)
            checkpoint_written = True
            min_cost = cost_valid[-1]
            logger.info("Current best loss {:.5f}".format(min_cost))

        # early stoppage implementation
        # training loop terminates if validation cost exceeds mean of previous epochs
        if epoch > args.early_stop and cost_valid[-1] > np.mean(cost_valid[-(args.early_stop + 1):-1]):
            logger.info("Early stopping...")
            break

    # Fallback so later loading always finds a checkpoint (e.g. epochs == 0 or
    # a validation loss that was never comparable, such as NaN).
    if not checkpoint_written:
        save_model(model, optimizer, args, timestamp)
    logger.info("Optimization Finished!")

    loss_results = {
        'train_loss': cost_train,
        'valid_loss': cost_valid,
        'test_loss': cost_test
    }

    acc_results = {
        'train_acc': acc_train,
        'valid_acc': acc_valid,
        'test_acc': acc_test
    }

    return [loss_results, acc_results]

def evaluate(args, features, label, mask, model, indice_list, weight_list):
    """
    Run one gradient-free forward pass and score it on the nodes selected by ``mask``.

    Args:
        args: unused here; kept for a uniform call signature
        features, indice_list, weight_list: forwarded to ``model`` (4th argument is 1)
        label: target tensor for CrossEntropyLoss
        mask: float weights per node; loss and accuracy are rescaled by ``mask.mean()``
        model: callable returning per-node class scores

    Returns:
        (loss, accuracy, predictions, labels, seconds) where loss/accuracy are
        Python floats, predictions/labels are NumPy arrays covering all nodes
        (not only the masked ones), and seconds is the elapsed wall time.
    """
    started = time.time()
    criterion = nn.CrossEntropyLoss(reduction='none')
    with torch.no_grad():
        logits = model(features, indice_list, weight_list, 1)
        per_node_loss = criterion(logits, label)
        pred = torch.argmax(logits, dim=-1)
        loss = (per_node_loss * mask/mask.mean()).mean()
        correct = (pred == label).float()
        acc = (correct * mask/mask.mean()).mean()
    elapsed = time.time() - started
    return loss.item(), acc.item(), pred.cpu().numpy(), label.cpu().numpy(), elapsed

def load_ckpt(model):
    """
    Overwrite the graph-convolution weights of ``model`` with arrays read from ``./gcn.h5``.

    For each of the two layers and three graphs, the ``intra_convs`` and
    ``inter_convs`` entries of the state dict are replaced by the transposed
    array stored under the matching ``gcn_graphconvolution_mix1_*`` key
    (presumably exported from an earlier TensorFlow model - confirm). All other
    state-dict entries are kept, and the result is loaded back into ``model``.
    """
    state = model.state_dict()
    stored = dd.io.load('./gcn.h5')
    for layer in range(2):
        for graph in range(3):
            # Stored layers are numbered from 1; "inter" weights repeat the graph digit.
            name_pairs = (
                ('layers.{}.intra_convs.{}'.format(layer, graph),
                 'gcn_graphconvolution_mix1_{}_vars_weights_{}:0'.format(layer + 1, graph)),
                ('layers.{}.inter_convs.{}'.format(layer, graph),
                 'gcn_graphconvolution_mix1_{}_vars_weights_{}{}:0'.format(layer + 1, graph, graph)),
            )
            for target_key, stored_key in name_pairs:
                state[target_key] = torch.tensor(stored[stored_key].T, dtype=torch.float)
    model.load_state_dict(state)

# tf.compat.v1.disable_eager_execution()
def get_edge_tensor(adj):
    """
    Split a COO adjacency into an edge-index tensor and an edge-weight tensor.

    Args:
        adj: object exposing ``row``, ``col`` and ``data`` (e.g. ``scipy.sparse.coo_matrix``)

    Returns:
        (indice, data): a ``(2, nnz)`` long tensor stacking rows over columns,
        and a float tensor of the matching values, both on the default device.
    """
    sources, targets = (
        torch.tensor(node_ids, dtype=torch.long) for node_ids in (adj.row, adj.col)
    )
    weights = torch.tensor(adj.data, dtype=torch.float)
    return torch.stack((sources, targets), dim=0), weights


def main(args, timestamp):
    """
    End-to-end driver: load the graphs, build the model, then train / validate / test.

    Depending on ``args.do_train``, ``args.do_valid`` and ``args.do_test`` this
    trains the model (pickling the per-epoch metrics next to the checkpoint) and
    reports classification metrics on the validation and test nodes using the
    checkpoint stored under ``<args.save_path>/run_<timestamp>/``.

    Relies on the module-level ``logger`` and on ``load_corpus_torch``, ``TGCN``,
    ``get_edge_tensor``, ``train``, ``evaluate`` and ``load_ckpt`` being defined.

    Args:
        args: parsed options (see ``parse_args``)
        timestamp [str]: run identifier used in the checkpoint/result paths
    """
    start_time = time.time()

    if torch.cuda.is_available():
        device = 'cuda'
        logger.info("Training is running on {}".format(device))
    else:
        device = None
        logger.info("Training is running on CPU")

    # Set random seed
    seed=147
    logger.info("Seed used: {}".format(seed))
    np.random.seed(seed)
    torch.manual_seed(seed)
    if device == 'cuda':
        torch.cuda.manual_seed(seed)
    # Load data
    adj_lst, y_train, y_val, y_test, train_mask, val_mask, test_mask, train_size, val_size,test_size, num_labels = load_corpus_torch(args, device)
    logger.debug("adj:\n {}".format(adj_lst[0]))

    logger.info("The shape of adj is {}".format(adj_lst[0].shape))

    # One (edge index, edge weight) pair per graph, moved to the target device
    indice_list, weight_list = [] , []
    for adjacency in adj_lst:
        adjacency = adjacency.tocoo()
        ind, dat = get_edge_tensor(adjacency)
        indice_list.append(ind.to(device))
        weight_list.append(dat.to(device))

    in_dim = adj_lst[0].shape[0]
    model = TGCN(in_dim=in_dim, hidden_dim=args.hidden, out_dim=num_labels, num_graphs=len(adj_lst), dropout=args.dropout, n_layers=args.layers, bias=False, featureless=args.featureless)
    # The model input is simply the vector of node indices 0 .. in_dim-1
    features = torch.arange(in_dim, dtype=torch.long).to(device)

    model.to(device)

    checkpoint_path = os.path.join(args.save_path, 'run_{}/{}.bin'.format(timestamp, args.model_name))

    if args.do_train:
        logger.info("Starting training")
        results = train(args, features, y_train, train_mask, y_val, val_mask, y_test, test_mask, model, indice_list, weight_list)

        with open(os.path.join(args.save_path ,'run_{}/{}_train_results.pkl'.format(timestamp, args.model_name)), 'wb') as f:
            pkl.dump(results, f)
        logger.info("Successfully pickled file '{}_train_results.pkl' with loss and accuracy metrics to {}".format(args.model_name, args.save_path))

    if args.do_valid:
        logger.info("Starting validation")
        if args.load_ckpt:
            load_ckpt(model)
        else:
            # Only read the run checkpoint when it is actually used.
            save_dict = torch.load(checkpoint_path)
            model.load_state_dict(save_dict['model_state_dict'])
        model.eval()
        val_cost, val_acc, pred, labels, val_duration = evaluate(args,
            features, y_val, val_mask, model, indice_list, weight_list)
        logger.info("Val set results: cost= {:.5f} accuracy= {:.5f} time= {:.5f}".format(val_cost, val_acc, val_duration))

        logger.debug(val_mask)
        logger.debug(len(val_mask))
        # Select the masked nodes in one vectorised step; a per-element loop over
        # a device tensor forces one host/device sync per node.
        val_selected = (val_mask == 1).cpu().numpy()
        val_pred = pred[val_selected]
        val_labels = labels[val_selected]

        logger.info("Val Precision, Recall and F1-Score...")
        logger.info("\n {} ".format(metrics.classification_report(val_labels, val_pred, digits=4)))
        logger.info("Macro average Val Precision, Recall and F1-Score...")
        logger.info(metrics.precision_recall_fscore_support(val_labels, val_pred, average='macro'))
        logger.info("Micro average Val Precision, Recall and F1-Score...")
        logger.info(metrics.precision_recall_fscore_support(val_labels, val_pred, average='micro'))

    if args.do_test:
        logger.info("Starting testing")
        if args.load_ckpt:
            load_ckpt(model)
        else:
            # Only read the run checkpoint when it is actually used.
            save_dict = torch.load(checkpoint_path)
            model.load_state_dict(save_dict['model_state_dict'])
        model.eval()
        # Testing
        test_cost, test_acc, pred, labels, test_duration = evaluate(args,
            features, y_test, test_mask, model, indice_list, weight_list)
        logger.info("Test set results: cost= {:.5f} accuracy= {:.5f} time= {:.5f}".format(test_cost, test_acc, test_duration))

        logger.debug("Test mask:")
        logger.debug(len(test_mask))
        test_selected = (test_mask == 1).cpu().numpy()
        test_pred = pred[test_selected]
        test_labels = labels[test_selected]

        logger.info("Test Precision, Recall and F1-Score...")
        logger.info('\n {}'.format(metrics.classification_report(test_labels, test_pred, digits=4)))
        logger.info("Macro average Test Precision, Recall and F1-Score...")
        logger.info(metrics.precision_recall_fscore_support(test_labels, test_pred, average='macro'))
        logger.info("Micro average Test Precision, Recall and F1-Score...")
        logger.info(metrics.precision_recall_fscore_support(test_labels, test_pred, average='micro'))
    end_time = time.time()
    logger.info("Total execution time: {} seconds".format(round(end_time-start_time,2)))

# Entry point of the training cell. The names assigned here (`timestamp`, `logger`)
# become module-level globals; save_model(), train() and main() read `logger`,
# and train() reads `timestamp`, without receiving them as parameters.
if __name__ == '__main__':

    # retrieve execution timestamp for logs
    # (wall-clock time in the Asia/Singapore zone; also names the run directory)
    sgt = pytz.timezone('Asia/Singapore')
    timestamp = datetime.now(sgt).strftime("%Y-%m-%d_%H-%M-%S")

    # set up logging
    # "__file__" is a string literal here, so dirname() is empty and abspath()
    # resolves to the current working directory; logs go to <cwd>/../logs.
    log_path = os.path.join(Path(os.path.abspath(os.path.dirname("__file__")), '../logs'))
    logger = setup_logging(log_path=log_path, log_name='training_log', timestamp=timestamp)

    # parse_args() with no argument list reads sys.argv and ignores unknown options
    main(parse_args(), timestamp)

2024-04-20 10:53:18,778 - INFO - Training is running on cuda
INFO:training_log:Training is running on cuda
2024-04-20 10:53:18,783 - INFO - Seed used: 147
INFO:training_log:Seed used: 147
2024-04-20 10:53:18,816 - INFO - Loading seq graph
INFO:training_log:Loading seq graph
2024-04-20 10:53:20,894 - INFO - Successfully loaded seq graph
INFO:training_log:Successfully loaded seq graph
2024-04-20 10:53:20,901 - INFO - Loading sem graph
INFO:training_log:Loading sem graph
2024-04-20 10:53:23,121 - INFO - Successfully loaded sem graph
INFO:training_log:Successfully loaded sem graph
2024-04-20 10:53:23,125 - INFO - Loading syn graph
INFO:training_log:Loading syn graph
2024-04-20 10:53:24,260 - INFO - Successfully loaded syn graph
INFO:training_log:Successfully loaded syn graph


29426


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return np.array(mask, dtype=np.bool)
2024-04-20 10:53:26,072 - DEBUG - adj:
   (0, 7338)	1.5008468300225124
  (0, 7923)	9.240961894450367
  (0, 8680)	3.1674184096404523
  (0, 8878)	2.764182956859557
  (0, 11372)	1.240134361043218
  (0, 11457)	1.9258537664551136
  (0, 13795)	3.3800384631178555
  (0, 15527)	4.997775178366651
  (0, 16390)	4.774631627052441
  (0, 17380)	6.016344759361225
  (0, 18078)	6.229918859659283
  (0, 18216)	6.709491939921169
  (0, 18820)	0.8349934545913218
  (0, 20432)	1.6673331910012603
  (0, 20941)	1.5195310253612764
  (0, 21666)	2.030213781779356
  (0, 24561)	4.983981856234315
  (0, 24886)	3.374543943800215
  (0, 25530)	6.229918859659283
  (1, 10247)	8.581294116822761
  (1, 11651)	1.0930021499776208
  (1, 13323)	8.175829008714597
  (1, 17772)	5.011761420341391
  (1, 18265)	7.665003384948606
  (1, 18820)	0.8349934545913218
  :	:
  (29425, 139

3


2024-04-20 10:53:27,534 - INFO - Starting training
INFO:training_log:Starting training
2024-04-20 10:53:31,334 - INFO - 
 Epoch: 0001 train_loss= 1.97805 train_acc= 0.48625 val_loss= 0.80962 val_acc= 0.48169 test_loss= 0.80461 test_acc= 0.49212 time= 3.79053
INFO:training_log:
 Epoch: 0001 train_loss= 1.97805 train_acc= 0.48625 val_loss= 0.80962 val_acc= 0.48169 test_loss= 0.80461 test_acc= 0.49212 time= 3.79053
2024-04-20 10:53:31,355 - INFO - Configurations for training:
INFO:training_log:Configurations for training:
2024-04-20 10:53:31,359 - DEBUG - {'do_train': True, 'do_valid': True, 'do_test': True, 'no_sparse': False, 'load_ckpt': False, 'featureless': False, 'use_sem': False, 'use_syn': False, 'use_seq': False, 'save_path': './saved_model', 'dataset': 'mr', 'model': 'gcn', 'lr': 2e-05, 'epochs': 300, 'hidden': 200, 'layers': 2, 'dropout': 0.8, 'weight_decay': 1e-06, 'early_stop': 100, 'max_degree': 3, 'model_name': 'model', 'run_id': '2024-04-20_01-33-29'}
DEBUG:training_log:{'

./saved_model/run_2024-04-20_18-53-18


2024-04-20 10:53:32,258 - INFO - Current best loss 0.80962
INFO:training_log:Current best loss 0.80962
2024-04-20 10:53:32,524 - INFO - 
 Epoch: 0002 train_loss= 2.06954 train_acc= 0.46624 val_loss= 0.83376 val_acc= 0.48873 test_loss= 0.82129 test_acc= 0.49465 time= 0.25689
INFO:training_log:
 Epoch: 0002 train_loss= 2.06954 train_acc= 0.46624 val_loss= 0.83376 val_acc= 0.48873 test_loss= 0.82129 test_acc= 0.49465 time= 0.25689
2024-04-20 10:53:32,714 - INFO - 
 Epoch: 0003 train_loss= 1.82477 train_acc= 0.51125 val_loss= 0.84399 val_acc= 0.48028 test_loss= 0.83005 test_acc= 0.49268 time= 0.18093
INFO:training_log:
 Epoch: 0003 train_loss= 1.82477 train_acc= 0.51125 val_loss= 0.84399 val_acc= 0.48028 test_loss= 0.83005 test_acc= 0.49268 time= 0.18093
2024-04-20 10:53:32,898 - INFO - 
 Epoch: 0004 train_loss= 1.78542 train_acc= 0.50641 val_loss= 0.84126 val_acc= 0.49014 test_loss= 0.82738 test_acc= 0.49690 time= 0.17800
INFO:training_log:
 Epoch: 0004 train_loss= 1.78542 train_acc= 0.50

In [None]:
%ls

[0m[01;34mgdrive[0m/  [01;34msample_data[0m/


In [None]:
%cd ./gdrive

/content/gdrive


In [None]:
%cd ./MyDrive/dl

/content/gdrive/MyDrive/dl
