In [28]:
import os
import os.path as osp

import math
import yaml
import copy
import time
import random
import argparse

import pandas as pd

import numpy as np
import pickle as pkl
import networkx as nx
import scipy.sparse as sp

import torch
from torch import nn
from torch.nn import Module
import torch.optim as optim
from torch.nn.parameter import Parameter

import sklearn
from sklearn import neighbors
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_distances as cos_dis, euclidean_distances

### 参数设置

In [2]:
# Options are declared argparse-style but parsed from an explicit empty list,
# so `args` always carries the defaults declared here.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--gpu_id',
    default='0',
    help='Visible GPU id',
)
parser.add_argument(
    '--model_version',
    default='DHGNN_v1',
    help='DHGNN model version, acceptable: DHGNN_v1, DHGNN_v2',
)
args = parser.parse_args([])

# Expose only the selected GPU id to CUDA.
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

In [3]:
def get_config(dir):
    """
    Parse a YAML configuration file that may use the custom `!join` and `!concat` tags.

    :param dir: path handed to `open` (i.e. the YAML file itself, despite the name)
    :return: the object produced by parsing the file
    """
    # `!join [a, b, ...]`: join the sequence items with the OS path separator
    def join(loader, node):
        seq = loader.construct_sequence(node)
        return os.path.sep.join(seq)

    # `!concat [a, b, ...]`: plain string concatenation of the sequence items
    def concat(loader, node):
        seq = loader.construct_sequence(node)
        return ''.join(seq)

    yaml.add_constructor('!join', join)
    yaml.add_constructor('!concat', concat)

    # `yaml.load` without an explicit Loader is deprecated since PyYAML 5.1 and
    # rejected by PyYAML 6. FullLoader receives the constructors registered above;
    # older PyYAML releases have no FullLoader, so fall back to the Loader they used.
    # NOTE: only load configuration files from a trusted source.
    loader_cls = getattr(yaml, 'FullLoader', yaml.Loader)
    with open(dir, 'r') as f:
        cfg = yaml.load(f, Loader=loader_cls)

    return cfg


def check_dir(folder):
    """
    Make sure `folder` exists, creating it together with any missing parents.

    :param folder: directory path to create when nothing exists at that path
    """
    if not osp.exists(folder):
        # makedirs (unlike mkdir) copes with nested paths; exist_ok tolerates the
        # directory being created by someone else between the check and the call.
        os.makedirs(folder, exist_ok=True)

### 数据加载

In [6]:
def parse_index_file(filename):
    """
    Copied from gcn
    Parse index file: one integer per line.

    :param filename: path of the text file to read
    :return: list of int, in file order (blank lines are skipped)
    """
    index = []
    # context manager so the handle is closed even if a line fails to parse
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if line:
                index.append(int(line))
    return index

In [7]:
def preprocess_features(features):
    """
    Row-normalize a feature matrix: every row is divided by its sum.

    Rows whose sum is zero are left as all zeros instead of producing inf/NaN.

    :param features: matrix exposing `.sum(1)` and usable as the right operand of a
        sparse `.dot` (the caller in this file passes a scipy sparse matrix)
    :return: the product diag(1 / rowsum) . features
    """
    rowsum = np.array(features.sum(1))
    if not np.issubdtype(rowsum.dtype, np.floating):
        # integer sums cannot be raised to a negative power by NumPy
        rowsum = rowsum.astype(np.float64)
    with np.errstate(divide='ignore'):
        # zero rows yield inf here; they are reset to 0 right below
        r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    return features

In [10]:
def load_citation_data(cfg):
    """
    Load a citation dataset stored as `ind.<dataset>.<part>` pickle files plus an
    `ind.<dataset>.test.index` text file under `cfg['data_root']`.

    :param cfg: configuration mapping; the keys read here are 'data_root',
        'activate_dataset' and 'add_self_loop'
    :return: tuple (features, lbls, idx_train, idx_val, idx_test, n_category, edge_list, edge_list)
        features   - dense, row-normalized feature matrix
        lbls       - 1-D array of numerical class labels
        idx_train  - list(range(len(y)))
        idx_val    - the 500 indices following idx_train
        idx_test   - sorted test indices
        n_category - number of label columns (+1 for 'citeseer', see below)
        edge_list  - per-node neighbor lists (returned twice)
    :raises ValueError: for datasets other than 'citeseer' when a node has no label
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("{}/ind.{}.{}".format(cfg['data_root'], cfg['activate_dataset'], names[i]), 'rb') as f:
            # NOTE: unpickling runs code embedded in the file; only load trusted dataset files
            objects.append(pkl.load(f, encoding='latin1'))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("{}/ind.{}.test.index".format(cfg['data_root'], cfg['activate_dataset']))
    test_idx_range = np.sort(test_idx_reorder)

    if cfg['activate_dataset'] == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    features = preprocess_features(features)
    features = features.todense()

    G = nx.from_dict_of_lists(graph)
    if hasattr(G, 'adjacency_list'):
        # networkx 1.x API, kept so existing environments behave exactly as before
        edge_list = G.adjacency_list()
    else:
        # `Graph.adjacency_list` was removed in networkx 2.0
        edge_list = [list(G.neighbors(node)) for node in G.nodes()]
    # NOTE(review): position i of edge_list is treated as node id i below - this
    # presumes the graph's node order is 0..N-1; confirm against the dataset files.

    if cfg['add_self_loop']:
        for i in range(len(edge_list)):
            edge_list[i].append(i)
    # degree is reported whether or not self loops were added
    degree = [len(neighbors) for neighbors in edge_list]
    max_deg = max(degree)
    mean_deg = sum(degree) / len(degree)
    print(f'max degree: {max_deg}, mean degree:{mean_deg}')

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]     # one-hot labels
    n_sample = labels.shape[0]
    n_category = labels.shape[1]
    lbls = np.zeros((n_sample,))
    is_citeseer = cfg['activate_dataset'] == 'citeseer'
    if is_citeseer:
        n_category += 1                                         # one-hot labels all zero: new category
    for i in range(n_sample):
        hot = np.flatnonzero(labels[i] == 1)
        if hot.size:
            lbls[i] = hot[0]                                    # numerical labels
        elif is_citeseer:
            # classes are numbered 0 .. n_category - 1, so the extra class is the last one
            lbls[i] = n_category - 1
        else:
            raise ValueError(f'node {i} has no label (all-zero one-hot row)')

    idx_test = test_idx_range.tolist()
    idx_train = list(range(len(y)))
    idx_val = list(range(len(y), len(y) + 500))

    return features, lbls, idx_train, idx_val, idx_test, n_category, edge_list, edge_list

In [46]:
def source_select(cfg):
    """
    Return the dataset loading function to use.

    :param cfg: configuration mapping (not inspected; the same loader is always returned)
    :return: the `load_citation_data` function
    """
    loader = load_citation_data
    return loader

### 模型加载

In [14]:
def cos_dis(X):
    """
    Pairwise dot products of the normalized rows of X.

    :param X: 2-D tensor
    :return: tensor whose entry (i, j) is the dot product of normalized rows i and j
        (larger means more similar, despite the function name)
    """
    unit_rows = nn.functional.normalize(X)
    return unit_rows @ unit_rows.transpose(0, 1)

In [15]:
def sample_ids(ids, k):
    """
    Draw k - 1 entries of `ids` with replacement and append `ids[-1]`.

    :param ids: indexable sequence of ids; its last element is always kept
    :param k: total number of ids to return
    :return: list of k ids whose last element is `ids[-1]`
    """
    drawn = pd.DataFrame(ids).sample(k - 1, replace=True)
    picked = drawn.values.flatten().tolist()
    # must sample the centroid node itself (taken to be the last entry of ids)
    return picked + [ids[-1]]

In [16]:
def sample_ids_v2(ids, k):
    """
    Draw k entries of `ids` with replacement.

    :param ids: sequence of ids to draw from
    :param k: number of ids to return
    :return: list of k ids
    """
    drawn = pd.DataFrame(ids).sample(k, replace=True)
    return drawn.values.flatten().tolist()

In [18]:
class Transform(nn.Module):
    """
    Vertex transformation module.

    Learns, per sample, a (k, k) row-stochastic mixing matrix from the k neighbor
    features and applies it to them: (N, k, d) -> (N, k, d).
    """
    def __init__(self, dim_in, k):
        """
        :param dim_in: input feature dimension (used as the convolution kernel size)
        :param k: k neighbors
        """
        super().__init__()

        # grouped convolution: each of the k input channels yields k output values
        self.convKK = nn.Conv1d(k, k * k, dim_in, groups=k)
        self.activation = nn.Softmax(dim=-1)
        self.dp = nn.Dropout()

    def forward(self, region_feats):
        """
        :param region_feats: (N, k, d)
        :return: (N, k, d)
        """
        n_regions, n_neighbors, _ = region_feats.size()
        # (N, k*k, 1) reshaped to (N, k, k), then softmax over the last dimension
        mixing = self.activation(self.convKK(region_feats).view(n_regions, n_neighbors, n_neighbors))
        return mixing @ region_feats  # (N, k, d)

In [19]:
class VertexConv(nn.Module):
    """
    Vertex convolution layer.

    Pools the k neighbor features of every sample into a single feature:
    (N, k, d) -> (N, d), via `Transform` followed by a 1-D convolution over k.
    """
    def __init__(self, dim_in, k):
        """
        :param dim_in: input feature dimension
        :param k: k neighbors
        """
        super().__init__()

        # (N, k, d) -> (N, k, d)
        self.trans = Transform(dim_in, k)
        # (N, k, d) -> (N, 1, d)
        self.convK1 = nn.Conv1d(k, 1, 1)

    def forward(self, region_feats):
        """
        :param region_feats: (N, k, d)
        :return: (N, d)
        """
        mixed = self.trans(region_feats)
        return self.convK1(mixed).squeeze(1)

In [20]:
class GraphConvolution(nn.Module):
    """
    A GCN layer: fc -> activation -> dropout, followed by averaging every node's
    features over the node ids listed for it in `edge_dict`.
    """
    def __init__(self, **kwargs):
        """
        :param kwargs:
        # dim_in: input feature dimension
        # dim_out: output feature dimension
        # has_bias: whether the linear layer has a bias term
        # activation: module applied after the linear layer
        # dropout_rate: dropout probability (optional, defaults to 0.5)
        """
        super().__init__()

        self.dim_in = kwargs['dim_in']
        self.dim_out = kwargs['dim_out']
        self.fc = nn.Linear(self.dim_in, self.dim_out, bias=kwargs['has_bias'])
        # honour the configured rate; it used to be silently replaced by 0.5
        self.dropout = nn.Dropout(p=kwargs.get('dropout_rate', 0.5))
        self.activation = kwargs['activation']

    def _region_aggregate(self, feats, edge_dict):
        """
        :param feats: (N, d) features
        :param edge_dict: edge_dict[i] indexes the rows of `feats` averaged for node i
        :return: (N, d) averaged features
        """
        N = feats.size()[0]
        pooled_feats = torch.stack([torch.mean(feats[edge_dict[i]], dim=0) for i in range(N)])

        return pooled_feats

    def forward(self, ids, feats, edge_dict, G, ite):
        """
        :param ids: compatible with `MultiClusterConvolution` (unused here)
        :param feats: (N, d) input features
        :param edge_dict: per-node lists of row indices to average over
        :param G: unused here; part of the layer call signature shared in this file
        :param ite: unused here; part of the layer call signature shared in this file
        :return: (N, d') aggregated features
        """
        x = feats  # (N, d)
        x = self.dropout(self.activation(self.fc(x)))  # (N, d')
        x = self._region_aggregate(x, edge_dict)  # (N, d')
        return x

In [21]:
class EdgeConv(nn.Module):
    """
    Hyperedge convolution layer.

    Scores every hyperedge feature with a small MLP, turns the scores into
    attention weights with a softmax over the hyperedges, and returns the
    weighted sum of the hyperedge features.
    """
    def __init__(self, dim_ft, hidden):
        """
        :param dim_ft: feature dimension
        :param hidden: number of hidden layer neurons
        """
        super().__init__()
        self.fc = nn.Sequential(nn.Linear(dim_ft, hidden), nn.ReLU(), nn.Linear(hidden, 1))

    def forward(self, ft):
        """
        use self attention coefficient to compute weighted average on dim=-2
        :param ft (N, t, d)
        :return: y (N, d)
        """
        # one (N, 1) score per hyperedge, stacked to (N, t, 1)
        per_edge_scores = [self.fc(ft[:, edge]) for edge in range(ft.size(1))]
        attention = torch.softmax(torch.stack(per_edge_scores, 1), 1)
        weighted = attention * ft
        return weighted.sum(1)

In [22]:
class DHGLayer(GraphConvolution):
    """
    A Dynamic Hypergraph Convolution Layer

    For the requested node ids, up to three kinds of hyperedge features are built
    (k-means cluster members, top-k most similar nodes, sampled graph neighbors).
    Each is pooled by a VertexConv, the pooled features are merged by an EdgeConv,
    and the result goes through dropout -> fc -> activation.
    """
    def __init__(self, **kwargs):
        """
        :param kwargs: everything `GraphConvolution.__init__` reads, plus
        # structured_neighbor: number of sampled nodes in graph adjacency
        # n_cluster: number of k-means clusters
        # n_center: number of adjacent clusters used per node
        # nearest_neighbor: the 'k' in k-NN
        # cluster_neighbor: number of nodes sampled from each adjacent cluster
        # wu_knn / wu_kmeans / wu_struct: first value of `ite` at which the
        #   corresponding hyperedge type is used in `forward`
        """
        super().__init__(**kwargs)

        self.ks = kwargs['structured_neighbor'] # number of sampled nodes in graph adjacency
        self.n_cluster = kwargs['n_cluster']              # number of clusters
        self.n_center = kwargs['n_center']                # a node has #n_center adjacent clusters
        self.kn = kwargs['nearest_neighbor']    # number of the 'k' in k-NN
        self.kc = kwargs['cluster_neighbor']    # number of sampled nodes in a adjacent k-means cluster
        self.wu_knn=kwargs['wu_knn']
        self.wu_kmeans=kwargs['wu_kmeans']
        self.wu_struct=kwargs['wu_struct']
        # NOTE(review): vc_sn is not used by any method of this class; it is kept
        # because it owns parameters that are part of the state_dict.
        self.vc_sn = VertexConv(self.dim_in, self.ks+self.kn)    # structured trans
        self.vc_s = VertexConv(self.dim_in, self.ks)    # structured trans
        self.vc_n = VertexConv(self.dim_in, self.kn)    # nearest trans
        self.vc_c = VertexConv(self.dim_in, self.kc)   # k-means cluster trans
        self.ec = EdgeConv(self.dim_in, hidden=self.dim_in//4)
        self.kmeans = None      # cached (_N, n_center, kc) cluster sample indices
        self.structure = None   # cached (_N, ks) graph neighbor sample indices

    def _vertex_conv(self, func, x):
        """Apply the vertex convolution module `func` to `x`."""
        return func(x)

    def _structure_select(self, ids, feats, edge_dict):
        """
        :param ids: indices selected during train/valid/test, torch.LongTensor
        :param feats:
        :param edge_dict: torch.LongTensor
        :return: mapped graph neighbors
        """
        if self.structure is None:
            # sample once and reuse the same neighbors on every later call
            _N = feats.size(0)
            idx = torch.LongTensor([sample_ids(edge_dict[i], self.ks) for i in range(_N)])    # (_N, ks)
            self.structure = idx
        else:
            idx = self.structure

        idx = idx[ids]
        N = idx.size(0)
        d = feats.size(1)
        region_feats = feats[idx.view(-1)].view(N, self.ks, d)          # (N, ks, d)
        return region_feats

    def _nearest_select(self, ids, feats):
        """
        :param ids: indices selected during train/valid/test, torch.LongTensor
        :param feats:
        :return: mapped nearest neighbors
        """
        # top-k largest entries per row of the matrix returned by cos_dis
        dis = cos_dis(feats)
        _, idx = torch.topk(dis, self.kn, dim=1)
        idx = idx[ids]
        N = len(idx)
        d = feats.size(1)
        nearest_feature = feats[idx.view(-1)].view(N, self.kn, d)         # (N, kn, d)
        return nearest_feature

    def _cluster_select(self, ids, feats):
        """
        compute k-means centers and cluster labels of each node
        return top #n_center nearest cluster transformed features
        :param ids: indices selected during train/valid/test, torch.LongTensor
        :param feats:
        :return: top #n_center nearest cluster mapped features
        """
        if self.kmeans is None:
            # clustering runs once, on the features of the first call; the sampled
            # indices are cached and reused afterwards
            _N = feats.size(0)
            np_feats = feats.detach().cpu().numpy()
            try:
                estimator = KMeans(n_clusters=self.n_cluster, random_state=0, n_jobs=-1)
            except TypeError:
                # scikit-learn >= 1.0 no longer accepts `n_jobs`
                estimator = KMeans(n_clusters=self.n_cluster, random_state=0)
            kmeans = estimator.fit(np_feats)
            centers = kmeans.cluster_centers_
            dis = euclidean_distances(np_feats, centers)
            # the n_center closest cluster centers of every node
            _, cluster_center_dict = torch.topk(torch.Tensor(dis), self.n_center, largest=False)
            cluster_center_dict = cluster_center_dict.numpy()
            point_labels = kmeans.labels_
            point_in_which_cluster = [np.where(point_labels == i)[0] for i in range(self.n_cluster)]
            idx = torch.LongTensor([[sample_ids_v2(point_in_which_cluster[cluster_center_dict[point][i]], self.kc)   
                        for i in range(self.n_center)] for point in range(_N)])    # (_N, n_center, kc)
            self.kmeans = idx
        else:
            idx = self.kmeans
        
        idx = idx[ids]
        N = idx.size(0)
        d = feats.size(1)
        cluster_feats = feats[idx.view(-1)].view(N, self.n_center, self.kc, d)

        return cluster_feats                    # (N, n_center, kc, d)

    def _edge_conv(self, x):
        """Merge the stacked hyperedge features with the EdgeConv module."""
        return self.ec(x)

    def _fc(self, x):
        """dropout -> fc -> activation."""
        return self.activation(self.fc(self.dropout(x)))

    def forward(self, ids, feats, edge_dict, G, ite):
        """
        :param ids: indices of the nodes to produce outputs for
        :param feats: (_N, d) features of all nodes
        :param edge_dict: per-node neighbor lists used for the structural hyperedge
        :param G: unused here; part of the layer call signature shared in this file
        :param ite: current iteration; compared with wu_kmeans / wu_knn / wu_struct
        :return: (N, d') features of the requested nodes
        :raises RuntimeError: when `ite` is below all three warm-up thresholds
        """
        hyperedges = []    
        if ite >= self.wu_kmeans:
            c_feat = self._cluster_select(ids, feats)
            for c_idx in range(c_feat.size(1)):
                xc = self._vertex_conv(self.vc_c, c_feat[:, c_idx, :, :])
                xc  = xc.view(len(ids), 1, feats.size(1))               # (N, 1, d)          
                hyperedges.append(xc)
        if ite >= self.wu_knn:
            n_feat = self._nearest_select(ids, feats)
            xn = self._vertex_conv(self.vc_n, n_feat)
            xn  = xn.view(len(ids), 1, feats.size(1))                   # (N, 1, d)
            hyperedges.append(xn)
        if ite >= self.wu_struct:
            s_feat = self._structure_select(ids, feats, edge_dict)
            xs = self._vertex_conv(self.vc_s, s_feat)
            xs  = xs.view(len(ids), 1, feats.size(1))                   # (N, 1, d)
            hyperedges.append(xs)
        if not hyperedges:
            # fail with an actionable message instead of torch.cat's error on an empty list
            raise RuntimeError(
                f'no hyperedge type is active at ite={ite}: wu_kmeans={self.wu_kmeans}, '
                f'wu_knn={self.wu_knn}, wu_struct={self.wu_struct}')
        x = torch.cat(hyperedges, dim=1)
        x = self._edge_conv(x)                                          # (N, d)
        x = self._fc(x)                                                 # (N, d')
        return x

In [23]:
class HGNN_conv(nn.Module):
    """
    A HGNN layer: fc -> activation, multiplication by the matrix G, then dropout.
    """
    def __init__(self, **kwargs):
        """
        :param kwargs:
        # dim_in: input feature dimension
        # dim_out: output feature dimension
        # has_bias: whether the linear layer has a bias term
        # activation: module applied after the linear layer
        # dropout_rate: dropout probability (optional, defaults to 0.5)
        """
        super(HGNN_conv, self).__init__()

        self.dim_in = kwargs['dim_in']
        self.dim_out = kwargs['dim_out']
        self.fc = nn.Linear(self.dim_in, self.dim_out, bias=kwargs['has_bias'])
        # honour the configured rate; it used to be silently replaced by 0.5
        self.dropout = nn.Dropout(p=kwargs.get('dropout_rate', 0.5))
        self.activation = kwargs['activation']


    def forward(self, ids, feats, edge_dict, G, ite):
        """
        :param ids: unused here; part of the layer call signature shared in this file
        :param feats: (N, d) input features
        :param edge_dict: unused here
        :param G: matrix left-multiplied onto the transformed features
        :param ite: unused here
        :return: (N, d') features
        """
        x = feats
        x = self.activation(self.fc(x))
        x = G.matmul(x)
        x = self.dropout(x)
        return x

In [25]:
class DHGNN_v1(nn.Module):
    """
    Dynamic Hypergraph Convolution Neural Network with a GCN-style input layer:
    one `GraphConvolution` followed by `n_layers - 1` `DHGLayer`s.
    """
    def __init__(self, **kwargs):
        """
        :param kwargs: dim_feat, n_categories, n_layers, layer_spec, dropout_rate,
            has_bias, and (read when a DHGLayer is built) k_structured, k_nearest,
            k_cluster, wu_knn, wu_kmeans, wu_struct, clusters, adjacent_centers
        """
        super().__init__()

        self.dim_feat = kwargs['dim_feat']
        self.n_categories = kwargs['n_categories']
        self.n_layers = kwargs['n_layers']
        hidden_dims = kwargs['layer_spec']
        self.dims_in = [self.dim_feat] + hidden_dims
        self.dims_out = hidden_dims + [self.n_categories]

        # ReLU after every layer but the last, which ends in log-softmax
        relus = [nn.ReLU() for _ in range(self.n_layers - 1)]
        activations = nn.ModuleList(relus + [nn.LogSoftmax(dim=-1)])

        def build_dhg_layer(i):
            return DHGLayer(
                dim_in=self.dims_in[i],
                dim_out=self.dims_out[i],
                dropout_rate=kwargs['dropout_rate'],
                activation=activations[i],
                structured_neighbor=kwargs['k_structured'],
                nearest_neighbor=kwargs['k_nearest'],
                cluster_neighbor=kwargs['k_cluster'],
                wu_knn=kwargs['wu_knn'],
                wu_kmeans=kwargs['wu_kmeans'],
                wu_struct=kwargs['wu_struct'],
                n_cluster=kwargs['clusters'],
                n_center=kwargs['adjacent_centers'],
                has_bias=kwargs['has_bias'])

        layers = [GraphConvolution(
            dim_in=self.dims_in[0],
            dim_out=self.dims_out[0],
            dropout_rate=kwargs['dropout_rate'],
            activation=activations[0],
            has_bias=kwargs['has_bias'])]
        for i in range(1, self.n_layers):
            layers.append(build_dhg_layer(i))
        self.gcs = nn.ModuleList(layers)

    def forward(self, **kwargs):
        """
        :param ids: node indices passed on to every layer
        :param feats: input features
        :param edge_dict: per-node neighbor lists
        :param G: matrix passed on to every layer
        :param ite: current iteration, passed on to every layer
        :return: output of the last layer
        """
        ids, feats, edge_dict, G, ite = (
            kwargs[key] for key in ('ids', 'feats', 'edge_dict', 'G', 'ite'))

        x = feats
        for layer_no in range(self.n_layers):
            layer = self.gcs[layer_no]
            x = layer(ids, x, edge_dict, G, ite)
        return x

In [26]:
class DHGNN_v2(nn.Module):
    """
    Dynamic Hypergraph Convolution Neural Network with a HGNN-style input layer:
    one `HGNN_conv` followed by `n_layers - 1` `DHGLayer`s.
    """
    def __init__(self, **kwargs):
        """
        :param kwargs: dim_feat, n_categories, n_layers, layer_spec, dropout_rate,
            has_bias, and (read when a DHGLayer is built) k_structured, k_nearest,
            k_cluster, wu_knn, wu_kmeans, wu_struct, clusters, adjacent_centers
        """
        super().__init__()

        self.dim_feat = kwargs['dim_feat']
        self.n_categories = kwargs['n_categories']
        self.n_layers = kwargs['n_layers']
        hidden_dims = kwargs['layer_spec']
        self.dims_in = [self.dim_feat] + hidden_dims
        self.dims_out = hidden_dims + [self.n_categories]

        # ReLU after every layer but the last, which ends in log-softmax
        relus = [nn.ReLU() for _ in range(self.n_layers - 1)]
        activations = nn.ModuleList(relus + [nn.LogSoftmax(dim=-1)])

        def build_dhg_layer(i):
            return DHGLayer(
                dim_in=self.dims_in[i],
                dim_out=self.dims_out[i],
                dropout_rate=kwargs['dropout_rate'],
                activation=activations[i],
                structured_neighbor=kwargs['k_structured'],
                nearest_neighbor=kwargs['k_nearest'],
                cluster_neighbor=kwargs['k_cluster'],
                wu_knn=kwargs['wu_knn'],
                wu_kmeans=kwargs['wu_kmeans'],
                wu_struct=kwargs['wu_struct'],
                n_cluster=kwargs['clusters'],
                n_center=kwargs['adjacent_centers'],
                has_bias=kwargs['has_bias'])

        layers = [HGNN_conv(
            dim_in=self.dims_in[0],
            dim_out=self.dims_out[0],
            dropout_rate=kwargs['dropout_rate'],
            activation=activations[0],
            has_bias=kwargs['has_bias'])]
        for i in range(1, self.n_layers):
            layers.append(build_dhg_layer(i))
        self.gcs = nn.ModuleList(layers)

    def forward(self, **kwargs):
        """
        :param ids: node indices passed on to every layer
        :param feats: input features
        :param edge_dict: per-node neighbor lists
        :param G: matrix passed on to every layer
        :param ite: current iteration, passed on to every layer
        :return: output of the last layer
        """
        ids, feats, edge_dict, G, ite = (
            kwargs[key] for key in ('ids', 'feats', 'edge_dict', 'G', 'ite'))

        x = feats
        for layer_no in range(self.n_layers):
            layer = self.gcs[layer_no]
            x = layer(ids, x, edge_dict, G, ite)
        return x

In [27]:
def model_select(activate_model):
    """
    Map a model name to its class.

    :param activate_model: 'DHGNN_v1' or 'DHGNN_v2'
    :return: the model class (not an instance)
    :raises ValueError: for any other name
    """
    if activate_model == 'DHGNN_v1':
        return DHGNN_v1
    elif activate_model == 'DHGNN_v2':
        return DHGNN_v2
    else:
        # name the offending value so a config typo is easy to spot
        raise ValueError(f"Unknown model '{activate_model}', acceptable: DHGNN_v1, DHGNN_v2")

### Others

In [29]:
def _edge_dict_to_H(edge_dict):
    n_nodes = len(edge_dict)
    H = np.zeros(shape=(n_nodes, n_nodes))
    for center_id, adj_list in enumerate(edge_dict):
        H[center_id, center_id] = 1.0
        for adj_id in adj_list:
            H[adj_id, center_id] = 1.0
    return H

In [30]:
def _generate_G_from_H(H, variable_weight=False):
    H = np.array(H)
    n_edge = H.shape[1]
    # the weight of the hyperedge
    W = np.ones(n_edge)
    # the degree of the node
    DV = np.sum(H * W, axis=1)
    # the degree of the hyperedge
    DE = np.sum(H, axis=0)

    invDE = np.mat(np.diag(np.power(DE, -1)))
    DV2 = np.mat(np.diag(np.power(DV, -0.5)))
    W = np.mat(np.diag(W))
    H = np.mat(H)
    HT = H.T

    if variable_weight:
        DV2_H = DV2 * H
        invDE_HT_DV2 = invDE * HT * DV2
        return DV2_H, W, invDE_HT_DV2
    else:
        G = DV2 * H * W * invDE * HT * DV2
        return G

- 随机种子

In [31]:
def setup_seed(seed):
    """
    Seed the Python, NumPy and PyTorch (CPU and all CUDA devices) random number
    generators and request deterministic cuDNN algorithms.

    :param seed: integer seed shared by all generators
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

In [32]:
# Seed every RNG handled by setup_seed and report the value that was used.
seed_num = 1000
setup_seed(seed_num) 
print('Using random seed: ', seed_num)

Using random seed:  1000


In [34]:
# Load the YAML configuration, then overwrite its "model" entry with the
# --model_version option parsed into `args`.
# NOTE(review): absolute path on one specific machine - the notebook cannot be
# re-run elsewhere without editing it.
cfg = get_config(r'C:\Users\sss\Desktop\DHGNN-master\config/config.yaml')
cfg["model"] = args.model_version

### Train & Test

In [104]:
def train(model, fts, lbls, idx_train, idx_val, edge_dict, G, criterion, optimizer, scheduler, device, num_epochs=25, print_freq=500):
    """
    Full-batch training loop with a validation pass after every epoch.

    :param model: module called as model(ids=..., feats=..., edge_dict=..., G=..., ite=epoch)
    :param fts: feature tensor handed to the model
    :param lbls: integer label tensor, indexed with idx_train / idx_val
    :param idx_train: indices used in the training phase
    :param idx_val: indices used in the validation phase
    :param edge_dict: passed through to the model
    :param G: passed through to the model
    :param criterion: loss function returning the mean loss over the batch
    :param optimizer: optimizer over the model parameters
    :param scheduler: learning-rate scheduler, stepped once per epoch
    :param device: ignored; CUDA availability is re-detected inside the function
    :param num_epochs: number of epochs
    :param print_freq: statistics are printed every `print_freq` epochs
    :return: ((state_dict with best val accuracy, its epoch),
              (state_dict with lowest val loss, its epoch))
    """
    since = time.time()

    state_dict_updates = 0          # number of times a best state_dict was replaced

    device = torch.cuda.is_available()
    if device:
        model = model.cuda()

    model_wts_best_val_acc = copy.deepcopy(model.state_dict())
    model_wts_lowest_val_loss = copy.deepcopy(model.state_dict())

    best_acc = 0.0
    loss_min = float('inf')         # any finite validation loss improves on this
    acc_epo = 0
    loss_epo = 0

    for epoch in range(num_epochs):
        epo = epoch

        if epoch % print_freq == 0:
            print('-' * 10)
            print(f'Epoch {epoch}/{num_epochs - 1}')

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            idx = idx_train if phase == 'train' else idx_val

            optimizer.zero_grad()
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(ids=idx, feats=fts, edge_dict=edge_dict, G=G, ite=epo)

                loss = criterion(outputs, lbls[idx]) * len(idx)
                _, preds = torch.max(outputs, 1)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
                    # the scheduler must be stepped after the optimizer; stepping it
                    # first makes PyTorch >= 1.1 skip the first learning-rate value
                    scheduler.step()

            # statistics, as plain Python numbers so no autograd graph is kept alive
            epoch_loss = loss.item() / len(idx)
            epoch_acc = torch.sum(preds == lbls.data[idx]).item() / len(idx)

            if epoch % print_freq == 0:
                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc

                model_wts_best_val_acc = copy.deepcopy(model.state_dict())

                acc_epo = epoch
                state_dict_updates += 1

            if phase == 'val' and epoch_loss < loss_min:
                loss_min = epoch_loss

                model_wts_lowest_val_loss = copy.deepcopy(model.state_dict())

                loss_epo = epoch
                state_dict_updates += 1

            if epoch % print_freq == 0 and phase == 'val':
                print(f'Best val Acc: {best_acc:4f}, Min val loss: {loss_min:4f}')
                print('-' * 20)


    time_elapsed = time.time() - since
    print(f'\nTraining complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'\nState dict updates {state_dict_updates}')
    print(f'Best val Acc: {best_acc:4f}')

    return (model_wts_best_val_acc, acc_epo), (model_wts_lowest_val_loss, loss_epo)

In [111]:
def test(model, best_model_wts, fts, lbls, n_category, idx_test, edge_dict, G, device, test_time = 1):
    best_model_wts, epo = best_model_wts
    
    device = torch.cuda.is_available()
    
    if device:
        model = model.cuda()
    else:
        model = model
        
    model.load_state_dict(best_model_wts)
    model.eval()

    running_corrects = 0.0

    if device:
        outputs = torch.zeros(len(idx_test), n_category).cuda()
    else:
        outputs = torch.zeros(len(idx_test), n_category)

    for _ in range(test_time):

        with torch.no_grad():

            outputs += model(ids=idx_test, feats=fts, edge_dict=edge_dict, G=G, ite=epo)

    _, preds = torch.max(outputs, 1)
    running_corrects += torch.sum(preds == lbls.data[idx_test])
    test_acc = running_corrects.double() / len(idx_test)

    print('*' * 20)
    print(f'Test acc: {test_acc} @Epoch-{epo}')
    print('*' * 20)

    return test_acc, epo

In [112]:
def train_test_model(cfg):
    """
    Run the whole pipeline: load the dataset, build G, create and initialize the
    model, train it, and evaluate both saved weight sets on the test indices.

    :param cfg: configuration mapping (dataset, model and optimization settings)
    :return: ((test acc, epoch) of the lowest-val-loss weights,
              (test acc, epoch) of the best-val-acc weights),
             or None when the loader returns no test indices
    """
    device = torch.cuda.is_available()

    source = source_select(cfg)
    print(f'Using {cfg["activate_dataset"]} dataset')
    fts, lbls, idx_train, idx_val, idx_test, n_category, _, edge_dict = source(cfg)

    G = _generate_G_from_H(_edge_dict_to_H(edge_dict))

    # convert everything to tensors, then move to the GPU when one is available
    G = torch.Tensor(G)
    fts = torch.Tensor(fts)
    lbls = torch.Tensor(lbls).squeeze().long()
    if device:
        G = G.cuda()
        fts = fts.cuda()
        lbls = lbls.cuda()

    model_cls = model_select(cfg['model'])
    model = model_cls(
        dim_feat=fts.size(1),
        n_categories=n_category,
        k_structured=cfg['k_structured'],
        k_nearest=cfg['k_nearest'],
        k_cluster=cfg['k_cluster'],
        wu_knn=cfg['wu_knn'],
        wu_kmeans=cfg['wu_kmeans'],
        wu_struct=cfg['wu_struct'],
        clusters=cfg['clusters'],
        adjacent_centers=cfg['adjacent_centers'],
        n_layers=cfg['n_layers'],
        layer_spec=cfg['layer_spec'],
        dropout_rate=cfg['drop_out'],
        has_bias=cfg['has_bias']
    )

    # initialize model in place: Xavier-uniform weights, zero biases
    for name, tensor in model.state_dict().items():
        if 'weight' in name:
            nn.init.xavier_uniform_(tensor)
        elif 'bias' in name:
            tensor.zero_()

    optimizer = optim.Adam(model.parameters(), lr=cfg['lr'], weight_decay=cfg['weight_decay'], eps=1e-20)
    lr_schedule = optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg['milestones'], gamma=cfg['gamma'])
    criterion = torch.nn.NLLLoss()

    # transductive learning mode
    model_wts_best_val_acc, model_wts_lowest_val_loss = train(
        model, fts, lbls, idx_train, idx_val, edge_dict, G, criterion, optimizer,
        lr_schedule, device, cfg['max_epoch'], cfg['print_freq'])

    if idx_test is None:
        return None

    print('**** Model of lowest val loss ****')
    test_acc_lvl, epo_lvl = test(model, model_wts_lowest_val_loss, fts, lbls, n_category, idx_test, edge_dict, G, device, cfg['test_time'])
    print('**** Model of best val acc ****')
    test_acc_bva, epo_bva = test(model, model_wts_best_val_acc, fts, lbls, n_category, idx_test, edge_dict, G, device, cfg['test_time'])
    return (test_acc_lvl, epo_lvl), (test_acc_bva, epo_bva)

In [113]:
# Re-seed every RNG handled by setup_seed before the training cells below run.
seed_num = 1000
setup_seed(seed_num)
print('Using random seed: ', seed_num)

Using random seed:  1000


In [114]:
# Project root containing config/config.yaml.
# NOTE(review): absolute path on one specific machine - edit before re-running elsewhere.
root = r"C:\Users\sss\Desktop\DHGNN-master/"

# Reload the configuration and select the model version parsed into `args`.
cfg = get_config(root + 'config/config.yaml')
cfg['model'] = args.model_version

In [115]:
# Run training and testing; the cell output is the returned pair
# ((test acc, epoch) for lowest val loss, (test acc, epoch) for best val acc).
train_test_model(cfg)

Using cora dataset
max degree: 169, mean degree:4.89807976366322
----------
Epoch 0/24




train Loss: 1.9457 Acc: 0.1786
val Loss: 1.9401 Acc: 0.1300
Best val Acc: 0.130000, Min val loss: 1.940101
--------------------
----------
Epoch 1/24
train Loss: 1.9388 Acc: 0.2000
val Loss: 1.9108 Acc: 0.6220
Best val Acc: 0.622000, Min val loss: 1.910787
--------------------
----------
Epoch 2/24
train Loss: 1.9137 Acc: 0.3500
val Loss: 1.8588 Acc: 0.7000
Best val Acc: 0.700000, Min val loss: 1.858844
--------------------
----------
Epoch 3/24
train Loss: 1.8730 Acc: 0.4429
val Loss: 1.7715 Acc: 0.7500
Best val Acc: 0.750000, Min val loss: 1.771529
--------------------
----------
Epoch 4/24
train Loss: 1.7879 Acc: 0.5429
val Loss: 1.6423 Acc: 0.7740
Best val Acc: 0.774000, Min val loss: 1.642348
--------------------
----------
Epoch 5/24
train Loss: 1.7174 Acc: 0.7000
val Loss: 1.5523 Acc: 0.7760
Best val Acc: 0.776000, Min val loss: 1.552295
--------------------
----------
Epoch 6/24
train Loss: 1.4879 Acc: 0.7857
val Loss: 1.3174 Acc: 0.8000
Best val Acc: 0.800000, Min val loss: 1.

((tensor(0.8200, dtype=torch.float64), 10),
 (tensor(0.8360, dtype=torch.float64), 24))