# GATE

In [3]:
from utils import (create_logger, set_random_seed, rm_suffix,
                   mkdir_if_no_exists)
from utils import (read_meta, read_probs, l2norm, knns2ordered_nbrs,
                   intdict2ndarray, Timer)
from utils import (write_meta, write_feat)
from utils.knn import *

from mmcv import Config 
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init


## load data

In [4]:
config='/dcs/pg20/u2085214/fc/learn-to-cluster/test_train_cfg_trial.py'
cfg = Config.fromfile(config)

cfg.phase = 'train'
cfg.cuda = torch.cuda.is_available()
cfg.load_from = None
cfg.resume_from = None
cfg.gpus = 1
cfg.distributed = False
cfg.save_output = False
cfg.no_cuda = False
cfg.force = False
cfg.work_dir = './data/'
cfg.cut_name = '_cut'

for k, v in cfg.model['kwargs'].items(): #kwargs=dict(feature_dim=256)
    setattr(cfg.train_data, k, v) #k? v?



In [5]:
import numpy as np

from utils import (read_meta, read_probs, l2norm, knns2ordered_nbrs,
                   intdict2ndarray, Timer)

class ClusterDataset(object):
    def __init__(self, cfg):
        feat_path = cfg['feat_path']
        label_path = cfg.get('label_path', None)
        knn_graph_path = cfg['knn_graph_path']

        self.k_at_hop = cfg['k_at_hop'] #k_at_hop=[100, 10], #200,10
        self.depth = len(self.k_at_hop) #e.g. 2 : legth for k_at_hop
        self.active_connection = cfg['active_connection']
        self.feature_dim = cfg['feature_dim']
        self.is_norm_feat = cfg.get('is_norm_feat', True) #normalized
        self.is_sort_knns = cfg.get('is_sort_knns', True) #sorted knn
        self.is_test = cfg.get('is_test', False) #depends on the train or test
        
        with Timer('read meta and feature'):
            if label_path is not None:
                _, idx2lb = read_meta(label_path) #e.g. {0:0,1:0,2:0 ...} dict format
                self.inst_num = len(idx2lb) #instance num = # of data 
                self.labels = intdict2ndarray(idx2lb) #no. of class=8573 [   0.    0.    0. ... 8572. 8572. 8572.]
                self.ignore_label = False
            else:
                self.labels = None
                self.inst_num = -1
                self.ignore_label = True
            self.features = read_probs(feat_path, self.inst_num,
                                       self.feature_dim)#self.feature.shape:(576494, 256)
     
            if self.is_norm_feat:
                self.features = l2norm(self.features)
            if self.inst_num == -1:
                self.inst_num = self.features.shape[0]
            self.size = self.inst_num

        with Timer('read knn graph'):
            knns = np.load(knn_graph_path)['data']
            _, self.knn_graph = knns2ordered_nbrs(knns, sort=self.is_sort_knns)
        assert np.mean(self.k_at_hop) >= self.active_connection

        print('feature shape: {}, norm_feat: {}, sort_knns: {} '
              'k_at_hop: {}, active_connection: {}'.format(
                  self.features.shape, self.is_norm_feat, self.is_sort_knns,
                  self.k_at_hop, self.active_connection))
        print('labels shape:', self.labels.shape) #(584013,)
        print('knns_graph shape:', self.knn_graph.shape) #(584013, 80)

    def __getitem__(self, index):
        '''
        return the vertex feature and the adjacent matrix A, together
        with the indices of the center node and its 1-hop nodes
        '''
        if index is None or index > self.size:
            raise ValueError('index({}) is not in the range of {}'.format(
                index, self.size))

        center_node = index #428572 each index
        
        # hops[0] for 1-hop neighbors, hops[1] for 2-hop neighbors
        hops = []
        hops.append(set(self.knn_graph[center_node][1:]))
        
        # Actually we dont need the loop since the depth is fixed here,
        # But we still remain the code for further revision
        for d in range(1, self.depth):
            hops.append(set())
            for h in hops[-2]:
                hops[-1].update(set(self.knn_graph[h][1:self.k_at_hop[d] + 1]))

        hops_set = set([h for hop in hops for h in hop])
        hops_set.update([
            center_node,
        ])

        uniq_nodes = np.array(list(hops_set), dtype=np.int64)
        uniq_nodes_map = {j: i for i, j in enumerate(uniq_nodes)}

        center_idx = np.array([uniq_nodes_map[center_node]], dtype=np.int64)
        one_hop_idxs = np.array([uniq_nodes_map[i] for i in hops[0]],
                                dtype=np.int64)
        center_feat = self.features[center_node]
        feat = self.features[uniq_nodes]
        feat = feat - center_feat

        max_num_nodes = self.k_at_hop[0] * (self.k_at_hop[1] + 1) + 1
        num_nodes = len(uniq_nodes)

        # print('hops size[0]',len(hops[0]))
        # print('hops size[1]',len(hops[1]))
        # print('hops_set size',len(hops_set))
        # print('index[{}] num_node: max={} uniq={}'.format(index,max_num_nodes,num_nodes))
        # print('one_hop_idxs shape:',one_hop_idxs.shape)
        # print('ceter_node:',center_node)
        # print('feat shape',feat.shape)

        A = np.zeros([num_nodes, num_nodes], dtype=feat.dtype)

        res_num_nodes = max_num_nodes - num_nodes
        if res_num_nodes > 0:
            pad_feat = np.zeros([res_num_nodes, self.feature_dim],
                                dtype=feat.dtype)
            feat = np.concatenate([feat, pad_feat], axis=0)
      
        for node in uniq_nodes:
            neighbors = self.knn_graph[node, 1:self.active_connection + 1]
            for n in neighbors:
                if n in uniq_nodes:
                    i, j = uniq_nodes_map[node], uniq_nodes_map[n]
                    A[i, j] = 1
                    A[j, i] = 1

        D = A.sum(1, keepdims=True)
        A = A / D
        A_ = np.zeros([max_num_nodes, max_num_nodes], dtype=A.dtype)
        A_[:num_nodes, :num_nodes] = A

        if self.ignore_label:
            return (feat, A_, center_idx, one_hop_idxs)

        labels = self.labels[uniq_nodes]
        one_hop_labels = labels[one_hop_idxs]
        center_label = labels[center_idx]
        edge_labels = (center_label == one_hop_labels).astype(np.int64)

        # print('feat shape={}, A_ shape={}, one_hop_idxs shape={},'.format(feat.shape))


        if self.is_test:
            if res_num_nodes > 0:
                pad_nodes = np.zeros(res_num_nodes, dtype=uniq_nodes.dtype)
                uniq_nodes = np.concatenate([uniq_nodes, pad_nodes], axis=0)
            return (feat, A_, one_hop_idxs,
                    edge_labels), center_idx, uniq_nodes
        else:
            return (feat, A_, one_hop_idxs, edge_labels)

    def __len__(self):
        return self.size


In [6]:
from mmcv.runner import get_dist_info
from torch.utils.data import DataLoader

from dsgcn.datasets.sampler import (DistributedSampler,
                                    DistributedSequentialSampler)


def build_dataloader(dataset,
                     batch_size_per_gpu,
                     workers_per_gpu,
                     shuffle=False,
                     train=False,
                     **kwargs):
    rank, world_size = get_dist_info()
    if train:
        sampler = DistributedSampler(dataset, world_size, rank, shuffle)
    else:
        sampler = DistributedSequentialSampler(dataset, world_size, rank)
    batch_size = batch_size_per_gpu
    num_workers = workers_per_gpu

    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             sampler=sampler,
                             num_workers=num_workers,
                             pin_memory=False,
                             **kwargs)

    return data_loader


In [7]:
dataset = ClusterDataset(cfg.train_data)
train_dataloader = build_dataloader(dataset,
                         cfg.batch_size_per_gpu,
                         cfg.workers_per_gpu,
                         train=True,
                         shuffle=True)


[./data/labels/part0_train_cut.meta] #cls: 45, #inst: 2948
[Time] read meta and feature consumes 0.0139 s
[Time] read knn graph consumes 0.0456 s
feature shape: (2948, 256), norm_feat: True, sort_knns: True k_at_hop: [50, 10], active_connection: 10
labels shape: (2948,)
knns_graph shape: (2948, 80)


In [9]:
#print train_data.
i=0
for X, A, one_hop_idxs, labels in train_dataloader:
    print('one_hop_idxs shape={}\n one_hope_idxs e.g.={}\n\n'.format(one_hop_idxs.shape,one_hop_idxs))
    print('A shape={}\nA e.g.={}\n\n'.format(A.shape,A[0,:,:]))
    print('X shape={}\nx e.g.={}\n\n'.format(X.shape,X[0,:,:]))
    # print('pred shape={}\npred e.g.={}'.format(pred.shape,pred[0]))
    print('labels shape={}\nlabels e.g.={}\n\n'.format(labels.shape,labels[0]))
    print('labels.view(-1) shape={}\nlabels.view(-1) e.g.={}\n\n'.format(labels.view(-1).shape,labels.view(-1)[0]))
    
    i+=1
    if(i==1):
        break

one_hop_idxs shape=torch.Size([16, 79])
 one_hope_idxs e.g.=tensor([[  0,   1,   2,  ...,  91,  92,  94],
        [  0,   1,   2,  ...,  87,  88,  90],
        [  0,   1,   3,  ...,  88,  89,  90],
        ...,
        [  0,   1,   2,  ...,  82,  83,  84],
        [ 14,  25,  28,  ..., 224, 225, 242],
        [  0,   1,   2,  ...,  79,  80,  81]])


A shape=torch.Size([16, 551, 551])
A e.g.=tensor([[0.0000, 0.0476, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0833, 0.0000, 0.0833,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0455, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]])


X shape=torch.Size([16, 551, 256])
x e.g.=tensor([[ 0.0080,  0.0042, -0.0511,  ...,  0.0111,  0.0433,  0.0533],
        [ 0.0620,  0.0388, -0.0499,  ...,  0.0383,  0.0207,  0.1123],
        [ 0.0088,  0.020

In [7]:
#simple matrix calculation and dimension check
AA=torch.randint(2,size=(2,5,5))
AA2=torch.randint(2,size=(2,5,5))
AA3=torch.randint(2,size=(5,5))
# XX=torch.rand(2,5,3)
XX2=torch.randint(3,size=(2,5,3))
XX3=torch.rand=(5,3)

mat1=AA3
mat2=AA2
mat3=XX3
mat4=XX2

print('mat1={} mat2={}'.format(mat1.shape, mat2.shape))
# print('mat3={} mat4={}'.format(mat3.shape, mat4.shape))
print('@@mat1@@\n', mat1)

# res=torch.matmul(mat1,mat2)
# res=torch.transpose(mat2,1,2)
res1 = AA3
res1 = (AA3 == 1).nonzero(as_tuple=True)
row = res1[0]
col = res1[1]
print('row', row)
print('col',col)
print('result :\n',row*col)
# print('res1 shape:',res1.shape)

# print('res2 result:', res2)
# print('res2 shape:',res2.shape)

mat1=torch.Size([5, 5]) mat2=torch.Size([2, 5, 5])
@@mat1@@
 tensor([[1, 0, 0, 0, 1],
        [0, 0, 1, 1, 0],
        [1, 0, 1, 0, 0],
        [1, 1, 1, 1, 1],
        [0, 1, 1, 1, 0]])
row tensor([0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4])
col tensor([0, 4, 2, 3, 0, 2, 0, 1, 2, 3, 4, 1, 2, 3])
result :
 tensor([ 0,  0,  2,  3,  0,  4,  0,  3,  6,  9, 12,  4,  8, 12])


In [30]:
torch.rand(5,2)


TypeError: 'tuple' object is not callable

In [29]:
#test print shape
class test_class:
    def __init__(self, input_dim=0, output_dim=0, **kwargs):
        self.W, self.vr, self.vs = self.define_weight(input_dim, output_dim)

    def define_weight(self, input_dim, output_dim):
        print(input_dim, output_dim)
        init_range = np.sqrt(6.0/(input_dim + output_dim))
        print(init_range)

        W_init = torch.rand(input_dim, output_dim)#*2*init_range - init_range
        print(W_init)
        W = nn.Parameter(W_init)
        v_init = torch.rand(output_dim,1)*2*init_range - init_range
        vr = nn.Parameter(v_init)
        vs = nn.Parameter(v_init)
        return W, vr, vs

    def Graph_Attention_Layer(self, A, X):
        #compute layer k feature matrix
        print('org X:',X.shape)
        X=torch.matmul(X,self.W)
        print('1st X:',X.shape)
        
        #compute Ms 
        Ms=torch.einsum('bnd,df->bnf', (X, self.vs))
        Ms=torch.matmul(A,Ms) # A*vs*H
        #compute Mr
        Mr=torch.einsum('bnd,df->bnf', (X, self.vr)) # vr*H
        Mr=torch.matmul(A,Mr) # A*vr*H
        Mr=torch.transpose(Mr,1,2)

        #compute attention 
        C=F.sigmoid(Ms+Mr)
        C=F.softmax(C)
        print('C:',C.shape)

        #multiply X and attention score
        X=torch.matmul(C, X)
        print('new X:',X.shape)

        return X

test=test_class(256, 128)
print('W:',test.W.shape)
# print('vr:',test.vr.shape)
# print('vs:',test.vs.shape)
# print('A:',A.shape)
# print('X:',X.shape)
print(test_model.Graph_Attention_Layer(A, X))


256 128
0.125


TypeError: 'tuple' object is not callable

## model

In [21]:

#define GCNConv
class GCNConv(nn.Module):
    def __init__(self, input_dim, output_dim, **kwargs):
        super(GCNConv,self).__init__(**kwargs)
        self.W, self.vr, self.vs = self.define_weight(input_dim, output_dim)

    def define_weight(self, input_dim, output_dim):
        init_range = np.sqrt(6.0/(input_dim + output_dim))
        W_init = torch.rand(input_dim, output_dim)*2*init_range - init_range
        W = nn.Parameter(W_init)
        v_init = torch.rand(output_dim,1)*2*init_range - init_range
        vr = nn.Parameter(v_init)
        vs = nn.Parameter(v_init)
        return W, vr, vs

    def Graph_Attention_Layer(self, A, X):
        #compute layer k feature matrix
        print('org X:', X.shape)
        X=torch.matmul(X, self.W)
        print('1st X:', X.shape)
        
        #compute Ms 
        Ms=torch.einsum('bnd,df->bnf', (X, self.vs))
        Ms=torch.matmul(A,Ms) # A*vs*H
        #compute Mr
        Mr=torch.einsum('bnd,df->bnf', (X, self.vr)) # vr*H
        Mr=torch.matmul(A,Mr) # A*vr*H
        Mr=torch.transpose(Mr,1,2)

        #compute attention 
        C=F.sigmoid(Ms+Mr)
        C=F.softmax(C)
        print('C:',C.shape)

        #multiply X and attention score
        X=torch.matmul(C, X)
        print('new X:',X.shape)

        return X

#define model
class gae(nn.Module):
    def __init__(self, feature_dim):
        super(gae, self).__init__()
        self.encode_gcn1 = GCNConv(feature_dim, 128)
        self.encode_gcn2 = GCNConv(128, 128)
        self.decode_gcn1 = GCNConv(128, 128)
        self.decode_gcn2 = GCNConv(128, 256)
        
    def encoder(self, A, X):
        X = self.encode_gcn1(A, X)
        X = self.encode_gcn2(A, X)
        return X

    def decoder(self, A, X):
        X = self.decode_gcn1(A, X)
        X = self.decode_gcn2(A, X)
        return X

    def forward(self, data, return_loss=False):
        X, A, one_hope_idxs, labels = data

        X_org = X
        X = encoder(A, X_org)

        self.X_latent = X

        X = decoder(A, X)
        X_recon = X

        feat_loss = torch.sqrt(torch.sum((X_org - X_recon)**2))
        graph_loss = 
    
    

    





SyntaxError: invalid syntax (<ipython-input-21-664988d1ed97>, line 72)

## Training execution

In [48]:
# run model with epoches
model = gae()
# loss_fn = lossfn()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)
epoches = cfg.total_epoches

for t in range(epoches):
    X, A, _, _ = train_dataloader

    y_pred, loss = model(X)
    # loss = loss_fn(y_pred, y_true)
    if t % 100 == 99:
        print(t, loss.item)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')


NameError: name 'x' is not defined