## utils.py

In [3]:
import warnings
import networkx as nx
import numpy as np
import scipy.sparse as sp
from sklearn.metrics import roc_auc_score, average_precision_score
import copy


In [4]:
# Half-open [start, end) index ranges of the four node types, in type order.
node_trackers = [
    (0, 218),
    (218, 13557),
    (13557, 14266),
    (14266, 67117),
]

# Named aliases for the same boundaries, one pair per node type.
(
    (node1_start, node1_end),
    (node2_start, node2_end),
    (node3_start, node3_end),
    (node4_start, node4_end),
) = node_trackers


In [5]:
def determine_type(node_no):
    """Return the node-type id (0-3) for a global node index.

    Types 0, 1 and 2 are matched against their half-open ``[start, end)``
    ranges; every index outside those three ranges is reported as type 3.
    """
    if node1_start <= node_no < node1_end:
        return 0
    if node2_start <= node_no < node2_end:
        return 1
    if node3_start <= node_no < node3_end:
        return 2
    # Anything else (including indices outside every declared range) is type 3.
    return 3

In [6]:
class DataUtils:
    """Load a graph from an ``.npz`` archive and serve edge batches with negative samples.

    The archive must contain the CSR pieces of the adjacency matrix
    (``adj_data``, ``adj_indices``, ``adj_indptr``, ``adj_shape``) and may
    contain four attribute matrices (``attr_*1`` .. ``attr_*4``) and ``labels``.

    Args:
        graph_file: Path (or file object) accepted by ``np.load``.
        is_all: When false, an archive that contains ``val_edges`` is rejected
            because validation splits are not supported here.
        node_negative_distribution_temp: Optional un-normalised negative-sampling
            weights, one per node. When given, two type-restricted samplers
            (``node_sampling_type1`` / ``node_sampling_type2``) are built from
            it; the two types are taken from the end points of the first edge.
            When omitted, the weights are computed from this graph and a single
            global sampler (``node_sampling``) is built instead.
        test_indices: Optional nodes whose incoming and outgoing edges are
            removed from the graph before any statistics are computed.

    Raises:
        Exception: If ``val_edges`` is present while ``is_all`` is false, or if
            node labels are not the consecutive integers ``0..n-1``.
        IndexError: If typed samplers are requested for a graph without edges.
    """

    def __init__(self, graph_file, is_all=False, node_negative_distribution_temp = None, test_indices = None):
        self.test_indices = test_indices

        # dict() reads every array eagerly, so the archive can be closed right away.
        with np.load(graph_file, allow_pickle=True) as archive:
            loader = dict(archive)

        self.A = self._load_csr(loader, 'adj')

        # ``.all()`` on an object array can itself be None; presumably this guards
        # archives where the attribute slot was saved as None - TODO confirm.
        attr_data1 = loader.get('attr_data1')
        if attr_data1 is not None and attr_data1.all() is not None:
            print("Attributes Found.")
            self.X1 = self._load_csr(loader, 'attr', '1')
            self.X2 = self._load_csr(loader, 'attr', '2')
            self.X3 = self._load_csr(loader, 'attr', '3')
            self.X4 = self._load_csr(loader, 'attr', '4')

        self.labels = loader.get('labels')

        if not is_all and 'val_edges' in loader:
            raise Exception("val not included yet")

        # from_scipy_sparse_matrix was removed in networkx 3.0; prefer its
        # replacement when the installed version provides it.
        build_graph = getattr(nx, 'from_scipy_sparse_array', None)
        if build_graph is None:
            build_graph = nx.from_scipy_sparse_matrix
        self.g = build_graph(self.A, create_using=nx.DiGraph())

        if self.test_indices is not None:
            # Detach the held-out nodes completely (both edge directions).
            self.g.remove_edges_from(list(self.g.in_edges(test_indices)))
            self.g.remove_edges_from(list(self.g.out_edges(test_indices)))

        self.num_of_nodes = self.g.number_of_nodes()
        self.num_of_edges = self.g.number_of_edges()
        self.edges_raw = self.g.edges(data=True)
        self.nodes_raw = self.g.nodes(data=True)

        # Edges are drawn with probability proportional to the inverse of their weight.
        self.edge_distribution = np.array(
            [1/attr['weight'] if attr['weight'] > 0 else 0 for _, _, attr in self.edges_raw],
            dtype=np.float32)
        self.edge_distribution /= np.sum(self.edge_distribution)
        self.edge_sampling = AliasSampling(prob=self.edge_distribution)

        if node_negative_distribution_temp is not None:
            self.node_negative_distribution_temp = node_negative_distribution_temp

            # The first edge decides which two node types this graph connects.
            first_edge = next(iter(self.edges_raw), None)
            if first_edge is None:
                raise IndexError("graph has no edges; cannot determine node types")
            sample_node0, sample_node1, _ = first_edge
            node_type1, node_type2 = determine_type(sample_node0), determine_type(sample_node1)

            (self.node_negative_distribution_temp_type1,
             self.node_negative_distribution_type1,
             self.node_sampling_type1) = self._typed_negative_sampler(node_type1, "1")
            print(self.node_negative_distribution_temp_type1)

            (self.node_negative_distribution_temp_type2,
             self.node_negative_distribution_type2,
             self.node_sampling_type2) = self._typed_negative_sampler(node_type2, "2")
        else:
            print("Calculating global graph node properties...")
            g_temp = self.g.to_undirected().to_directed()
            print(g_temp.degree(0))
            # Look every weighted degree up once instead of twice per node.
            weighted_degree = dict(g_temp.degree(weight='weight'))
            inverse_degree = [
                1/weighted_degree[node] if weighted_degree[node] > 0 else 0
                for node, _ in self.nodes_raw
            ]
            self.node_negative_distribution_temp = np.power(
                np.array(inverse_degree, dtype=np.float32), 0.75)
            self.node_negative_distribution = (
                self.node_negative_distribution_temp/np.sum(self.node_negative_distribution_temp))
            self.node_sampling = AliasSampling(prob=self.node_negative_distribution)

        self.node_index = {}
        self.node_index_reversed = {}
        for index, (node, _) in enumerate(self.nodes_raw):
            # Node labels must already be the consecutive integers 0..n-1.
            if index != node:
                raise Exception("Discrepancy!!!!")
            self.node_index[node] = index
            self.node_index_reversed[index] = node
        self.edges = [(self.node_index[u], self.node_index[v]) for u, v, _ in self.edges_raw]

    @staticmethod
    def _load_csr(loader, prefix, suffix=''):
        """Rebuild the CSR matrix stored under ``<prefix>_{data,indices,indptr,shape}<suffix>``."""
        return sp.csr_matrix(
            (loader[prefix + '_data' + suffix],
             loader[prefix + '_indices' + suffix],
             loader[prefix + '_indptr' + suffix]),
            shape=loader[prefix + '_shape' + suffix])

    def _typed_negative_sampler(self, node_type, label):
        """Restrict the negative-sampling weights to one node type and build its sampler.

        Returns the masked weights, their normalised form and the alias sampler.
        """
        start, end = node_trackers[node_type]
        masked = copy.deepcopy(self.node_negative_distribution_temp)
        masked[0: start] = 0
        masked[end:] = 0
        total = np.sum(masked)
        print("sum " + label + " " + str(total))
        if total == 0:
            # Best effort: warn and carry on, as before; the division below yields NaNs.
            print("ZERO DIVIDE WARNING!!! ")
        normalised = masked/total
        return masked, normalised, AliasSampling(prob=normalised)

    def fetch_next_batch(self, batch_size=16, K=5):
        """Sample ``batch_size`` edges plus ``K`` negative partners for each of them.

        With probability one half the whole batch is reversed, i.e. every
        sampled edge is used as (target, source). Requires the type-specific
        samplers, which only exist when the instance was built with
        ``node_negative_distribution_temp``.

        Returns:
            ``(u_i, u_j, label, node_type_1, node_type_2)`` where ``label`` is 1
            for sampled edges and -1 for negatives, and the node types are those
            of the last processed edge (``None`` for an empty batch).
        """
        # sampling() collapses a single draw to a scalar; make it iterable again.
        edge_batch_index = np.atleast_1d(self.edge_sampling.sampling(batch_size))

        u_i = []
        u_j = []
        label = []
        node_type_1 = node_type_2 = None
        reverse = bool(np.random.rand() > 0.5)

        for edge_index in edge_batch_index:
            source, target = self.edges[edge_index]
            if reverse:
                source, target = target, source
            u_i.append(source)
            u_j.append(target)
            label.append(1)
            node_type_1 = determine_type(source)
            node_type_2 = determine_type(target)

            # Negatives replace the second end point, so draw from that side's sampler.
            negative_sampler = self.node_sampling_type1 if reverse else self.node_sampling_type2
            source_node = self.node_index_reversed[source]

            for _ in range(K):
                # Redraw until the candidate has the right type and is not linked to the source.
                while True:
                    negative_node = negative_sampler.sampling()
                    if (determine_type(negative_node) == node_type_2
                            and not self.g.has_edge(self.node_index_reversed[negative_node], source_node)):
                        break

                u_i.append(source)
                u_j.append(negative_node)
                label.append(-1)

        return u_i, u_j, label, node_type_1, node_type_2

    def embedding_mapping(self, embedding):
        """Return ``{node: embedding row}`` for every node of the graph."""
        return {node: embedding[self.node_index[node]] for node, _ in self.nodes_raw}

In [7]:
class AliasSampling:
    """Alias-method sampler over a discrete probability vector.

    ``U`` holds the scaled acceptance thresholds and ``K`` the alias index of
    every slot.
    """
    # Reference: LINE source code from https://github.com/snowkylin/line
    # Reference: https://en.wikipedia.org/wiki/Alias_method
    def __init__(self, prob):
        size = len(prob)
        self.n = size
        self.U = np.array(prob) * size
        self.K = list(range(size))

        # Slots holding more / less than the average mass of 1.
        large = [slot for slot, mass in enumerate(self.U) if mass > 1]
        small = [slot for slot, mass in enumerate(self.U) if mass < 1]

        while large and small:
            donor = large.pop()
            receiver = small.pop()
            # The receiver's free space is filled from the donor.
            self.K[receiver] = donor
            self.U[donor] = self.U[donor] - (1 - self.U[receiver])
            remaining = self.U[donor]
            if remaining > 1:
                large.append(donor)
            elif remaining < 1:
                small.append(donor)

    def sampling(self, n=1):
        """Draw ``n`` indices; a single draw is returned as a scalar, otherwise a list."""
        scaled = self.n * np.random.rand(n)
        whole = np.floor(scaled)
        fraction = scaled - whole
        slots = whole.astype(np.int32)
        # Keep the slot itself below its threshold, otherwise take its alias.
        picks = [slot if frac < self.U[slot] else self.K[slot]
                 for slot, frac in zip(slots, fraction)]
        return picks[0] if n == 1 else picks

In [8]:
def train_val_test_split(graph_file, p_test=0.10, p_val=0.05):
    """Load a graph archive and split its edges into train / validation / test sets.

    Args:
        graph_file: ``.npz`` archive holding the CSR pieces of the adjacency
            matrix (``adj_*``) and of one attribute matrix (``attr_*``), plus
            optionally ``labels``.
        p_test: Fraction of edges held out for testing.
        p_val: Fraction of edges held out for validation.

    Returns:
        ``(A, X, labels, val_edges, val_ground_truth, test_edges,
        test_ground_truth)`` where ``A`` is the training adjacency matrix. When
        ``p_val == 0`` the validation outputs mirror the test outputs; when
        ``p_test == 0`` the test outputs are ``None``.
    """
    # dict() reads every array eagerly, so the archive can be closed right away.
    with np.load(graph_file, allow_pickle=True) as archive:
        loader = dict(archive)

    A = sp.csr_matrix((loader['adj_data'], loader['adj_indices'],
                       loader['adj_indptr']), shape=loader['adj_shape'])

    X = sp.csr_matrix((loader['attr_data'], loader['attr_indices'],
                       loader['attr_indptr']), shape=loader['attr_shape'])

    labels = loader.get('labels')

    train_ones, val_ones, val_zeros, test_ones, test_zeros = _train_val_test_split_adjacency(
        A=A, p_test=p_test, p_val=p_val, neg_mul=1, every_node=True, connected=False,
        undirected=(A != A.T).nnz == 0)

    # Defined up front so the return statement never hits an unbound name
    # (previously p_test == 0 raised UnboundLocalError).
    val_edges = val_ground_truth = None
    test_edges = test_ground_truth = None

    if p_val > 0:
        val_edges = np.vstack((val_ones, val_zeros))
        val_ground_truth = A[val_edges[:, 0], val_edges[:, 1]].A1
        # Binarise: any positive weight counts as an existing edge.
        val_ground_truth = np.where(val_ground_truth > 0, 1, val_ground_truth)
    if p_test > 0:
        test_edges = np.vstack((test_ones, test_zeros))
        test_ground_truth = A[test_edges[:, 0], test_edges[:, 1]].A1
        test_ground_truth = np.where(test_ground_truth > 0, 1, test_ground_truth)
        if p_val == 0:
            val_edges = test_edges
            val_ground_truth = test_ground_truth

    A = edges_to_sparse(train_ones, A.shape[0])
    return A, X, labels, val_edges, val_ground_truth, test_edges, test_ground_truth


In [9]:
def _train_val_test_split_adjacency(A, p_val=0.10, p_test=0.05, seed=0, neg_mul=1,
                                    every_node=True, connected=False, undirected=False,
                                    use_edge_cover=True, set_ops=True, asserts=False):
    # Reference: G2G source code from https://github.com/abojchevski/graph2gauss
    assert p_val + p_test > 0
    assert A.min() == 0  # no negative edges
    assert A.diagonal().sum() == 0  # no self-loops
    assert not np.any(A.sum(0).A1 + A.sum(1).A1 == 0)  # no dangling nodes
    is_undirected = (A != A.T).nnz == 0

    if undirected:
        assert is_undirected  # make sure is directed
        A = sp.tril(A).tocsr()  #    consider only upper triangular
        A.eliminate_zeros()
    else:
        if is_undirected:
            warnings.warn('Graph appears to be undirected. Did you forgot to set undirected=True?')

    np.random.seed(seed)

    E = A.nnz
    N = A.shape[0]
    s_train = int(E * (1 - p_val - p_test))

    idx = np.arange(N)

    # hold some edges so each node appears at least once
    if every_node:
        if connected:
            assert sp.csgraph.connected_components(A)[0] == 1  # make sure original graph is connected
            A_hold = sp.csgraph.minimum_spanning_tree(A)
        else:
            A.eliminate_zeros()  # makes sure A.tolil().rows contains only indices of non-zero elements
            d = A.sum(1).A1

            if use_edge_cover:
                hold_edges = edge_cover(A)

                # make sure the training percentage is not smaller than len(edge_cover)/E when every_node is set to True
                min_size = hold_edges.shape[0]
                if min_size > s_train:
                    raise ValueError('Training percentage too low to guarantee every node. Min train size needed {:.2f}'
                                     .format(min_size / E))
            else:
                # make sure the training percentage is not smaller than N/E when every_node is set to True
                if N > s_train:
                    raise ValueError('Training percentage too low to guarantee every node. Min train size needed {:.2f}'
                                     .format(N / E))

                hold_edges_d1 = np.column_stack(
                    (idx[d > 0], np.row_stack(map(np.random.choice, A[d > 0].tolil().rows))))

                if np.any(d == 0):
                    hold_edges_d0 = np.column_stack((np.row_stack(map(np.random.choice, A[:, d == 0].T.tolil().rows)),
                                                     idx[d == 0]))
                    hold_edges = np.row_stack((hold_edges_d0, hold_edges_d1))
                else:
                    hold_edges = hold_edges_d1

            if asserts:
                assert np.all(A[hold_edges[:, 0], hold_edges[:, 1]])
                assert len(np.unique(hold_edges.flatten())) == N

            A_hold = edges_to_sparse(hold_edges, N)

        A_hold[A_hold > 1] = 1
        A_hold.eliminate_zeros()
        A_sample = A - A_hold

        s_train = s_train - A_hold.nnz
    else:
        A_sample = A

    idx_ones = np.random.permutation(A_sample.nnz)
    ones = np.column_stack(A_sample.nonzero())
    train_ones = ones[idx_ones[:s_train]]
    test_ones = ones[idx_ones[s_train:]]

    # return back the held edges
    if every_node:
        train_ones = np.row_stack((train_ones, np.column_stack(A_hold.nonzero())))

    n_test = len(test_ones) * neg_mul
    if set_ops:
        # generate slightly more completely random non-edge indices than needed and discard any that hit an edge
        # much faster compared a while loop
        # in the future: estimate the multiplicity (currently fixed 1.3/2.3) based on A_obs.nnz
        if undirected:
            random_sample = np.random.randint(0, N, [int(2.3 * n_test), 2])
            random_sample = random_sample[random_sample[:, 0] > random_sample[:, 1]] #only upper triangle
        else:
            random_sample = np.random.randint(0, N, [int(1.3 * n_test), 2])
            random_sample = random_sample[random_sample[:, 0] != random_sample[:, 1]]

        # discard ones
        random_sample = random_sample[A[random_sample[:, 0], random_sample[:, 1]].A1 == 0]
        # discard duplicates
        random_sample = random_sample[np.unique(random_sample[:, 0] * N + random_sample[:, 1], return_index=True)[1]]
        # only take as much as needed
        test_zeros = np.row_stack(random_sample)[:n_test]
        assert test_zeros.shape[0] == n_test
    else:
        test_zeros = []
        while len(test_zeros) < n_test:
            i, j = np.random.randint(0, N, 2)
            if A[i, j] == 0 and (not undirected or i > j) and (i, j) not in test_zeros:
                test_zeros.append((i, j))
        test_zeros = np.array(test_zeros)

    # split the test set into validation and test set
    s_val_ones = int(len(test_ones) * p_val / (p_val + p_test))
    s_val_zeros = int(len(test_zeros) * p_val / (p_val + p_test))

    val_ones = test_ones[:s_val_ones]
    test_ones = test_ones[s_val_ones:]

    val_zeros = test_zeros[:s_val_zeros]
    test_zeros = test_zeros[s_val_zeros:]

    if undirected:
        # put (j, i) edges for every (i, j) edge in the respective sets and form back original A
        symmetrize = lambda x: np.row_stack((x, np.column_stack((x[:, 1], x[:, 0]))))
        train_ones = symmetrize(train_ones)
        val_ones = symmetrize(val_ones)
        val_zeros = symmetrize(val_zeros)
        test_ones = symmetrize(test_ones)
        test_zeros = symmetrize(test_zeros)
        A = A.maximum(A.T)

    if asserts:
        set_of_train_ones = set(map(tuple, train_ones))
        assert train_ones.shape[0] + test_ones.shape[0] + val_ones.shape[0] == A.nnz
        assert (edges_to_sparse(np.row_stack((train_ones, test_ones, val_ones)), N) != A).nnz == 0
        assert set_of_train_ones.intersection(set(map(tuple, test_ones))) == set()
        assert set_of_train_ones.intersection(set(map(tuple, val_ones))) == set()
        assert set_of_train_ones.intersection(set(map(tuple, test_zeros))) == set()
        assert set_of_train_ones.intersection(set(map(tuple, val_zeros))) == set()
        assert len(set(map(tuple, test_zeros))) == len(test_ones) * neg_mul
        assert len(set(map(tuple, val_zeros))) == len(val_ones) * neg_mul
        assert not connected or sp.csgraph.connected_components(A_hold)[0] == 1
        assert not every_node or ((A_hold - A) > 0).sum() == 0

    return train_ones, val_ones, val_zeros, test_ones, test_zeros

In [10]:
def edge_cover(A):
    """Return a set of edges of ``A`` that touches every node at least once.

    Args:
        A: Sparse adjacency matrix; entry ``(u, v)`` is an edge from ``u`` to ``v``.

    Returns:
        Array of ``(u, v)`` index pairs, one row per selected edge. Assertions
        check that the rows are unique and that all ``N`` nodes are covered.
    """
    # Reference: G2G source code from https://github.com/abojchevski/graph2gauss
    N = A.shape[0]
    d_in = A.sum(0).A1
    d_out = A.sum(1).A1

    # make sure to include singleton nodes (nodes with one incoming or one outgoing edge)
    one_in = np.where((d_in == 1) & (d_out == 0))[0]
    one_out = np.where((d_in == 0) & (d_out == 1))[0]

    # The only edge of each singleton node has to be part of the cover.
    edges = np.vstack((
        np.column_stack((A[:, one_in].argmax(0).A1, one_in)),
        np.column_stack((one_out, A[one_out].argmax(1).A1)),
    ))

    edge_cover_set = set(map(tuple, edges))
    nodes = set(edges.flatten())

    # greedily add other edges such that both end-points are not yet in the edge_cover_set;
    # candidates are visited in order of increasing in-degree of their target
    cands = np.column_stack(A.nonzero())
    for u, v in cands[d_in[cands[:, 1]].argsort()]:
        if u not in nodes and v not in nodes and u != v:
            edge_cover_set.add((u, v))
            nodes.add(u)
            nodes.add(v)
        if len(nodes) == N:
            break

    # add a single edge for the rest of the nodes not covered so far
    not_covered = np.setdiff1d(np.arange(N), list(nodes))
    edges = [list(edge_cover_set)]

    # Uncovered nodes with outgoing edges contribute one outgoing edge ...
    not_covered_out = not_covered[d_out[not_covered] > 0]
    if len(not_covered_out) > 0:
        edges.append(np.column_stack((not_covered_out, A[not_covered_out].argmax(1).A1)))

    # ... all remaining ones contribute one incoming edge.
    not_covered_in = not_covered[d_out[not_covered] == 0]
    if len(not_covered_in) > 0:
        edges.append(np.column_stack((A[:, not_covered_in].argmax(0).A1, not_covered_in)))

    edges = np.vstack(edges)

    # make sure that we've indeed computed an edge_cover
    # assert A[edges[:, 0], edges[:, 1]].sum() == len(edges)
    assert len(set(map(tuple, edges))) == len(edges)
    assert len(np.unique(edges)) == N

    return edges

In [11]:
def edges_to_sparse(edges, N, values=None):
    """Build an ``N x N`` CSR matrix from an array of ``(row, col)`` pairs.

    Every edge gets weight 1 unless ``values`` supplies one entry per edge.
    """
    weights = np.ones(edges.shape[0]) if values is None else values
    rows = edges[:, 0]
    cols = edges[:, 1]
    coo = sp.coo_matrix((weights, (rows, cols)), shape=(N, N))
    return coo.tocsr()

In [12]:
def score_link_prediction(labels, scores):
    """Return ``(ROC AUC, average precision)`` of ``scores`` against ``labels``."""
    auc = roc_auc_score(labels, scores)
    average_precision = average_precision_score(labels, scores)
    return auc, average_precision

## model.py

In [16]:
! pip install -q -U tensorflow_addons

[?25l[K     |▎                               | 10 kB 16.8 MB/s eta 0:00:01[K     |▋                               | 20 kB 4.4 MB/s eta 0:00:01[K     |█                               | 30 kB 6.2 MB/s eta 0:00:01[K     |█▏                              | 40 kB 4.5 MB/s eta 0:00:01[K     |█▌                              | 51 kB 4.4 MB/s eta 0:00:01[K     |█▉                              | 61 kB 5.2 MB/s eta 0:00:01[K     |██                              | 71 kB 5.0 MB/s eta 0:00:01[K     |██▍                             | 81 kB 4.9 MB/s eta 0:00:01[K     |██▊                             | 92 kB 5.2 MB/s eta 0:00:01[K     |███                             | 102 kB 5.0 MB/s eta 0:00:01[K     |███▎                            | 112 kB 5.0 MB/s eta 0:00:01[K     |███▋                            | 122 kB 5.0 MB/s eta 0:00:01[K     |███▉                            | 133 kB 5.0 MB/s eta 0:00:01[K     |████▏                           | 143 kB 5.0 MB/s eta 0:00:01[K    

In [22]:
import tensorflow as tf 
import scipy.sparse as sp 
import numpy as np 
#import cyclic_learning_rate.clr as clr

seed = 42

In [23]:
def sparse_feeder(M):
    """Convert a matrix to the ``(indices, values, shape)`` triple of its float32 COO form.

    ``indices`` has one ``[row, col]`` pair per stored entry.
    """
    coo = sp.coo_matrix(M, dtype = np.float32)
    indices = np.array([coo.row, coo.col]).T
    return indices, coo.data, coo.shape

In [24]:
class GSNE:
    def __init__(self, args):
        """Build the TensorFlow 1.x graph: inputs, encoders, loss and training op.

        ``args`` is read for ``X1``..``X4`` (one attribute matrix per node
        type), ``embedding_dim``, ``batch_size``, ``K``, ``proximity`` and
        ``learning_rate`` (the latter is only printed here).

        NOTE(review): ``clr`` is used below, but its import
        (``cyclic_learning_rate.clr``) is commented out in the import cell, so
        this constructor raises NameError unless ``clr`` is defined elsewhere.
        """
        # Graph-level seed taken from the module-level ``seed`` constant.
        tf.set_random_seed(seed)
        # Attribute matrices become constant sparse tensors inside the graph.
        self.X1 = tf.SparseTensor(*sparse_feeder(args.X1))
        self.X2 = tf.SparseTensor(*sparse_feeder(args.X2))
        self.X3 = tf.SparseTensor(*sparse_feeder(args.X3))
        self.X4 = tf.SparseTensor(*sparse_feeder(args.X4))

        # Row and column counts of each attribute matrix.
        self.N1, self.D1 = args.X1.shape
        self.N2, self.D2 = args.X2.shape
        self.N3, self.D3 = args.X3.shape
        self.N4, self.D4 = args.X4.shape  

        self.L = args.embedding_dim

        #PLEASE ENSURE THE LAST LAYER DIMENSION IS SAME FOR EVERYONE

        # Hidden layer widths of the four encoders (one list per attribute matrix).
        self.n_hidden1 = [6, 12, 28]
        self.n_hidden2 = [10, 16, 28]
        self.n_hidden3 = [10, 16, 28]
        self.n_hidden4 = [42, 36, 28]

        '''self.n_hidden1 = [14]
        self.n_hidden2 = [14]
        self.n_hidden3 = [14]
        self.n_hidden4 = [14]'''

        # One slot per sampled edge plus K negatives for each of them.
        self.u_i = tf.placeholder(name='u_i', dtype=tf.int32, shape=[args.batch_size * (args.K + 1)])
        self.u_j = tf.placeholder(name='u_j', dtype=tf.int32, shape=[args.batch_size * (args.K + 1)])
        self.label = tf.placeholder(name='label', dtype=tf.float32, shape=[args.batch_size * (args.K + 1)])
        # Scalar node-type ids for the two sides of the batch.
        self.node_type1 = tf.placeholder(name='node_type1', dtype=tf.int32, shape = ())
        self.node_type2 = tf.placeholder(name='node_type2', dtype=tf.int32, shape = ())

        self.__create_model(args.proximity)
        self.val_set = False
        
        tf.train.create_global_step()

        # softmax loss
        # NOTE(review): the expression below is a mean negative log-sigmoid of
        # label * energy; energy_kl is defined outside the visible part of the class.
        
        self.energy = -self.energy_kl(self.u_i, self.u_j, args.proximity, self.node_type1, self.node_type2)
        self.loss = -tf.reduce_mean(tf.log_sigmoid(self.label * self.energy))
        tf.summary.scalar('loss', self.loss)
        print(args.learning_rate)

        '''for cyclic learning rate'''
        global_step = tf.train.get_global_step()
        #learning_rate = tf.train.exponential_decay((1e-9), global_step=global_step,decay_steps=10, decay_rate=1.04)
        # The schedule uses the hard-coded bounds below, not args.learning_rate.
        learning_rate = clr.cyclic_learning_rate(global_step=global_step, learning_rate=1e-4,
                         max_lr=19e-5,
                         step_size=100, mode='exp_range', gamma = 0.99999)
        original_optimizer = tf.train.AdamOptimizer(learning_rate)
        tf.summary.scalar('learning_rate', learning_rate)
        tf.summary.scalar("current_step",global_step)


        ###########################################################


        #original_optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        # Wrap Adam so gradients are clipped by norm (clip_norm=5.0) before being applied.
        self.optimizer = tf.contrib.estimator.clip_gradients_by_norm(original_optimizer, clip_norm=5.0)

        

        ''' tvs = tf.trainable_variables()
        accum_vars = [tf.Variable(tf.zeros_like(tv.initialized_value()), trainable=False) for tv in tvs]                                        
        self.zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]
        gvs = self.optimizer.compute_gradients(self.loss, tvs)
        self.accum_ops = [accum_vars[i].assign_add(gv[0]) for i, gv in enumerate(gvs)]

        #After getting all the gradients in the five steps, we calculate the train step
        self.train_step = self.optimizer.apply_gradients([(accum_vars[i], gv[1]) for i, gv in enumerate(gvs)])
 '''
        # global_step is passed to minimize() so it advances on every training step.
        self.train_op = self.optimizer.minimize(self.loss, global_step = global_step)
        # Must come last so that every summary registered above is included.
        self.merged_summary = tf.summary.merge_all()
        

    def __create_model(self, proximity):
        w_init = tf.contrib.layers.xavier_initializer
        #w_init = tf.random_normal_initializer(mean=0.0, stddev=0.1, seed=None)
        #w_init = tf.keras.initializers.random_normal
        sizes1 = [self.D1] + self.n_hidden1
        sizes2 = [self.D2] + self.n_hidden2
        sizes3 = [self.D3] + self.n_hidden3
        sizes4 = [self.D4] + self.n_hidden4

        #feature 1
        TRAINABLE = True
        with tf.name_scope("Train"):
          for i in range(1, len(sizes1)):
             with tf.name_scope("enc{}".format(i)):
              W = tf.get_variable(name='W1{}'.format(i), shape=[sizes1[i - 1], sizes1[i]], dtype=tf.float32,
                                  initializer=w_init(), trainable = TRAINABLE)
              b = tf.get_variable(name='b1{}'.format(i), shape=[sizes1[i]], dtype=tf.float32, initializer=w_init(), trainable = TRAINABLE)

              if i == 1:
                  encoded1 = tf.sparse_tensor_dense_matmul(self.X1, W) + b
              else:
                  encoded1 = tf.matmul(encoded1, W) + b

              encoded1 = tf.nn.relu(encoded1)

              tf.summary.histogram('Weight', W)
              tf.summary.histogram('bias', b)
              tf.summary.histogram('activations', encoded1)


        #encoded1 = tf.Print(encoded1, [encoded1], message = "feature 1 encoder triggered")

        #feature 2
        TRAINABLE = True
        with tf.name_scope("Region"):
          for i in range(1, len(sizes2)):
            with tf.name_scope("enc{}".format(i)):
              W = tf.get_variable(name='W2{}'.format(i), shape=[sizes2[i - 1], sizes2[i]], dtype=tf.float32,
                                  initializer=w_init(), trainable = TRAINABLE)
              b = tf.get_variable(name='b2{}'.format(i), shape=[sizes2[i]], dtype=tf.float32, initializer=w_init(), trainable = TRAINABLE)

              if i == 1:
                  encoded2 = tf.sparse_tensor_dense_matmul(self.X2, W) + b
              else:
                  encoded2 = tf.matmul(encoded2, W) + b

              encoded2 = tf.nn.relu(encoded2)

              tf.summary.histogram('Weight', W)
              tf.summary.histogram('bias', b)
              tf.summary.histogram('activations', encoded2)

        #encoded2 = tf.Print(encoded2, [encoded2], message = "feature 2 encoder triggered")

        #feature 3
        TRAINABLE = True
        with tf.name_scope("School"):
          for i in range(1, len(sizes3)):
            with tf.name_scope("enc{}".format(i)):
              W = tf.get_variable(name='W3{}'.format(i), shape=[sizes3[i - 1], sizes3[i]], dtype=tf.float32,
                                  initializer=w_init(), trainable = TRAINABLE)
              b = tf.get_variable(name='b3{}'.format(i), shape=[sizes3[i]], dtype=tf.float32, initializer=w_init(), trainable = TRAINABLE)

              if i == 1:
                  encoded3 = tf.sparse_tensor_dense_matmul(self.X3, W) + b
              else:
                  encoded3 = tf.matmul(encoded3, W) + b

              encoded3 = tf.nn.relu(encoded3)

              tf.summary.histogram('Weight', W)
              tf.summary.histogram('bias', b)
              tf.summary.histogram('activations', encoded3)


        #encoded3 = tf.Print(encoded3, [encoded3], message = "feature 3 encoder triggered")

        #feature 4
        TRAINABLE = True
        with tf.name_scope("House"):
          for i in range(1, len(sizes4)):
            with tf.name_scope("enc{}".format(i)):
              W = tf.get_variable(name='W4{}'.format(i), shape=[sizes4[i - 1], sizes4[i]], dtype=tf.float32,
                                  initializer=w_init(), trainable = TRAINABLE)
              b = tf.get_variable(name='b4{}'.format(i), shape=[sizes4[i]], dtype=tf.float32, initializer=w_init(), trainable = TRAINABLE)

              if i == 1:
                  encoded4 = tf.sparse_tensor_dense_matmul(self.X4, W) + b
              else:
                  encoded4 = tf.matmul(encoded4, W) + b

              encoded4 = tf.nn.relu(encoded4)

              tf.summary.histogram('Weight', W)
              tf.summary.histogram('bias', b)
              tf.summary.histogram('activations', encoded4)

        #encoded4 = tf.Print(encoded4, [encoded4], message = "feature 4 encoder triggered")

        #W-MU/SIGMA AND B-MU/SIGMA IS SHARED BETWEEN ALL FEATURES
        #SHAPE: THOUGH WE USED SIZES1[-1], KEEP IN MIND THAT'S SAME FOR ALL SHAPES

        """ W_mu = tf.get_variable(name='W_mu', shape=[sizes1[-1], self.L], dtype=tf.float32, initializer=w_init())
        b_mu = tf.get_variable(name='b_mu', shape=[self.L], dtype=tf.float32, initializer=w_init()) 
        self.embedding1 = tf.matmul(encoded1, W_mu) + b_mu
        self.embedding2 = tf.matmul(encoded2, W_mu) + b_mu
        self.embedding3 = tf.matmul(encoded3, W_mu) + b_mu
        self.embedding4 = tf.matmul(encoded4, W_mu) + b_mu

        with tf.name_scope("shared"):
          with tf.name_scope("mu"):
            tf.summary.histogram('Weight', W_mu)
            tf.summary.histogram('bias', b_mu)
            mu_embed_activations = [self.embedding1, self.embedding2, self.embedding3, self.embedding4]
            tf.summary.histogram('activations', tf.concat(mu_embed_activations, 0)) """

      
        '''self.embedding1 = tf.nn.sigmoid(tf.matmul(encoded1, W_mu) + b_mu) + 1 + 1e-14
        self.embedding2 = tf.nn.sigmoid(tf.matmul(encoded2, W_mu) + b_mu) + 1 + 1e-14
        self.embedding3 = tf.nn.sigmoid(tf.matmul(encoded3, W_mu) + b_mu) + 1 + 1e-14
        self.embedding4 = tf.nn.sigmoid(tf.matmul(encoded4, W_mu) + b_mu) + 1 + 1e-14'''

        """ W_sigma = tf.get_variable(name='W_sigma', shape=[sizes1[-1], self.L], dtype=tf.float32, initializer=w_init())
        b_sigma = tf.get_variable(name='b_sigma', shape=[self.L], dtype=tf.float32, initializer=w_init())
        log_sigma1 = tf.matmul(encoded1, W_sigma) + b_sigma
        self.sigma1 = tf.nn.elu(log_sigma1) + 1 + 1e-14
        #self.sigma1 = tf.nn.sigmoid(log_sigma1) + 1 + 1e-14

        log_sigma2 = tf.matmul(encoded2, W_sigma) + b_sigma
        self.sigma2 = tf.nn.elu(log_sigma2) + 1 + 1e-14
        #self.sigma2 = tf.nn.sigmoid(log_sigma2) + 1 + 1e-14

        log_sigma3 = tf.matmul(encoded3, W_sigma) + b_sigma
        self.sigma3 = tf.nn.elu(log_sigma3) + 1 + 1e-14
        #self.sigma3 = tf.nn.sigmoid(log_sigma3) + 1 + 1e-14

        log_sigma4 = tf.matmul(encoded4, W_sigma) + b_sigma
        self.sigma4 = tf.nn.elu(log_sigma4) + 1 + 1e-14
        #self.sigma4 = tf.nn.sigmoid(log_sigma4) + 1 + 1e-14

        with tf.name_scope("shared"):
          with tf.name_scope("sigma"):
            tf.summary.histogram('Weight', W_sigma)
            tf.summary.histogram('bias', b_sigma)
            sigma_embed_activations = [self.sigma1, self.sigma2, self.sigma3, self.sigma4]
            tf.summary.histogram('activations', tf.concat(sigma_embed_activations, 0)) """ 

        ##############EXPERIMENTAL FEATURES. PLEASE REMOVE IF DOESN'T WORK############################################

        W_mu1 = tf.get_variable(name='W_mu1', shape=[sizes1[-1], 40], dtype=tf.float32, initializer=w_init())
        b_mu1 = tf.get_variable(name='b_mu1', shape=[40], dtype=tf.float32, initializer=w_init())

        W_mu2 = tf.get_variable(name='W_mu2', shape=[40, self.L], dtype=tf.float32, initializer=w_init())
        b_mu2 = tf.get_variable(name='b_mu2', shape=[self.L], dtype=tf.float32, initializer=w_init())
        
        embedding1_t = tf.nn.relu(tf.matmul(encoded1, W_mu1) + b_mu1)
        self.embedding1 = tf.matmul(embedding1_t, W_mu2) + b_mu2

        embedding2_t = tf.nn.relu(tf.matmul(encoded2, W_mu1) + b_mu1)
        self.embedding2 = tf.matmul(embedding2_t, W_mu2) + b_mu2

        embedding3_t = tf.nn.relu(tf.matmul(encoded3, W_mu1) + b_mu1)
        self.embedding3 = tf.matmul(embedding3_t, W_mu2) + b_mu2

        embedding4_t = tf.nn.relu(tf.matmul(encoded4, W_mu1) + b_mu1)
        self.embedding4 = tf.matmul(embedding4_t, W_mu2) + b_mu2

        W_sigma1 = tf.get_variable(name='W_sigma1', shape=[sizes1[-1], 40], dtype=tf.float32, initializer=w_init())
        W_sigma2 = tf.get_variable(name='W_sigma2', shape=[40, self.L], dtype=tf.float32, initializer=w_init())

        b_sigma1 = tf.get_variable(name='b_sigma1', shape=[40], dtype=tf.float32, initializer=w_init())
        b_sigma2 = tf.get_variable(name='b_sigma2', shape=[self.L], dtype=tf.float32, initializer=w_init())

        log_sigma1t = tf.nn.relu(tf.matmul(encoded1, W_sigma1) + b_sigma1)
        log_sigma1 = tf.matmul(log_sigma1t, W_sigma2) + b_sigma2
        self.sigma1 = tf.nn.elu(log_sigma1) + 1 + 1e-14
        #self.sigma1 = tf.nn.sigmoid(log_sigma1) + 1 + 1e-14

        log_sigma2t = tf.nn.relu(tf.matmul(encoded2, W_sigma1) + b_sigma1)
        log_sigma2 = tf.matmul(log_sigma2t, W_sigma2) + b_sigma2
        self.sigma2 = tf.nn.elu(log_sigma2) + 1 + 1e-14
        #self.sigma2 = tf.nn.sigmoid(log_sigma2) + 1 + 1e-14

        log_sigma3t = tf.nn.relu(tf.matmul(encoded3, W_sigma1) + b_sigma1)
        log_sigma3 = tf.matmul(log_sigma3t, W_sigma2) + b_sigma2
        self.sigma3 = tf.nn.elu(log_sigma3) + 1 + 1e-14
        #self.sigma3 = tf.nn.sigmoid(log_sigma3) + 1 + 1e-14

        log_sigma4t = tf.nn.relu(tf.matmul(encoded4, W_sigma1) + b_sigma1)
        log_sigma4 = tf.matmul(log_sigma4t, W_sigma2) + b_sigma2
        self.sigma4 = tf.nn.elu(log_sigma4) + 1 + 1e-14 
        #self.sigma4 = tf.nn.sigmoid(log_sigma4) + 1 + 1e-14

        ########################################################################################################################
        #####################END OF EXPERIMENTAL, DELETE IF DOESN'T WORK########################################################
        #######################################################################################################################
        


        if proximity == 'second-order':
            #feature 1

            for i in range(1, len(sizes1)):
                W = tf.get_variable(name='W_ctx1{}'.format(i), shape=[sizes1[i - 1], sizes1[i]], dtype=tf.float32,
                                    initializer=w_init())
                b = tf.get_variable(name='b_ctx1{}'.format(i), shape=[sizes1[i]], dtype=tf.float32, initializer=w_init())

                if i == 1:
                    encoded1 = tf.sparse_tensor_dense_matmul(self.X1, W) + b
                else:
                    encoded1 = tf.matmul(encoded1, W) + b

                encoded1 = tf.nn.relu(encoded1)

            #feature 2

            for i in range(1, len(sizes2)):
                W = tf.get_variable(name='W_ctx2{}'.format(i), shape=[sizes2[i - 1], sizes2[i]], dtype=tf.float32,
                                    initializer=w_init())
                b = tf.get_variable(name='b_ctx2{}'.format(i), shape=[sizes2[i]], dtype=tf.float32, initializer=w_init())

                if i == 1:
                    encoded2 = tf.sparse_tensor_dense_matmul(self.X2, W) + b
                else:
                    encoded2 = tf.matmul(encoded2, W) + b

                encoded2 = tf.nn.relu(encoded2)

            #feature 3

            for i in range(1, len(sizes3)):
                W = tf.get_variable(name='W_ctx3{}'.format(i), shape=[sizes3[i - 1], sizes3[i]], dtype=tf.float32,
                                    initializer=w_init())
                b = tf.get_variable(name='b_ctx3{}'.format(i), shape=[sizes3[i]], dtype=tf.float32, initializer=w_init())

                if i == 1:
                    encoded3 = tf.sparse_tensor_dense_matmul(self.X3, W) + b
                else:
                    encoded3 = tf.matmul(encoded3, W) + b

                encoded3 = tf.nn.relu(encoded3)

            #feature 4

            for i in range(1, len(sizes4)):
                W = tf.get_variable(name='W_ctx4{}'.format(i), shape=[sizes4[i - 1], sizes4[i]], dtype=tf.float32,
                                    initializer=w_init())
                b = tf.get_variable(name='b_ctx4{}'.format(i), shape=[sizes4[i]], dtype=tf.float32, initializer=w_init())

                if i == 1:
                    encoded4 = tf.sparse_tensor_dense_matmul(self.X4, W) + b
                else:
                    encoded4 = tf.matmul(encoded4, W) + b

                encoded4 = tf.nn.relu(encoded4)
            ################ USE INTERCHANGABLY WITH THE HIGHER DIMENSION#####################################################
            """ W_mu = tf.get_variable(name='W_mu_ctx', shape=[sizes1[-1], self.L], dtype=tf.float32, initializer=w_init())
            b_mu = tf.get_variable(name='b_mu_ctx', shape=[self.L], dtype=tf.float32, initializer=w_init())
            
            self.ctx_mu1 = tf.matmul(encoded1, W_mu) + b_mu
            self.ctx_mu2 = tf.matmul(encoded2, W_mu) + b_mu
            self.ctx_mu3 = tf.matmul(encoded3, W_mu) + b_mu
            self.ctx_mu4 = tf.matmul(encoded4, W_mu) + b_mu

            ''' self.ctx_mu1 = tf.nn.sigmoid(tf.matmul(encoded1, W_mu) + b_mu) + 1 + 1e-14
            self.ctx_mu2 = tf.nn.sigmoid(tf.matmul(encoded2, W_mu) + b_mu) + 1 + 1e-14
            self.ctx_mu3 = tf.nn.sigmoid(tf.matmul(encoded3, W_mu) + b_mu) + 1 + 1e-14 

            self.ctx_mu4 = tf.nn.sigmoid(tf.matmul(encoded4, W_mu) + b_mu) + 1 + 1e-14 '''

            W_sigma = tf.get_variable(name='W_sigma_ctx', shape=[sizes1[-1], self.L], dtype=tf.float32,
                                      initializer=w_init())
            b_sigma = tf.get_variable(name='b_sigma_ctx', shape=[self.L], dtype=tf.float32, initializer=w_init())
            
            log_sigma1 = tf.matmul(encoded1, W_sigma) + b_sigma
            self.ctx_sigma1 = tf.nn.elu(log_sigma1) + 1 + 1e-14
            #self.ctx_sigma1 = tf.nn.sigmoid(log_sigma1) + 1 + 1e-14

            log_sigma2 = tf.matmul(encoded2, W_sigma) + b_sigma
            self.ctx_sigma2 = tf.nn.elu(log_sigma2) + 1 + 1e-14
            #self.ctx_sigma2 = tf.nn.sigmoid(log_sigma2) + 1 + 1e-14

            log_sigma3 = tf.matmul(encoded3, W_sigma) + b_sigma
            self.ctx_sigma3 = tf.nn.elu(log_sigma3) + 1 + 1e-14
            #self.ctx_sigma3 = tf.nn.sigmoid(log_sigma3) + 1 + 1e-14


            log_sigma4 = tf.matmul(encoded4, W_sigma) + b_sigma
            self.ctx_sigma4 = tf.nn.elu(log_sigma4) + 1 + 1e-14
            #self.ctx_sigma4 = tf.nn.sigmoid(log_sigma4) + 1 + 1e-14 """

            #############HIGHER DIMENSION VERSION ##############################################
            W_mu1 = tf.get_variable(name='W_mu_ctx1', shape=[sizes1[-1], 40], dtype=tf.float32, initializer=w_init())
            b_mu1 = tf.get_variable(name='b_mu_ctx1', shape=[40], dtype=tf.float32, initializer=w_init())
            
            W_mu2 = tf.get_variable(name='W_mu_ctx2', shape=[40, self.L], dtype=tf.float32, initializer=w_init())
            b_mu2 = tf.get_variable(name='b_mu_ctx2', shape=[self.L], dtype=tf.float32, initializer=w_init())
            
            ctx_mu1_t = tf.nn.relu(tf.matmul(encoded1, W_mu1) + b_mu1)
            self.ctx_mu1 = tf.matmul(ctx_mu1_t, W_mu2) + b_mu2
            
            ctx_mu2_t = tf.nn.relu(tf.matmul(encoded2, W_mu1) + b_mu1)
            self.ctx_mu2 = tf.matmul(ctx_mu2_t, W_mu2) + b_mu2
            
            ctx_mu3_t = tf.nn.relu(tf.matmul(encoded3, W_mu1) + b_mu1)
            self.ctx_mu3 = tf.matmul(ctx_mu3_t, W_mu2) + b_mu2
            
            ctx_mu4_t = tf.nn.relu(tf.matmul(encoded4, W_mu1) + b_mu1)
            self.ctx_mu4 = tf.matmul(ctx_mu4_t, W_mu2) + b_mu2

            W_sigma1 = tf.get_variable(name='W_sigma_ctx1', shape=[sizes1[-1], 40], dtype=tf.float32, initializer=w_init())
            W_sigma2 = tf.get_variable(name='W_sigma_ctx2', shape=[40, self.L], dtype=tf.float32, initializer=w_init())
            
            b_sigma1 = tf.get_variable(name='b_sigma_ctx1', shape=[40], dtype=tf.float32, initializer=w_init())
            b_sigma2 = tf.get_variable(name='b_sigma_ctx2', shape=[self.L], dtype=tf.float32, initializer=w_init())
            
            log_sigma1t = tf.nn.relu(tf.matmul(encoded1, W_sigma1) + b_sigma1)
            log_sigma1 = tf.matmul(log_sigma1t, W_sigma2) + b_sigma2
            self.ctx_sigma1 = tf.nn.elu(log_sigma1) + 1 + 1e-14
            #self.ctx_sigma1 = tf.nn.sigmoid(log_sigma1) + 1 + 1e-14
			
            log_sigma2t = tf.nn.relu(tf.matmul(encoded2, W_sigma1) + b_sigma1)
            log_sigma2 = tf.matmul(log_sigma2t, W_sigma2) + b_sigma2
            self.ctx_sigma2 = tf.nn.elu(log_sigma2) + 1 + 1e-14
            #self.ctx_sigma2 = tf.nn.sigmoid(log_sigma2) + 1 + 1e-14

            log_sigma3t = tf.nn.relu(tf.matmul(encoded3, W_sigma1) + b_sigma1)
            log_sigma3 = tf.matmul(log_sigma3t, W_sigma2) + b_sigma2
            self.ctx_sigma3 = tf.nn.elu(log_sigma3) + 1 + 1e-14
            #self.ctx_sigma3 = tf.nn.sigmoid(log_sigma3) + 1 + 1e-14

            log_sigma4t = tf.nn.relu(tf.matmul(encoded4, W_sigma1) + b_sigma1)
            log_sigma4 = tf.matmul(log_sigma4t, W_sigma2) + b_sigma2
            self.ctx_sigma4 = tf.nn.elu(log_sigma4) + 1 + 1e-14
            #self.ctx_sigma4 = tf.nn.sigmoid(log_sigma4) + 1 + 1e-14
            #########################################DEEPER MODEL END##################################


    def energy_kl(self, u_i, u_j, proximity, node_type1, node_type2):
        """Return the symmetrised KL divergence between paired node Gaussians.

        The tensors read for each endpoint are chosen inside the graph with
        ``tf.case``: ``node_type1`` selects among ``embedding1``/``sigma1`` ..
        ``embedding4``/``sigma4`` for ``u_i`` and ``node_type2`` does the same
        for ``u_j`` (values 0..3). The case has no default branch and is
        declared exclusive.

        NOTE(review): ``proximity`` is accepted but never consulted here; both
        endpoints are gathered from the ``embedding*``/``sigma*`` tensors and
        the ``ctx_mu*``/``ctx_sigma*`` tensors are not read. Confirm this is
        what is wanted for 'second-order' runs.

        Parameters
        ----------
        u_i, u_j : index tensors used with ``tf.gather`` for the two endpoints.
        proximity : unused (see note above).
        node_type1, node_type2 : tensors compared against 0..3 to pick the
            encoder output for ``u_i`` and ``u_j`` respectively.

        Returns
        -------
        Tensor holding ``0.5 * (KL_ij + KL_ji)``, each term summed over axis 1.
        """
        def make_gather(tag, mu_all, sigma_all, indices):
            # Each branch announces itself when tf.case traces it, then
            # gathers the (mean, variance) rows for the requested indices.
            def branch():
                print(tag)
                return tf.gather(mu_all, indices), tf.gather(sigma_all, indices)
            return branch

        def directed_kl(mu_a, sigma_a, mu_b, sigma_b):
            # Closed-form KL term for element-wise (diagonal) variances, with
            # `a` supplying the denominator of every ratio.
            ratio = sigma_b / sigma_a
            trace_term = tf.reduce_sum(ratio, 1)
            log_det_term = tf.reduce_sum(tf.log(ratio + 1e-11), 1)
            mean_term = tf.reduce_sum(tf.square(mu_a - mu_b) / sigma_a, 1)
            return 0.5 * (trace_term + mean_term - self.L - log_det_term)

        per_type = [
            (self.embedding1, self.sigma1),
            (self.embedding2, self.sigma2),
            (self.embedding3, self.sigma3),
            (self.embedding4, self.sigma4),
        ]
        source_tags = ["f1", "f2", "f3", "f4"]
        target_tags = ["f9", "f10", "f11", "f12"]

        source_cases = [
            (tf.equal(node_type1, type_id), make_gather(tag, mu_all, sigma_all, u_i))
            for type_id, (tag, (mu_all, sigma_all)) in enumerate(zip(source_tags, per_type))
        ]
        mu_i, sigma_i = tf.case(source_cases, default=None, exclusive=True)

        target_cases = [
            (tf.equal(node_type2, type_id), make_gather(tag, mu_all, sigma_all, u_j))
            for type_id, (tag, (mu_all, sigma_all)) in enumerate(zip(target_tags, per_type))
        ]
        mu_j, sigma_j = tf.case(target_cases, default=None, exclusive=True)

        ij_kl = directed_kl(mu_i, sigma_i, mu_j, sigma_j)
        ji_kl = directed_kl(mu_j, sigma_j, mu_i, sigma_i)

        return 0.5 * (ij_kl + ji_kl)

## train.py

In [26]:
import tensorflow as tf
import argparse
#from model_20 import GSNE
#from utils_20 import DataUtils, score_link_prediction
import pickle
import time
import scipy.sparse as sp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedShuffleSplit, ShuffleSplit, train_test_split
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import normalize
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
import random
import copy
from IPython.display import clear_output
import random

In [27]:
# Row-index range (start, end) of each of the four node types in the
# combined node numbering; used below as slice bounds.
node1_start = 0
node1_end = 218

node2_start = 218
node2_end = 13557

node3_start = 13557
node3_end = 14266

node4_start = 14266
node4_end = 67117

In [28]:
# Random state handed to the train/test splitter.
seed = 6521

# Run identifier; every output directory below is suffixed with it.
MODEL_ID = 20

tensorboard_path = 'House Price Content List/Tensorboard/{}'.format(MODEL_ID)
model_path = 'House Price Content List/Model Params/{}/'.format(MODEL_ID)
embedding_path = 'House Price Content List/Embeddings/{}/'.format(MODEL_ID)

In [29]:
def train_test_split_sampled(arrName, pivotColumn, num_of_intervals):
    """Split a DataFrame 80/20 separately inside value bins of one column.

    Rows containing NaN are dropped first. The range of ``pivotColumn`` is
    cut at ``num_of_intervals`` evenly spaced edges (so there are
    ``num_of_intervals - 1`` initial bins). A bin holding fewer than 10 rows
    is widened to the right until it has at least 10 rows or reaches the
    last edge; each resulting bin is then split with ``train_test_split``
    (``test_size=0.2``, ``random_state=seed`` from module scope).

    The last bin is closed on the right so rows equal to the column maximum
    are kept, and a final bin with fewer than two rows (too small to split)
    goes entirely to the training part instead of raising.

    Parameters
    ----------
    arrName : pandas.DataFrame
        Numeric frame to split (``np.isnan`` is applied to it).
    pivotColumn : column label
        Column whose values define the bins.
    num_of_intervals : int
        Number of bin edges passed to ``np.linspace``.

    Returns
    -------
    (train, test_set) : tuple of numpy.ndarray
        Row values of the two parts, each with the frame's column count.
    """
    arrName = arrName[~np.isnan(arrName).any(axis=1)]

    # Zero-row array with the right width/dtype: seeds both accumulators so
    # the concatenations below are well defined even when nothing is added.
    empty_rows = arrName.values[:0]
    if len(arrName) == 0:
        return empty_rows, empty_rows.copy()

    col = np.array(arrName[pivotColumn])
    bins = np.linspace(min(col), max(col), num_of_intervals)
    last_edge = len(bins) - 1

    train_parts = [empty_rows]
    test_parts = [empty_rows]

    left, right = 0, 1
    while right < len(bins):
        pivot = arrName[pivotColumn]
        above = pivot >= bins[left]
        # Only the final bin includes its right edge; otherwise the rows at
        # the column maximum would belong to no bin at all.
        below = pivot <= bins[right] if right == last_edge else pivot < bins[right]
        portion_arr = arrName[above & below]

        if len(portion_arr) < 10 and right != last_edge:
            # Too few rows: widen the bin instead of splitting it.
            right += 1
            continue

        if len(portion_arr) < 2:
            # train_test_split cannot produce two non-empty parts here.
            train_parts.append(portion_arr.values)
        else:
            train_temp, test_temp = train_test_split(portion_arr, test_size=0.2, random_state=seed)
            train_parts.append(train_temp.values)
            test_parts.append(test_temp.values)

        left = right
        right += 1

    return np.concatenate(train_parts), np.concatenate(test_parts)

In [30]:
def score_node_classification(features, z, features_test, labels_test, p_labeled=0.8, n_repeat=10, norm=False):
    """
    Score node embeddings as regression features with random forests.

    Despite the name, this fits ``RandomForestRegressor`` models and reports
    regression errors. Each repeat does two things:

    1. shuffle-splits ``(features, z)``, fits on one part and scores on the
       other (``mae``, ``mse``);
    2. fits a second forest on all of ``(features, z)`` and scores it on
       ``(features_test, labels_test)`` (``mae2``, ``mse2``).

    Parameters
    ----------
    features : array-like, shape [N, L]
        Embeddings of the training nodes.
    z : array-like, shape [N]
        Regression targets for ``features``.
    features_test : array-like, shape [M, L]
        Embeddings of the held-out nodes.
    labels_test : array-like, shape [M]
        Regression targets for ``features_test``.
    p_labeled : float
        NOTE(review): currently ignored -- it is overwritten with 0.5 below,
        so half of ``features`` is always used for fitting in step 1.
        Confirm whether the argument should be honoured.
    n_repeat : int
        Number of repeats; the repeat index is the shuffle seed.
    norm : bool
        If True, row-normalise both ``features`` and ``features_test``.

    Returns
    -------
    numpy.ndarray, shape [4]
        ``(mae, mse, mae2, mse2)`` averaged over the repeats.
    """
    p_labeled = 0.5

    if norm:
        features = normalize(features)
        # Keep the held-out features on the same scale as the training ones;
        # otherwise the second model is scored on unnormalised inputs.
        features_test = normalize(features_test)

    trace = []
    for repeat_seed in range(n_repeat):
        sss = ShuffleSplit(n_splits=1, test_size=1 - p_labeled, random_state=repeat_seed)
        split_train, split_test = next(sss.split(features, z))

        # Step 1: internal split of the training nodes.
        rfr = RandomForestRegressor(n_jobs=-1)
        rfr.fit(features[split_train], z[split_train])
        predicted = rfr.predict(features[split_test])

        mae = mean_absolute_error(z[split_test], predicted)
        mse = mean_squared_error(z[split_test], predicted)

        # Step 2: fit on every training node, score on the held-out nodes.
        rfr = RandomForestRegressor(n_jobs=-1)
        rfr.fit(features, z)
        predicted = rfr.predict(features_test)

        mae2 = mean_absolute_error(labels_test, predicted)
        mse2 = mean_squared_error(labels_test, predicted)

        trace.append((mae, mse, mae2, mse2))

    return np.array(trace).mean(0)

In [31]:
def check_performance(number_of_iter, test_indices):
    """Score the saved embeddings as features for price regression.

    Loads the pickle ``gsne_cora_ml_embedding_graduate_second-order.pkl``
    from ``embedding_path``, takes the ``mu`` vectors of nodes numbered
    ``node4_start`` and above, divides them into train/test by membership in
    ``test_indices``, and scores them against the ``price`` column of
    ``Property_price.csv`` via ``score_node_classification`` (one repeat).

    NOTE(review): features are ordered by node id while labels keep the CSV
    row order, so this presumably relies on the CSV being sorted by ``ID``
    -- confirm.

    Parameters
    ----------
    number_of_iter : int
        Current batch number; results are printed when it is below 1000 or
        a multiple of 300.
    test_indices : array-like
        Node ids held out for testing.

    Returns
    -------
    (rmse, test_mae) : tuple of float
        RMSE on the internal split of the training nodes, and MAE on the
        held-out nodes.
    """
    ps = pd.read_pickle(embedding_path + 'gsne_cora_ml_embedding_graduate_second-order.pkl')
    mu = ps['mu']

    # Hash the held-out ids once: `k in ndarray` rescans the whole array on
    # every lookup, which is quadratic over all nodes.
    held_out = set(np.asarray(test_indices).ravel().tolist())

    train_rows, test_rows = [], []
    for k in range(node4_start, len(mu)):
        target = test_rows if k in held_out else train_rows
        target.append(np.array(mu[k]))
    features_train = np.array(train_rows)
    features_test = np.array(test_rows)

    labels_df = pd.read_csv('Property_price.csv')
    is_test = np.isin(labels_df['ID'], test_indices)
    labels_train = labels_df[~is_test]['price'].values
    labels_test = labels_df[is_test]['price'].values

    mae, mse, mae2, mse2 = score_node_classification(features_train, labels_train, features_test, labels_test, n_repeat = 1)
    if number_of_iter % 300 == 0 or number_of_iter < 1000:
        print("Embedding Features Results - MAE: "+ str(mae)+" RMSE: "+ str(mse**0.5)+" TMAE: "+ str(mae2)+" TRMSE: "+ str(mse2**0.5))
    return mse**0.5, mae2

In [32]:
def main():
    """Parse command-line options and start training.

    Positional ``name`` and ``model`` are optional and fall back to
    ``'cora_ml'`` and ``'gsne'``. ``--learning_rate`` is parsed as a float so
    a value given on the command line is numeric like the default.

    NOTE(review): ``--is_all`` ends up True only when the literal text
    ``True`` is passed; the boolean default ``True`` does not equal the
    string ``'True'``, so omitting the flag yields ``False``. This is left
    as is because the output file names depend on the flag -- confirm the
    intended default before changing it.
    """
    parser = argparse.ArgumentParser()
    # nargs='?' is what makes the defaults of these positionals take effect.
    parser.add_argument('name', nargs='?', default='cora_ml')
    parser.add_argument('model', nargs='?', default='gsne', help='gsne')
    parser.add_argument('--suf', default='')
    parser.add_argument('--proximity', default='second-order', help='first-order or second-order')
    parser.add_argument('--embedding_dim', type=int, default=32)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--K', type=int, default=5)
    parser.add_argument('--learning_rate', type=float, default=0.0002)
    parser.add_argument('--num_batches', type=int, default=600000)
    parser.add_argument('--is_all', default=True)  # train with all edges; no validation or test set
    args = parser.parse_args()
    args.is_all = args.is_all == 'True'
    train(args)

In [33]:
def _stitch_by_node_type(per_type):
    """Merge four per-encoder outputs, taking each node type's rows from its own encoder."""
    node_ranges = (
        (node1_start, node1_end),
        (node2_start, node2_end),
        (node3_start, node3_end),
        (node4_start, node4_end),
    )
    stitched = copy.deepcopy(per_type[0])
    for block, (lo, hi) in zip(per_type, node_ranges):
        stitched[lo:hi] = block[lo:hi]
    return stitched


def _dump_embeddings(path, data_loader, mu, sigma):
    """Pickle the mapped mu/sigma embeddings to ``path`` and close the file."""
    payload = {'mu': data_loader.embedding_mapping(mu),
               'sigma': data_loader.embedding_mapping(sigma)}
    with open(path, 'wb') as fh:
        pickle.dump(payload, fh)


def train(args):
    """Train the GSNE model, periodically exporting and scoring embeddings.

    Steps, in order:

    1. Split ``Property_price.csv`` with ``train_test_split_sampled`` and
       write the first column of each part to ``train.txt`` / ``test.txt``
       under ``embedding_path``.
    2. Build one ``DataUtils`` loader for the attribute file and five for
       the edge files, each given the held-out ids.
    3. Build the model and run ``args.num_batches`` optimisation steps,
       writing summaries to ``tensorboard_path`` on every batch.
    4. Every 50 batches (and on the last one) save the embeddings and a
       checkpoint, call ``check_performance``, and keep a ``_best`` copy
       whenever the first value it returns improves.

    NOTE(review): ``check_performance`` reads a fixed pickle name, while the
    name written here depends on ``args.model``, ``args.name``,
    ``args.is_all`` and ``args.proximity``; the two only coincide for some
    argument combinations -- confirm.

    Parameters
    ----------
    args : object with attributes ``is_all``, ``proximity``, ``suf``,
        ``model``, ``name``, ``batch_size``, ``K`` and ``num_batches``.
        ``X1``..``X4`` are attached to it here before it is passed to
        ``GSNE``.

    Raises
    ------
    Exception
        If ``args.model`` is not ``'gsne'``.
    """
    # Alternative input directory used during development:
    # 'Data Tables Step 7 - Processed npzs_cleaned/'
    graph_dir = 'Data Tables Step 7 - Processed npzs/'
    graph_file = graph_dir + 'graph_0_general_attributes.npz'
    graph_file1 = graph_dir + 'graph_1_house_region.npz'
    graph_file2 = graph_dir + 'graph_2_house_train.npz'
    graph_file3 = graph_dir + 'graph_3_property_school.npz'
    graph_file4 = graph_dir + 'graph_4_school_train.npz'
    graph_file5 = graph_dir + 'graph_5_train_train.npz'

    # Dataset splitting for ensuring inductivity
    price_file = 'Property_price.csv'
    df_price = pd.read_csv(price_file)
    train_rows, test_rows = train_test_split_sampled(df_price, 'price', 20)
    train_indices = train_rows[:, 0]
    test_indices = test_rows[:, 0]
    np.savetxt(embedding_path + "train.txt", train_indices)
    np.savetxt(embedding_path + "test.txt", test_indices)

    # Attribute and graph loaders; the test indices are passed so the
    # loaders can keep those nodes out of training.
    data_loader = DataUtils(graph_file, args.is_all, test_indices=test_indices)  # THIS ONLY CONTAINS ATTRIBUTE INFO
    neg_dist = data_loader.node_negative_distribution_temp
    data_loader1 = DataUtils(graph_file1, args.is_all, neg_dist, test_indices=test_indices)
    data_loader2 = DataUtils(graph_file2, args.is_all, neg_dist, test_indices=test_indices)
    data_loader3 = DataUtils(graph_file3, args.is_all, neg_dist, test_indices=test_indices)
    data_loader4 = DataUtils(graph_file4, args.is_all, neg_dist, test_indices=test_indices)
    data_loader5 = DataUtils(graph_file5, args.is_all, neg_dist, test_indices=test_indices)

    suffix = args.proximity
    # With --suf oh the attribute matrices are replaced by identity matrices.
    args.X1 = data_loader.X1 if args.suf != 'oh' else sp.identity(data_loader1.X1.shape[0])
    args.X2 = data_loader.X2 if args.suf != 'oh' else sp.identity(data_loader2.X2.shape[0])
    args.X3 = data_loader.X3 if args.suf != 'oh' else sp.identity(data_loader3.X3.shape[0])
    args.X4 = data_loader.X4 if args.suf != 'oh' else sp.identity(data_loader4.X4.shape[0])

    m = args.model
    name = m + '_' + args.name
    if 'gsne' == m:
        model = GSNE(args)
    else:
        raise Exception("Only gsne available")

    embedding_stem = embedding_path + '%s%s_embedding_graduate_%s' % (
        name, '_all' if args.is_all else '', suffix)

    writer = tf.summary.FileWriter(tensorboard_path)
    saver = tf.train.Saver()

    try:
        with tf.Session() as sess:
            #saver.restore(sess, model_path+"model_graduate_best.ckpt")
            writer.add_graph(sess.graph)
            print('-------------------------- ' + m + ' --------------------------')
            print('batches\tloss\tsampling time\ttraining_time\tdatetime')

            tf.global_variables_initializer().run()
            sampling_time, training_time = 0, 0

            previous_best = 9999999999

            for b in range(args.num_batches):
                if b % 35000 == 0:
                    clear_output()
                t1 = time.time()

                # NOTE(review): as written, the first two conditions are
                # never true and the third always is, so every batch comes
                # from data_loader3 and loaders 1, 2, 4 and 5 are never
                # sampled. Kept unchanged; confirm whether a round-robin
                # over b % 5 was intended.
                if b % 5 < 0:
                    batch = data_loader1.fetch_next_batch(batch_size=args.batch_size, K=args.K)
                elif b % 5 < 0:
                    batch = data_loader2.fetch_next_batch(batch_size=args.batch_size, K=args.K)
                elif b % 5 >= 0:
                    batch = data_loader3.fetch_next_batch(batch_size=args.batch_size, K=args.K)
                elif b % 5 == 3:
                    batch = data_loader4.fetch_next_batch(batch_size=args.batch_size, K=args.K)
                else:
                    batch = data_loader5.fetch_next_batch(batch_size=args.batch_size, K=args.K)
                u_i, u_j, label, node_type1, node_type2 = batch

                feed_dict = {model.u_i: u_i, model.u_j: u_j, model.label: label,
                             model.node_type1: node_type1, model.node_type2: node_type2}

                t2 = time.time()
                sampling_time += t2 - t1

                loss, _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict)
                training_time += time.time() - t2

                # Summaries are written on every batch.
                s = sess.run(model.merged_summary, feed_dict=feed_dict)
                writer.add_summary(s, b)
                writer.flush()

                if b % 5000 < 5:
                    print('%d\t%f\t%0.2f\t%0.2f\t%s' % (b, loss, sampling_time, training_time,
                                                        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))

                    sampling_time, training_time = 0, 0

                if b != 0 and (b % 50 == 0 or b == (args.num_batches - 1)):
                    # Evaluate every encoder, then keep for each node-type
                    # range the rows produced by that type's own encoder.
                    mus, sigmas = [], []
                    for mu_t, sigma_t in ((model.embedding1, model.sigma1),
                                          (model.embedding2, model.sigma2),
                                          (model.embedding3, model.sigma3),
                                          (model.embedding4, model.sigma4)):
                        mu_k, sigma_k = sess.run([mu_t, sigma_t])
                        mus.append(mu_k)
                        sigmas.append(sigma_k)

                    mu = _stitch_by_node_type(mus)
                    sigma = _stitch_by_node_type(sigmas)

                    _dump_embeddings(embedding_stem + '.pkl', data_loader, mu, sigma)
                    saver.save(sess, model_path + "model_graduate.ckpt")

                    # First value is the RMSE on the internal training split,
                    # second the MAE on the held-out nodes.
                    split_rmse, test_mae = check_performance(number_of_iter=b, test_indices=test_indices)

                    if split_rmse < previous_best:
                        previous_best = split_rmse
                        print("new best result train rmse: "+str(split_rmse) + " test mae:"+str(test_mae))
                        _dump_embeddings(embedding_stem + '_best.pkl', data_loader, mu, sigma)
                        saver.save(sess, model_path + "model_graduate_best.ckpt")
    finally:
        # Release the event file even when training is interrupted.
        writer.close()

In [35]:
# Script entry point, currently disabled: the guard is wrapped in a string
# literal, so main() is never called from here.
'''
if __name__ == '__main__':
    main()
'''

"\nif __name__ == '__main__':\n    main()\n"