In [275]:
################################################
###Data Processing for GAT-TSNE Algorithm ######
################################################
##               Heqiao Ruan                  ##
##           Department of Statistics,        ##
##                 Genome Center,             ##
##        University of California, Davis     ##
##                 hruan@ucdavis.edu          ##
################################################


In [108]:
from sklearn.datasets import fetch_20newsgroups
twenty_groupdata = fetch_20newsgroups(shuffle = True, random_state = 42, remove = ('header', 'footers', 'quotes'))


In [67]:
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
from lxml import html, etree
import matplotlib.pyplot as plt
from sklearn.metrics import average_precision_score, f1_score
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.layers import LSTM, Dense, Activation, Embedding, Dropout, Input, RepeatVector, TimeDistributed, Bidirectional
from sklearn.model_selection import train_test_split
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical


In [75]:
os.chdir('/Users/Rhq/Desktop/UCDAVIS/ECS 271/Project/Data/cora/')

In [37]:
########
#Cora data:
feature_labels = np.genfromtxt('/Users/Rhq/Desktop/UCDAVIS/ECS 271/Project/Data/cora/cora.content', dtype = np.dtype(str))
                               
                               

In [38]:
features = sp.csr_matrix(feature_labels[:, 1: -1], dtype = np.float32)
idx = np.array(feature_labels[:, 0], dtype = np.int32)
idx_map = {j: i for i,j in enumerate(idx)}

In [39]:
#One-hot encoding the labels:
def one_hot_enc(Labels):
	#Encode the labels:
	set_of_labels = set(Labels)
	class_dict = {c: np.identity(len(set_of_labels))[i, :] for i, c in enumerate(set_of_labels)}
	one_hot_label = np.array(list(map(class_dict.get, Labels)),
		dtype = np.int32)

	return one_hot_label
labels = one_hot_enc(feature_labels[:, -1])


In [40]:
def normalize(mx):
    """Row-normalize sparse matrix"""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx.todense()

In [41]:
#Construct adjacency matrix:
edges_0 = np.genfromtxt('/Users/Rhq/Desktop/UCDAVIS/ECS 271/Project/Data/cora/cora.cites', dtype = np.int32)
edges = np.array(list(map(idx_map.get, edges_0.flatten())),
	dtype = np.int32).reshape(edges_0.shape)
adj_matrix = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
	shape = (labels.shape[0], labels.shape[0]), dtype = np.float32)
adj_matrix = adj_matrix + adj_matrix.T.multiply(adj_matrix.T > adj_matrix) 

In [42]:
adj = normalize(adj_matrix + sp.eye(adj_matrix.shape[0]))
feature_data = normalize(features)

In [59]:
class gat_arc(Layer):

	def __init__(self, out_feature, attn_integrate = 'average', dropout = 0.3, activation = 'LeakyRelu', kernel_initializer = 'glorot_uniform', bias_initializer = 'zeros', weight_regularizer = None, bias_regularizer = None, attn_num = 8, sigma = 10):
		self.out_feature = out_feature #The number of output features
		self.activation  = activations.get(activation) # Activation function.
		self.kernel_initializer = initializers.get(kernel_initializer) # Weight initialization
		self.bias_initializer = initializers.get(bias_initializer) # Bias initialization
		self.weight_regularizer = regularizers.get(weight_regularizer) # Weight regularizer
		self.bias_regularizer = regularizers.get(bias_regularizer) # Bias regularizer
		self.attn_integrate = attn_integrate # The type of reduction, either average or conca.
		self.dropout = dropout # Dropout rate
		self.attn_num = attn_num # The number for multi-attention concatenation
		self.sigma = sigma # The sigma for the rbf kernel of TSNE.
		self.head_kernel = []
		self.bias_kernel = []
		self.attention_kernel = []
		###########
		if attn_integrate == 'conca':
			self.output_dimen = self.out_feature * self.attn_header
		else:
			self.output_dimen = self.out_feature

	def construct_attention(self, Node_feature, Adj_matrix):
		"""
		:Construct the attention Network.
		:Generate the node feature representation.
		"""

		output = []
		for i in range(self.attn_num):
			#Construct the weights for attention head and neighbors attentions:
			current_head_kernel = self.add_weight(shape = (p, self.output_feature), initializer = self.kernel_initializer, regularizer = self.weight_regularizer,
				name = 'Head_Number-{}'.format(i))
			current_bias = self.add_weight(shape = (self.output_feature, ), initializer = self.bias_initializer, regularizer = self.bias_regularizer,
				name = 'Head_Bias-{}'.format(i))
			self.head_kernel.append(current_head_kernel)
			self.bias_kernel.append(current_bias)

			current_attention_itself = self.add_weight(shape = (self.output_feature, 1),
				initializer = self.kernel_initializer)
			current_attention_neighbor = self.add_weight(shape = (self.output_feature, 1),
				initializer = self.kernel_initializer)
			self.attention_kernel.append([current_attention_itself, current_attention_neighbor])

		#Then put the feature onto the assigned tensors:
		for i in range(self.attn_num):
			current_kernel = self.head_kernel[i]
			current_attention_kernel = self.attention_kernel[i]

			#Inputs for the attention kernel:
			current_feature = K.dot(Node_feature, current_kernel)

			#Attention mechanism combination:
			current_attention_itself = K.dot(current_feature, current_attention_kernel[0])
			current_attention_neighbors = K.dot(current_feature, current_attention_kernel[1])
			ATTE = current_attention_itself + K.transpose(current_attention_neighbors)

			#lrelu:
			ATTE = LeakyRelu(alpha = 0.25)(ATTE)

			#Restrict the attention to its neighbors by a mask:
			MASK = K.exp(Adj_matrix * -10e9) * -10e9
			ATTE += MASK

			#Get the Attention coefficients:
			ATTE = K.softmax(ATTE)

			#Apply the dropout to features and attention coefficients:
			current_feature = Dropout(self.dropout)(current_feature)
			ATTE = Dropout(self.dropout)(current_feature)

			#Combination for the neighbors' features:
			Node_feature = K.dot(ATTE, current_feature)

			Node_feature = K.bias_add(Node_features, self.bias_kernel[i])

		#Aggregate the heads together:
		if self.attn_integrate == 'conca':
			outputS = K.concatenate(output)
		else:
			outputS = K.mean(K.stack(output), axis = 0)

		#Activate the output:
		outputS = self.activation(outputS)

		return outputS

	#Attach a TSNE-loss regularization apart from the classification loss:
	#Modify the perplexity as the neighboring nodes.
	def TSNE_loss(self, inputs, outputs, Adj_matrix):
		#Here we attach the different loss in terms of the tsne loss:
		n = inputs.shape[0]
		P_table = np.zeros((n, n))
		Q_table = np.zeros((n, n))

		#Construct the graph similarity measure:
		for i in range(n):
			#Find all neighbor index:
			denom = 0
			pnumm = []
			qnumm = []
			neighbor_index = [i for i, x in enumerate(list(Adj_matrix[i])) if x == 1]
			#Calculate the P and Q values:
			for k in neighbor_index:
				#Calculate the P table:
				pnum = self.compute_p(inputs, i, k, self.sigma)
				pnumm.append(pnum)
				pnumm /= np.sum(pnumm)

				#Calculate the Q table:
				qnum = self.compute_q(outputs, i, k)
				qnumm.append(qnum)
				qnumm /= np.sum(qnumm)
			P_table[i, neighbor_index] = pnumm
			Q_table[i, neighbor_index] = qnumm
		
		#Normalize the P_table:
		for i in range(n):
			for j in range(i, n):
				s = P_table[i, j]
				t = P_table[j, i]
				P_table[i, j] = (s + t) / 2
				P_table[j, i] = (s + t) / 2
		return kl_div

	def compute_p(self, X, i, j):
		#Function to calculate the P in the original feature space:
		x1 = X[i]
		x2 = X[j]
		num = np.exp(-np.linalg.norm(x1 - x2) ** 2 / (2 * sigma ** 2))
		return num

	def compute_q(self, Y, i, j):
		#Function to calculate the Likelihood in the compressed feature space:
		y1 = Y[i]
		y2 = Y[j]
		num = (1 + np.linalg.norm(y1 - y2) ** 2) ** (-1)
		return num


    #Function to calculate the KL_loss:
	def KL_div(self, Inputs, Q):
		kld = 0
		n = P.shape[0]
		for i in range(n):
			for j in range(n):
				if (P[i, j] != 0) and (Q[i, j] != 0):
					kld += P[i, j] * np.log(P[i,j] / Q[i,j])
		return kld



In [178]:
#Process the T4S1 dataset:
import pandas as pd
os.chdir('/Users/Rhq/Desktop/UCDAVIS/ECS 271/Project/Data')
AA_T4 = pd.read_csv('T4_scRNA.csv', sep = ' ')
BB_S1 = pd.read_csv('S1_scRNA.csv', sep = ' ')
AA_T4 = np.asarray(AA_T4)
BB_S1 = np.asarray(BB_S1)

In [235]:
es_callback = EarlyStopping(monitor='acc', patience=es_patience)
tb_callback = TensorBoard(batch_size=N)
mc_callback = ModelCheckpoint('logs/best_model.h5',
                              monitor='acc',
                              save_best_only=True,
                              save_weights_only=True)

In [220]:
n_A = AA_T4.shape[0]
n_B = BB_S1.shape[0]
labels_t4s1= np.zeros((n_A + n_B ,2))
labels_t4s1[:n_A,0] = 1
labels_t4s1[n_A:(n_A + n_B), 1] = 1
labels_t4s1

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [0., 1.],
       [0., 1.],
       [0., 1.]])

In [180]:
Adj_T4 = kneighbors_graph(AA_T4, n_neighbors = 200)

In [181]:
Adj_S1 = kneighbors_graph(BB_S1, n_neighbors = 200)

In [185]:
AB_T4S1 = np.concatenate([AA_T4, BB_S1])

In [203]:
Adj_t4s1 = kneighbors_graph(AB_T4S1, n_neighbors = 200)

In [247]:
#Build a model for scRNA-seq data:
N = AB_T4S1.shape[0]
F = AB_T4S1.shape[1]
F_ = 8
n_attn_heads = 8
dropout_rate = 0.6
l2_reg = 1e-3
X_in = Input(shape=(F,))
n_classes = 2
A_in = Input(shape=(N,))

dropout1 = Dropout(dropout_rate)(X_in)
graph_attention_1 = GraphAttention(F_,
                                   attn_heads=n_attn_heads,
                                   attn_heads_reduction='concat',
                                   dropout_rate=dropout_rate,
                                   activation='elu',
                                   kernel_regularizer=l2(l2_reg),
                                   attn_kernel_regularizer=l2(l2_reg))([dropout1, A_in])
dropout2 = Dropout(dropout_rate)(graph_attention_1)
graph_attention_2 = GraphAttention(n_classes,
                                   attn_heads=1,
                                   attn_heads_reduction='average',
                                   dropout_rate=dropout_rate,
                                   activation='sigmoid',
                                   kernel_regularizer=l2(l2_reg),
                                   attn_kernel_regularizer=l2(l2_reg))([dropout2, A_in])

# Build model
models = Model(inputs = [X_in, A_in], outputs = graph_attention_1)
model = Model(inputs=[X_in, A_in], outputs=graph_attention_2)

In [None]:
id_val = np.randint()
def sample_mask(idx, l):
    """Create mask."""
    mask = np.zeros(l)
    mask[idx] = 1
    return np.array(mask, dtype=np.bool)



In [248]:
#fit the model:
learning_rate = 1e-6
optimizer = Adam(lr=learning_rate)
model.compile(optimizer=optimizer,
              loss='binary_crossentropy',
              metrics=['acc'])
model.summary()
history2 = model.fit([AB_T4S1, Adj_t4s1], labels_t4s1, epochs = 15, batch_size = N, shuffle = False, callbacks=[es_callback, tb_callback, mc_callback])


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_35 (InputLayer)           (None, 1851)         0                                            
__________________________________________________________________________________________________
dropout_200 (Dropout)           (None, 1851)         0           input_35[0][0]                   
__________________________________________________________________________________________________
input_36 (InputLayer)           (None, 4777)         0                                            
__________________________________________________________________________________________________
graph_attention_19 (GraphAttent (None, 64)           118656      dropout_200[0][0]                
                                                                 input_36[0][0]                   
__________

In [249]:
model.load_weights('logs/best_model.h5')


In [255]:
models.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_35 (InputLayer)           (None, 1851)         0                                            
__________________________________________________________________________________________________
dropout_200 (Dropout)           (None, 1851)         0           input_35[0][0]                   
__________________________________________________________________________________________________
input_36 (InputLayer)           (None, 4777)         0                                            
__________________________________________________________________________________________________
graph_attention_19 (GraphAttent (None, 64)           118656      dropout_200[0][0]                
                                                                 input_36[0][0]                   
Total para

In [256]:
lowdim_repre = models.predict([AB_T4S1, Adj_t4s1])

InvalidArgumentError: Incompatible shapes: [32,32] vs. [32,4777]
	 [[{{node graph_attention_19/add_1}} = Add[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](graph_attention_19/leaky_re_lu_87/LeakyRelu, graph_attention_19/mul)]]

In [253]:
#Get the low-dimensional representation:

intermediate_layer_model = Model(inputs=model.input,
                                 outputs=model.get_layer('graph_attention_20').output)
lowdim_repre = intermediate_layer_model.predict([AB_T4S1, Adj_t4s1])

InvalidArgumentError: Incompatible shapes: [32,32] vs. [32,4777]
	 [[{{node graph_attention_19/add_11}} = Add[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](graph_attention_19/leaky_re_lu_92/LeakyRelu, graph_attention_20/mul)]]

In [250]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_35 (InputLayer)           (None, 1851)         0                                            
__________________________________________________________________________________________________
dropout_200 (Dropout)           (None, 1851)         0           input_35[0][0]                   
__________________________________________________________________________________________________
input_36 (InputLayer)           (None, 4777)         0                                            
__________________________________________________________________________________________________
graph_attention_19 (GraphAttent (None, 64)           118656      dropout_200[0][0]                
                                                                 input_36[0][0]                   
__________

In [239]:
get_rd_layer_output = K.function([model.layers[0].input, K.learning_phase()],
                                  [model.layers[3].output])

<keras.engine.input_layer.InputLayer at 0x15159aeb8>

In [245]:
model.layers[0]

<keras.engine.input_layer.InputLayer at 0x15159aeb8>

In [61]:
N = 100
F_ = 8
X_in = Input(shape = (F_, ))
A_in = Input(shape = (N, ))
X = Dropout(0.2)(X_in)
g_atten_1 = gat_arc(out_feature = F_, attn_num = 8,
                         attn_integrate = 'concat', dropout = 0.3, activation = 'elu',
                         )([X, A_in])
X2 = Dropout(0.2)(g_atten_1)
g_atten_2 = gat_arc(out_feature = 7, attn_num = 2, attn_integrate = 'average', activation = 'softmax')

model = Model(inputs = [X_in, A_in], outputs = g_atten_2)
model.summary()



AttributeError: 'gat_arc' object has no attribute 'name'

In [159]:
X = np.random.rand(10,10)

In [165]:
from sklearn.neighbors import kneighbors_graph
XMAT = kneighbors_graph(X, n_neighbors = 6, mode = 'connectivity')


In [166]:
XMAT.todense()

matrix([[0., 0., 1., 0., 1., 1., 0., 1., 1., 1.],
        [1., 0., 0., 1., 0., 1., 1., 1., 0., 1.],
        [1., 0., 0., 1., 1., 0., 1., 1., 1., 0.],
        [0., 1., 1., 0., 0., 1., 1., 1., 0., 1.],
        [1., 0., 1., 1., 0., 1., 1., 0., 1., 0.],
        [1., 1., 0., 1., 0., 0., 1., 1., 0., 1.],
        [0., 1., 1., 1., 1., 1., 0., 1., 0., 0.],
        [0., 1., 1., 1., 0., 1., 1., 0., 0., 1.],
        [1., 0., 1., 1., 1., 0., 0., 1., 0., 1.],
        [0., 0., 1., 1., 0., 1., 1., 1., 1., 0.]])

In [257]:
class GraphAttention(Layer):

    def __init__(self,
                 F_,
                 attn_heads=1,
                 attn_heads_reduction='concat',  # {'concat', 'average'}
                 dropout_rate=0.5,
                 activation='relu',
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 attn_kernel_initializer='glorot_uniform',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 attn_kernel_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 attn_kernel_constraint=None,
                 **kwargs):
        if attn_heads_reduction not in {'concat', 'average'}:
            raise ValueError('Possbile reduction methods: concat, average')

        self.F_ = F_  # Number of output features (F' in the paper)
        self.attn_heads = attn_heads  # Number of attention heads (K in the paper)
        self.attn_heads_reduction = attn_heads_reduction  # Eq. 5 and 6 in the paper
        self.dropout_rate = dropout_rate  # Internal dropout rate
        self.activation = activations.get(activation)  # Eq. 4 in the paper
        self.use_bias = use_bias

        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.attn_kernel_initializer = initializers.get(attn_kernel_initializer)

        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.attn_kernel_regularizer = regularizers.get(attn_kernel_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.attn_kernel_constraint = constraints.get(attn_kernel_constraint)
        self.supports_masking = False

        # Populated by build()
        self.kernels = []       # Layer kernels for attention heads
        self.biases = []        # Layer biases for attention heads
        self.attn_kernels = []  # Attention kernels for attention heads

        if attn_heads_reduction == 'concat':
            # Output will have shape (..., K * F')
            self.output_dim = self.F_ * self.attn_heads
        else:
            # Output will have shape (..., F')
            self.output_dim = self.F_

        super(GraphAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) >= 2
        F = input_shape[0][-1]

        # Initialize weights for each attention head
        for head in range(self.attn_heads):
            # Layer kernel
            kernel = self.add_weight(shape=(F, self.F_),
                                     initializer=self.kernel_initializer,
                                     regularizer=self.kernel_regularizer,
                                     constraint=self.kernel_constraint,
                                     name='kernel_{}'.format(head))
            self.kernels.append(kernel)

            # # Layer bias
            if self.use_bias:
                bias = self.add_weight(shape=(self.F_, ),
                                       initializer=self.bias_initializer,
                                       regularizer=self.bias_regularizer,
                                       constraint=self.bias_constraint,
                                       name='bias_{}'.format(head))
                self.biases.append(bias)

            # Attention kernels
            attn_kernel_self = self.add_weight(shape=(self.F_, 1),
                                               initializer=self.attn_kernel_initializer,
                                               regularizer=self.attn_kernel_regularizer,
                                               constraint=self.attn_kernel_constraint,
                                               name='attn_kernel_self_{}'.format(head),)
            attn_kernel_neighs = self.add_weight(shape=(self.F_, 1),
                                                 initializer=self.attn_kernel_initializer,
                                                 regularizer=self.attn_kernel_regularizer,
                                                 constraint=self.attn_kernel_constraint,
                                                 name='attn_kernel_neigh_{}'.format(head))
            self.attn_kernels.append([attn_kernel_self, attn_kernel_neighs])
        self.built = True

    def call(self, inputs):
        X = inputs[0]  # Node features (N x F)
        A = inputs[1]  # Adjacency matrix (N x N)

        outputs = []
        for head in range(self.attn_heads):
            kernel = self.kernels[head]  # W in the paper (F x F')
            attention_kernel = self.attn_kernels[head]  # Attention kernel a in the paper (2F' x 1)

            # Compute inputs to attention network
            features = K.dot(X, kernel)  # (N x F')

            # Compute feature combinations
            # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_2]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
            attn_for_self = K.dot(features, attention_kernel[0])    # (N x 1), [a_1]^T [Wh_i]
            attn_for_neighs = K.dot(features, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

            # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]]
            dense = attn_for_self + K.transpose(attn_for_neighs)  # (N x N) via broadcasting

            # Add nonlinearty
            dense = LeakyReLU(alpha=0.2)(dense)

            # Mask values before activation (Vaswani et al., 2017)
            mask = -10e9 * (1.0 - A)
            dense += mask

            # Apply softmax to get attention coefficients
            dense = K.softmax(dense)  # (N x N)

            # Apply dropout to features and attention coefficients
            dropout_attn = Dropout(self.dropout_rate)(dense)  # (N x N)
            dropout_feat = Dropout(self.dropout_rate)(features)  # (N x F')

            # Linear combination with neighbors' features
            node_features = K.dot(dropout_attn, dropout_feat)  # (N x F')

            if self.use_bias:
                node_features = K.bias_add(node_features, self.biases[head])

            # Add output of attention head to final output
            outputs.append(node_features)

        # Aggregate the heads' output according to the reduction method
        if self.attn_heads_reduction == 'concat':
            output = K.concatenate(outputs)  # (N x KF')
        else:
            output = K.mean(K.stack(outputs), axis=0)  # N x F')

        output = self.activation(output)
        return output

    def compute_output_shape(self, input_shape):
        output_shape = input_shape[0][0], self.output_dim
        return output_shape

In [None]:
n_classes = 

In [259]:
import os
import pickle as pkl
import sys

import networkx as nx
import numpy as np
import scipy.sparse as sp


def parse_index_file(filename):
    """Parse index file."""
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index


def sample_mask(idx, l):
    """Create mask."""
    mask = np.zeros(l)
    mask[idx] = 1
    return np.array(mask, dtype=np.bool)


def load_data(dataset_str):
    """Load data."""
    #FILE_PATH = os.path.abspath(__file__)
    #DIR_PATH = os.path.dirname(FILE_PATH)
    #DATA_PATH = os.path.join(DIR_PATH, 'data/')

    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("ind.{}.{}".format( dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("ind.{}.test.index".format( dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)
    #idx_test = range(len(y) + 500, len(labels))

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask

In [209]:
A_cit, X_cit, Y_train_cit, Y_val_cit, Y_test_cit, id_train_cit, id_val_cit, id_test_cit = load_data('citeseer')


In [None]:
#Evaluate on the 
np.sum(id_train_cit) + np.sum()

In [294]:
os.chdir('/Users/Rhq/Desktop/UCDAVIS/ECS 271/Project/Data/cora')
A, X, Y_train, Y_val, Y_test, idx_train, idx_val, idx_test = load_data('citeseer')
N = X.shape[0]                # Number of nodes in the graph
F = X.shape[1]                # Original feature dimension
n_classes = Y_train.shape[1]  # Number of classes
F_ = 8                        # Output size of first GraphAttention layer
n_attn_heads = 8              # Number of attention heads in first GAT layer
dropout_rate = 0.6            # Dropout rate (between and inside GAT layers)
l2_reg = 5e-4/2               # Factor for l2 regularization
learning_rate = 5e-3          # Learning rate for Adam
epochs = 10000                # Number of training epochs
es_patience = 100    

In [295]:
#A, X, Y_train, Y_val, Y_test, idx_train, idx_val, idx_test = load_data('cora')

# Parameters
N = X.shape[0]                # Number of nodes in the graph
F = X.shape[1]                # Original feature dimension
n_classes = Y_train.shape[1]  # Number of classes
F_ = 16                        # Output size of first GraphAttention layer
n_attn_heads = 16              # Number of attention heads in first GAT layer
dropout_rate = 0.6            # Dropout rate (between and inside GAT layers)
l2_reg = 5e-4/2               # Factor for l2 regularization
learning_rate = 5e-3          # Learning rate for Adam
epochs = 500          # Number of training epochs
es_patience = 100             # Patience fot early stopping

# Preprocessing operations
X = preprocess_features(X)
A = A + np.eye(A.shape[0])  # Add self-loops

# Model definition (as per Section 3.3 of the paper)
X_in = Input(shape=(F,))
A_in = Input(shape=(N,))

dropout1 = Dropout(dropout_rate)(X_in)
graph_attention_1 = GraphAttention(F_,
                                   attn_heads=n_attn_heads,
                                   attn_heads_reduction='concat',
                                   dropout_rate=dropout_rate,
                                   activation='elu',
                                   kernel_regularizer=l2(l2_reg),
                                   attn_kernel_regularizer=l2(l2_reg))([dropout1, A_in])
dropout2 = Dropout(dropout_rate)(graph_attention_1)
graph_attention_2 = GraphAttention(n_classes,
                                   attn_heads=1,
                                   attn_heads_reduction='average',
                                   dropout_rate=dropout_rate,
                                   activation='softmax',
                                   kernel_regularizer=l2(l2_reg),
                                   attn_kernel_regularizer=l2(l2_reg))([dropout2, A_in])

  if sys.path[0] == '':


In [296]:
model = Model(inputs=[X_in, A_in], outputs=graph_attention_2)
optimizer = Adam(lr=learning_rate)
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              weighted_metrics=['acc'])
model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_53 (InputLayer)           (None, 3703)         0                                            
__________________________________________________________________________________________________
dropout_396 (Dropout)           (None, 3703)         0           input_53[0][0]                   
__________________________________________________________________________________________________
input_54 (InputLayer)           (None, 3327)         0                                            
__________________________________________________________________________________________________
graph_attention_37 (GraphAttent (None, 256)          948736      dropout_396[0][0]                
                                                                 input_54[0][0]                   
__________

In [297]:
es_callback = EarlyStopping(monitor='val_weighted_acc', patience=es_patience)
tb_callback = TensorBoard(batch_size=N)
mc_callback = ModelCheckpoint('logs/best_model.h5',
                              monitor='val_weighted_acc',
                              save_best_only=True,
                              save_weights_only=True)

# Train model
validation_data = ([X, A], Y_val, idx_val)
model.fit([X, A],
          Y_train,
          sample_weight=idx_train,
          epochs = 80,
          batch_size=N,
          validation_data=validation_data,
          shuffle=False,  # Shuffling data means shuffling the whole graph
          callbacks=[es_callback, tb_callback, mc_callback])

# Load best model
model.load_weights('logs/best_model.h5')

# Evaluate model
eval_results = model.evaluate([X, A],
                              Y_test,
                              sample_weight=idx_test,
                              batch_size=N,
                              verbose=0)
eval_results

Train on 3327 samples, validate on 3327 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80


Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


[1.6704590320587158, 0.7359996438026428]

In [293]:
#Record the 
#8 -> 0.8160
#16 -> 0.8

In [273]:
F = X.shape[1]
F_ = 8
n_attn_heads = 8
dropout_rate = 0.4
l2_reg = 5e-4
X_in = Input(shape=(F,))
n_classes = 7
A_in = Input(shape=(N,))
N = X.shape[0]

dropout1 = Dropout(dropout_rate)(X_in)
graph_attention_1 = GraphAttention(F_,
                                   attn_heads=n_attn_heads,
                                   attn_heads_reduction='concat',
                                   dropout_rate=dropout_rate,
                                   activation='elu',
                                   kernel_regularizer=l2(l2_reg),
                                   attn_kernel_regularizer=l2(l2_reg))([dropout1, A_in])
dropout2 = Dropout(dropout_rate)(graph_attention_1)
graph_attention_2 = GraphAttention(F_,
                                   attn_heads=1,
                                   attn_heads_reduction='average',
                                   dropout_rate=dropout_rate,
                                   activation='softmax',
                                   kernel_regularizer=l2(l2_reg),
                                   attn_kernel_regularizer=l2(l2_reg))([dropout2, A_in])
gf2 = 
# Build model
models = Model(inputs = [X_in, A_in], outputs = graph_attention_1)
model = Model(inputs=[X_in, A_in], outputs=graph_attention_2)
learning_rate = 5e-4
model = Model(inputs=[X_in, A_in], outputs=graph_attention_2)
optimizer = Adam(lr=learning_rate)
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              weighted_metrics=['acc'])
model.summary()
validation_data = ([X_in, A_in], Y_train, idx_val)
history1 = model.fit([X_in, A_in],
          Y_val,
          sample_weight=idx_train,
          epochs = 30, 
          batch_size = N,
          validation_data=validation_data,
          shuffle=False,  # Shuffling data means shuffling the whole graph
          callbacks=[es_callback,  mc_callback])

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_45 (InputLayer)           (None, 1433)         0                                            
__________________________________________________________________________________________________
dropout_300 (Dropout)           (None, 1433)         0           input_45[0][0]                   
__________________________________________________________________________________________________
input_46 (InputLayer)           (None, 2708)         0                                            
__________________________________________________________________________________________________
graph_attention_29 (GraphAttent (None, 64)           91904       dropout_300[0][0]                
                                                                 input_46[0][0]                   
__________

ValueError: When feeding symbolic tensors to a model, we expect thetensors to have a static batch size. Got tensor with shape: (None, 1433)

In [217]:
from keract import get_activations
activations = get_activations(model, [X_cit, A_cit], 'graph_attention_13')


ValueError: setting an array element with a sequence.

In [None]:
#Compare to the TSNE visualization of the graph features:


In [151]:
eval_results = model.evaluate([X, A],
                              Y_test,
                              sample_weight=idx_test,
                              batch_size=N,
                              verbose=0)

In [152]:
eval_results

[0.4395214319229126, 0.9008708596229553]

In [None]:
y_train = np.zeros(labels.shape)
y_val = np.zeros(labels.shape)
y_test = np.zeros(labels.shape)


In [None]:
#Train the original model:
n_label = labels.shape[1]
n_atten_head = 16
learning_rate = 5e-3
epochs = 1000


In [None]:
F = X.shape[1]
F_ = 8
n_attn_heads = 8
dropout_rate = 0.4
l2_reg = 5e-4
X_in = Input(shape=(F,))
n_classes = 7
A_in = Input(shape=(N,))

dropout1 = Dropout(dropout_rate)(X_in)
graph_attention_1 = GraphAttention(F_,
                                   attn_heads=n_attn_heads,
                                   attn_heads_reduction='concat',
                                   dropout_rate=dropout_rate,
                                   activation='elu',
                                   kernel_regularizer=l2(l2_reg),
                                   attn_kernel_regularizer=l2(l2_reg))([dropout1, A_in])
dropout2 = Dropout(dropout_rate)(graph_attention_1)
graph_attention_2 = GraphAttention(n_classes,
                                   attn_heads=1,
                                   attn_heads_reduction='average',
                                   dropout_rate=dropout_rate,
                                   activation='softmax',
                                   kernel_regularizer=l2(l2_reg),
                                   attn_kernel_regularizer=l2(l2_reg))([dropout2, A_in])

# Build model
model = Model(inputs=[X_in, A_in], outputs=graph_attention_2)

In [None]:
#Construct model to calculate the novel loss function:
def TSNE_model(sigma):
    Feature_in = Input(shape = (F, ))
    Data_in =
    

In [None]:
########
#Simulate a fully-connected KNN Graph on top of the Single-cell Genomic Data:

