In [None]:
import tensorflow as tf
import numpy as np
import graph

In [None]:
# Problem size shared by all the models defined below.
#NFEATURES = 28**2  # Earlier setting, kept for reference.
NFEATURES = 1000  # Number of input features per sample.
#NCLASSES = 10  # Earlier setting, kept for reference.
NCLASSES = 20  # Number of output classes, i.e. width of the logits.

# Common methods for all models

In [None]:
class base_model(object):
    """
    Graph-building steps shared by all models.

    Sub-classes provide `_inference(data, dropout)`, which builds the forward
    pass, and create their parameters through `_weight_variable` and
    `_bias_variable` so that the L2 penalties end up in `self.regularizers`.
    """

    def __init__(self):
        # L2 penalty tensors, appended by the variable helpers below.
        self.regularizers = []

    def inference(self, data, dropout):
        """
        It builds the model, i.e. the computational graph, as far as
        is required for running the network forward to make predictions,
        i.e. return logits given raw data.

        data: size N x M
            N: number of signals (samples)
            M: number of vertices (features)
        dropout: handed unchanged to the sub-class `_inference`, which
            decides what to do with it.
        """
        logits = self._inference(data, dropout)
        return logits

    def prediction(self, logits, labels):
        """
        Return the probability of a sample to belong to each class.

        `labels` is not used; it stays in the signature for existing callers.
        """
        with tf.name_scope('prediction'):
            predictions = tf.nn.softmax(logits)
            return predictions

    def loss(self, logits, labels, regularization):
        """
        Adds to the inference model the layers required to generate loss.

        logits: output of `inference`.
        labels: class indices, cast to int64 here.
        regularization: scalar weight applied to the sum of the collected
            L2 penalties.

        Returns (loss, loss_average). Evaluating loss_average also updates the
        exponential moving averages of the three loss terms.
        """
        with tf.name_scope('loss'):
            with tf.name_scope('cross_entropy'):
                labels = tf.to_int64(labels)
                # Keyword arguments: independent of the positional order.
                cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits, labels=labels)
                cross_entropy = tf.reduce_mean(cross_entropy)
            with tf.name_scope('regularization'):
                if self.regularizers:
                    regularization *= tf.add_n(self.regularizers)
                else:
                    # tf.add_n rejects an empty list: a model without any
                    # regularized variable simply has a zero penalty.
                    regularization = tf.constant(0.0)
            loss = cross_entropy + regularization
            
            # Summaries for TensorBoard.
            tf.scalar_summary('loss/cross_entropy', cross_entropy)
            tf.scalar_summary('loss/regularization', regularization)
            tf.scalar_summary('loss/total', loss)
            with tf.name_scope('averages'):
                averages = tf.train.ExponentialMovingAverage(0.9)
                op_averages = averages.apply([cross_entropy, regularization, loss])
                tf.scalar_summary('loss/avg/cross_entropy', averages.average(cross_entropy))
                tf.scalar_summary('loss/avg/regularization', averages.average(regularization))
                tf.scalar_summary('loss/avg/total', averages.average(loss))
                # Reading the averaged loss forces the averages to be updated.
                with tf.control_dependencies([op_averages]):
                    loss_average = tf.identity(averages.average(loss), name='control')
            return loss, loss_average

    def training(self, loss, learning_rate, decay_step, decay_rate=0.95, momentum=0.9):
        """
        Adds to the loss model the Ops required to generate and apply gradients.

        loss: tensor to minimize.
        learning_rate: initial learning rate.
        decay_step, decay_rate: staircase exponential decay of the learning
            rate; no decay is applied when decay_rate is 1.
        momentum: 0 selects plain gradient descent, any other value selects
            the momentum optimizer.

        Returns a tensor holding the learning rate; evaluating it applies
        one gradient step.
        """
        with tf.name_scope('training'):
            # Learning rate.
            global_step = tf.Variable(0, name='global_step', trainable=False)
            if decay_rate != 1:
                learning_rate = tf.train.exponential_decay(
                        learning_rate, global_step, decay_step, decay_rate, staircase=True)
            tf.scalar_summary('learning_rate', learning_rate)
            # Optimizer.
            if momentum == 0:
                optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            else:
                optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
            grads = optimizer.compute_gradients(loss)
            op_gradients = optimizer.apply_gradients(grads, global_step=global_step)
            # Histograms.
            for grad, var in grads:
                if grad is None:
                    # compute_gradients pairs a variable with None when the
                    # loss does not depend on it: there is nothing to plot.
                    print('warning: {} has no gradient'.format(var.op.name))
                else:
                    tf.histogram_summary(var.op.name + '/gradients', grad)
            # The op return the learning rate.
            with tf.control_dependencies([op_gradients]):
                op_train = tf.identity(learning_rate, name='control')
            return op_train

    def evaluation(self, logits, labels):
        """Return the number of correct predictions."""
        with tf.name_scope('evaluation'):
            correct = tf.nn.in_top_k(logits, labels, 1)
            ncorrects = tf.reduce_sum(tf.cast(correct, tf.int32))
            return ncorrects

    # Helpers

    def _weight_variable(self, shape, regularization=True):
        """
        Create the float32 variable 'weights' in the current variable scope.

        Initialized from a truncated normal (mean 0, stddev 0.1). Its L2 loss
        is appended to `self.regularizers` when `regularization` is true.
        """
        initial = tf.truncated_normal_initializer(0, 0.1)
        var = tf.get_variable('weights', shape, tf.float32, initializer=initial)
        if regularization:
            self.regularizers.append(tf.nn.l2_loss(var))
        tf.histogram_summary(var.op.name, var)
        return var

    def _bias_variable(self, shape, regularization=True):
        """
        Create the float32 variable 'bias' in the current variable scope.

        Initialized to the constant 0.1. Its L2 loss is appended to
        `self.regularizers` when `regularization` is true.
        """
        initial = tf.constant_initializer(0.1)
        var = tf.get_variable('bias', shape, tf.float32, initializer=initial)
        if regularization:
            self.regularizers.append(tf.nn.l2_loss(var))
        tf.histogram_summary(var.op.name, var)
        return var

    def _conv2d(self, x, W):
        """2D convolution of x with the filters W, stride 1 and 'SAME' padding."""
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# Fully connected

In [None]:
class fc1(base_model):
    """Single linear layer: NFEATURES inputs mapped to NCLASSES logits."""

    def __init__(self):
        super().__init__()

    def _inference(self, x, dropout):
        # `dropout` is part of the common interface but unused by this model.
        weights = self._weight_variable([NFEATURES, NCLASSES])
        bias = self._bias_variable([NCLASSES])
        logits = tf.matmul(x, weights) + bias
        return logits

class fc2(base_model):
    """Two fully connected layers: NFEATURES -> nhiddens (ReLU) -> NCLASSES logits."""
    def __init__(self, nhiddens):
        super().__init__()
        self.nhiddens = nhiddens  # Number of hidden units
    def _inference(self, x, dropout):
        # `dropout` is part of the common interface but unused by this model.
        # Each layer gets its own tf.variable_scope: the helpers create their
        # variables with tf.get_variable, which ignores tf.name_scope, so with
        # name scopes both layers would request the same 'weights' and 'bias'.
        with tf.variable_scope('fc1'):
            W = self._weight_variable([NFEATURES, self.nhiddens])
            b = self._bias_variable([self.nhiddens])
            y = tf.nn.relu(tf.matmul(x, W) + b)
        with tf.variable_scope('fc2'):
            W = self._weight_variable([self.nhiddens, NCLASSES])
            b = self._bias_variable([NCLASSES])
            y = tf.matmul(y, W) + b
        return y

# Convolutional

In [None]:
class cnn2(base_model):
    """Simple convolutional model: one K x K convolution (ReLU), then a linear layer."""
    def __init__(self, K, F):
        super().__init__()
        self.K = K  # Patch size
        self.F = F  # Number of features
    def _inference(self, x, dropout):
        # `dropout` is part of the common interface but unused by this model.
        # Each sample is viewed as a square image whose side follows from
        # NFEATURES (28 for NFEATURES = 28**2) instead of being hard-coded.
        side = int(round(NFEATURES ** 0.5))
        if side * side != NFEATURES:
            raise ValueError('cnn2 needs NFEATURES to be a perfect square, '
                             'got {}'.format(NFEATURES))
        # tf.variable_scope, not tf.name_scope: the helpers use tf.get_variable,
        # which ignores name scopes, so both layers would otherwise request
        # the same 'weights' and 'bias' variables.
        with tf.variable_scope('conv1'):
            W = self._weight_variable([self.K, self.K, 1, self.F])
            # One bias per filter. A bias per pixel and filter would have
            # shape [1, side, side, self.F].
            b = self._bias_variable([self.F])
            x_2d = tf.reshape(x, [-1, side, side, 1])
            y_2d = self._conv2d(x_2d, W) + b
            y_2d = tf.nn.relu(y_2d)
        with tf.variable_scope('fc1'):
            y = tf.reshape(y_2d, [-1, NFEATURES*self.F])
            W = self._weight_variable([NFEATURES*self.F, NCLASSES])
            b = self._bias_variable([NCLASSES])
            y = tf.matmul(y, W) + b
        return y

class fcnn2(base_model):
    """CNN using the FFT: filtering in the 2D Fourier domain, then a linear layer."""
    def __init__(self, F):
        super().__init__()
        self.F = F  # Number of features
    def _inference(self, x, dropout):
        # `dropout` is part of the common interface but unused by this model.
        # Each sample is viewed as a square image whose side follows from
        # NFEATURES (28 for NFEATURES = 28**2) instead of being hard-coded.
        side = int(round(NFEATURES ** 0.5))
        if side * side != NFEATURES:
            raise ValueError('fcnn2 needs NFEATURES to be a perfect square, '
                             'got {}'.format(NFEATURES))
        half = int(NFEATURES/2)
        # tf.variable_scope, not tf.name_scope: the helpers use tf.get_variable,
        # which ignores name scopes, so variables of the same name would clash.
        with tf.variable_scope('conv1'):
            # Transform to Fourier domain
            x_2d = tf.reshape(x, [-1, side, side])
            # NOTE(review): the imaginary part is a Python int while the real
            # part is a tensor - confirm tf.complex accepts this combination.
            x_2d = tf.complex(x_2d, 0)
            xf_2d = tf.batch_fft2d(x_2d)
            xf = tf.reshape(xf_2d, [-1, NFEATURES])
            xf = tf.expand_dims(xf, 1)  # NSAMPLES x 1 x NFEATURES
            xf = tf.transpose(xf)  # NFEATURES x 1 x NSAMPLES
            # Filter. The real and imaginary parts are two distinct 'weights'
            # variables, hence one sub-scope each.
            with tf.variable_scope('real'):
                Wreal = self._weight_variable([half, self.F, 1])
            with tf.variable_scope('imag'):
                Wimg = self._weight_variable([half, self.F, 1])
            W = tf.complex(Wreal, Wimg)
            xf = xf[:half, :, :]
            yf = tf.batch_matmul(W, xf)  # for each feature
            # Second half of the spectrum taken as the conjugate of the first.
            yf = tf.concat(0, [yf, tf.conj(yf)])
            yf = tf.transpose(yf)  # NSAMPLES x NFILTERS x NFEATURES
            yf_2d = tf.reshape(yf, [-1, side, side])
            # Transform back to spatial domain
            y_2d = tf.batch_ifft2d(yf_2d)
            y_2d = tf.real(y_2d)
            y = tf.reshape(y_2d, [-1, self.F, NFEATURES])
            # Bias and non-linearity. One bias per filter; a bias per filter
            # and feature would have shape [1, self.F, NFEATURES].
            b = self._bias_variable([1, self.F, 1])
            y += b  # NSAMPLES x NFILTERS x NFEATURES
            y = tf.nn.relu(y)
        with tf.variable_scope('fc1'):
            W = self._weight_variable([self.F*NFEATURES, NCLASSES])
            b = self._bias_variable([NCLASSES])
            y = tf.reshape(y, [-1, self.F*NFEATURES])
            y = tf.matmul(y, W) + b
        return y

# Graph convolutional

In [None]:
class fgcnn2(base_model):
    """Graph CNN with full weights, i.e. patch has the same size as input."""
    def __init__(self, L, F):
        super().__init__()
        # L: graph Laplacian, NFEATURES x NFEATURES. Only the second value
        # returned by graph.fourier is kept; it is used as the Fourier basis.
        self.F = F  # Number of filters
        _, self.U = graph.fourier(L)
    def _inference(self, x, dropout):
        # x: NSAMPLES x NFEATURES
        # `dropout` is part of the common interface but unused by this model.
        # tf.variable_scope, not tf.name_scope: the helpers use tf.get_variable,
        # which ignores name scopes, so both layers would otherwise request
        # the same 'weights' and 'bias' variables.
        with tf.variable_scope('gconv1'):
            # Transform to Fourier domain
            U = tf.constant(self.U, dtype=tf.float32)
            xf = tf.matmul(x, U)
            xf = tf.expand_dims(xf, 1)  # NSAMPLES x 1 x NFEATURES
            xf = tf.transpose(xf)  # NFEATURES x 1 x NSAMPLES
            # Filter
            W = self._weight_variable([NFEATURES, self.F, 1])
            yf = tf.batch_matmul(W, xf)  # for each feature
            yf = tf.transpose(yf)  # NSAMPLES x NFILTERS x NFEATURES
            yf = tf.reshape(yf, [-1, NFEATURES])
            # Transform back to graph domain
            Ut = tf.transpose(U)
            y = tf.matmul(yf, Ut)
            # Reshape the back-transformed signal `y`. Reshaping `yf` here
            # would throw the inverse transform away.
            y = tf.reshape(y, [-1, self.F, NFEATURES])
            # Bias and non-linearity. One bias per filter; a bias per filter
            # and vertex would have shape [1, self.F, NFEATURES].
            b = self._bias_variable([1, self.F, 1])
            y += b  # NSAMPLES x NFILTERS x NFEATURES
            y = tf.nn.relu(y)
        with tf.variable_scope('fc1'):
            W = self._weight_variable([self.F*NFEATURES, NCLASSES])
            b = self._bias_variable([NCLASSES])
            y = tf.reshape(y, [-1, self.F*NFEATURES])
            y = tf.matmul(y, W) + b
        return y

In [None]:
class lgcnn2_1(base_model):
    """
    Graph CNN which uses the Lanczos approximation.

    The input of this variant is a 3D tensor N x M x K which is filtered
    directly; the Laplacian is stored but not used when building the graph.
    """
    def __init__(self, L, F, K):
        super().__init__()
        self.L = L  # Graph Laplacian, M x M
        self.F = F  # Number of filters
        self.K = K  # Polynomial order, i.e. filter size (number of hops)
    def _inference(self, x, dropout):
        # `dropout` is part of the common interface but unused by this model.
        # tf.variable_scope, not tf.name_scope: the helpers use tf.get_variable,
        # which ignores name scopes, so both layers would otherwise request
        # the same 'weights' and 'bias' variables.
        with tf.variable_scope('gconv1'):
            N, M, K = x.get_shape()  # N: number of samples, M: number of features
            M = int(M)
            # Transform to Lanczos basis
            xl = tf.reshape(x, [-1, self.K])  # NM x K
            # Filter
            W = self._weight_variable([self.K, self.F])
            y = tf.matmul(xl, W)  # NM x F
            y = tf.reshape(y, [-1, M, self.F])  # N x M x F
            # Bias and non-linearity. One bias per filter; a bias per vertex
            # and filter would have shape [1, M, self.F].
            b = self._bias_variable([1, 1, self.F])
            y += b  # N x M x F
            y = tf.nn.relu(y)
        with tf.variable_scope('fc1'):
            W = self._weight_variable([self.F*M, NCLASSES])
            b = self._bias_variable([NCLASSES])
            y = tf.reshape(y, [-1, self.F*M])
            y = tf.matmul(y, W) + b
        return y

class lgcnn2_2(base_model):
    """
    Graph CNN which uses the Lanczos approximation.

    The basis change is delegated to graph.lanczos, called from the
    TensorFlow graph through tf.py_func.
    """
    def __init__(self, L, F, K):
        super().__init__()
        self.L = L  # Graph Laplacian, M x M
        self.F = F  # Number of filters
        self.K = K  # Polynomial order, i.e. filter size (number of hops)
    def _inference(self, x, dropout):
        # `dropout` is part of the common interface but unused by this model.
        # tf.variable_scope, not tf.name_scope: the helpers use tf.get_variable,
        # which ignores name scopes, so both layers would otherwise request
        # the same 'weights' and 'bias' variables.
        with tf.variable_scope('gconv1'):
            N, M = x.get_shape()  # N: number of samples, M: number of features
            M = int(M)
            # Transform to Lanczos basis
            xl = tf.transpose(x)  # M x N
            def lanczos(x):
                return graph.lanczos(self.L, x, self.K)
            # NOTE(review): the result is declared as float32 - confirm that
            # graph.lanczos returns this dtype.
            xl = tf.py_func(lanczos, [xl], [tf.float32])[0]
            xl = tf.transpose(xl)  # N x M x K
            xl = tf.reshape(xl, [-1, self.K])  # NM x K
            # Filter
            W = self._weight_variable([self.K, self.F])
            y = tf.matmul(xl, W)  # NM x F
            y = tf.reshape(y, [-1, M, self.F])  # N x M x F
            # Bias and non-linearity. One bias per vertex and filter; a bias
            # per filter only would have shape [1, 1, self.F].
            b = self._bias_variable([1, M, self.F])
            y += b  # N x M x F
            y = tf.nn.relu(y)
        with tf.variable_scope('fc1'):
            W = self._weight_variable([self.F*M, NCLASSES])
            b = self._bias_variable([NCLASSES])
            y = tf.reshape(y, [-1, self.F*M])
            y = tf.matmul(y, W) + b
        return y

In [None]:
class cgcnn2_2(base_model):
    """
    Graph CNN which uses the Chebyshev approximation.

    The basis change is delegated to graph.chebyshev, called from the
    TensorFlow graph through tf.py_func.
    """
    def __init__(self, L, F, K):
        super().__init__()
        self.L = graph.rescale_L(L, lmax=2)  # Graph Laplacian, M x M
        self.F = F  # Number of filters
        self.K = K  # Polynomial order, i.e. filter size (number of hops)
    def _inference(self, x, dropout):
        # `dropout` is part of the common interface but unused by this model.
        # tf.variable_scope, not tf.name_scope: the helpers use tf.get_variable,
        # which ignores name scopes, so both layers would otherwise request
        # the same 'weights' and 'bias' variables.
        with tf.variable_scope('gconv1'):
            N, M = x.get_shape()  # N: number of samples, M: number of features
            M = int(M)
            # Transform to Chebyshev basis
            xc = tf.transpose(x)  # M x N
            def chebyshev(x):
                return graph.chebyshev(self.L, x, self.K)
            # NOTE(review): the result is declared as float32 - confirm that
            # graph.chebyshev returns this dtype.
            xc = tf.py_func(chebyshev, [xc], [tf.float32])[0]
            xc = tf.transpose(xc)  # N x M x K
            xc = tf.reshape(xc, [-1, self.K])  # NM x K
            # Filter
            W = self._weight_variable([self.K, self.F])
            y = tf.matmul(xc, W)  # NM x F
            y = tf.reshape(y, [-1, M, self.F])  # N x M x F
            # Bias and non-linearity. One bias per vertex and filter; a bias
            # per filter only would have shape [1, 1, self.F].
            b = self._bias_variable([1, M, self.F])
            y += b  # N x M x F
            y = tf.nn.relu(y)
        with tf.variable_scope('fc1'):
            W = self._weight_variable([self.F*M, NCLASSES])
            b = self._bias_variable([NCLASSES])
            y = tf.reshape(y, [-1, self.F*M])
            y = tf.matmul(y, W) + b
        return y

class cgcnn2_3(base_model):
    """
    Graph CNN which uses the Chebyshev approximation.

    The Chebyshev recursion is built from TensorFlow matmuls against the
    densified Laplacian, and each order is filtered as soon as it is computed.
    """
    def __init__(self, L, F, K):
        super().__init__()
        L = graph.rescale_L(L, lmax=2)  # Graph Laplacian, M x M
        self.L = L.toarray()  # Dense array, flagged as sparse in the matmuls below
        self.F = F  # Number of filters
        self.K = K  # Polynomial order, i.e. filter size (number of hops)
    def _inference(self, x, dropout):
        # `dropout` is part of the common interface but unused by this model.
        # tf.variable_scope, not tf.name_scope: the helpers use tf.get_variable,
        # which ignores name scopes, so both layers would otherwise request
        # the same 'weights' and 'bias' variables.
        with tf.variable_scope('gconv1'):
            N, M = x.get_shape()  # N: number of samples, M: number of features
            M = int(M)
            # Filter
            W = self._weight_variable([self.K, self.F])
            def apply_order(xt, k):
                """Weight the order-k term xt (N x M) by row k of W."""
                xt = tf.reshape(xt, [-1, 1])  # NM x 1
                w = tf.slice(W, [k,0], [1,-1])  # 1 x F
                y = tf.matmul(xt, w)  # NM x F
                return tf.reshape(y, [-1, M, self.F])  # N x M x F
            xt0 = x
            y = apply_order(xt0, 0)
            if self.K > 1:
                xt1 = tf.matmul(x, self.L, b_is_sparse=True)  # N x M
                y += apply_order(xt1, 1)
            # Recurrence: next term = 2 * (previous term times L) - term before.
            for k in range(2, self.K):
                xt2 = 2 * tf.matmul(xt1, self.L, b_is_sparse=True) - xt0  # N x M
                y += apply_order(xt2, k)
                xt0, xt1 = xt1, xt2
            # Bias and non-linearity. One bias per vertex and filter; a bias
            # per filter only would have shape [1, 1, self.F].
            b = self._bias_variable([1, M, self.F])
            y += b  # N x M x F
            y = tf.nn.relu(y)
        with tf.variable_scope('fc1'):
            W = self._weight_variable([self.F*M, NCLASSES])
            b = self._bias_variable([NCLASSES])
            y = tf.reshape(y, [-1, self.F*M])
            y = tf.matmul(y, W) + b
        return y

class cgcnn2_4(base_model):
    """
    Graph CNN which uses the Chebyshev approximation.

    The Laplacian is stored as a TensorFlow sparse tensor and each Chebyshev
    order is filtered as soon as it is computed.
    """
    def __init__(self, L, F, K):
        super().__init__()
        L = graph.rescale_L(L, lmax=2)  # Graph Laplacian, M x M
        L = L.tocoo()
        # Sparse indices are integer coordinates: build them as int64 instead
        # of writing them into a float array.
        indices = np.column_stack((L.row, L.col)).astype(np.int64)
        # NOTE(review): the values keep the dtype of L.data - confirm it
        # matches the dtype of the signals multiplied with L below.
        L = tf.SparseTensor(indices, L.data, L.shape)
        self.L = tf.sparse_reorder(L)
        self.F = F  # Number of filters
        self.K = K  # Polynomial order, i.e. filter size (number of hops)
    def _inference(self, x, dropout):
        # `dropout` is part of the common interface but unused by this model.
        # tf.variable_scope, not tf.name_scope: the helpers use tf.get_variable,
        # which ignores name scopes, so both layers would otherwise request
        # the same 'weights' and 'bias' variables.
        with tf.variable_scope('gconv1'):
            N, M = x.get_shape()  # N: number of samples, M: number of features
            M = int(M)
            # Filter
            W = self._weight_variable([self.K, self.F])
            def apply_order(xt, k):
                """Weight the order-k term xt (M x N) by row k of W."""
                xt = tf.transpose(xt)  # N x M
                xt = tf.reshape(xt, [-1, 1])  # NM x 1
                w = tf.slice(W, [k,0], [1,-1])  # 1 x F
                y = tf.matmul(xt, w)  # NM x F
                return tf.reshape(y, [-1, M, self.F])  # N x M x F
            xt0 = tf.transpose(x)  # M x N
            y = apply_order(xt0, 0)
            if self.K > 1:
                xt1 = tf.sparse_tensor_dense_matmul(self.L, xt0)
                y += apply_order(xt1, 1)
            # Recurrence: next term = 2 * L * previous term - term before.
            for k in range(2, self.K):
                xt2 = 2 * tf.sparse_tensor_dense_matmul(self.L, xt1) - xt0  # M x N
                y += apply_order(xt2, k)
                xt0, xt1 = xt1, xt2
            # Bias and non-linearity. One bias per vertex and filter; a bias
            # per filter only would have shape [1, 1, self.F].
            b = self._bias_variable([1, M, self.F])
            y += b  # N x M x F
            y = tf.nn.relu(y)
        with tf.variable_scope('fc1'):
            W = self._weight_variable([self.F*M, NCLASSES])
            b = self._bias_variable([NCLASSES])
            y = tf.reshape(y, [-1, self.F*M])
            y = tf.matmul(y, W) + b
        return y

class cgcnn2_5(base_model):
    """
    Graph CNN which uses the Chebyshev approximation.

    The Laplacian is stored as a TensorFlow sparse tensor. All Chebyshev
    orders are stacked first, then filtered with a single matmul.
    """
    def __init__(self, L, F, K):
        super().__init__()
        L = graph.rescale_L(L, lmax=2)  # Graph Laplacian, M x M
        L = L.tocoo()
        # Sparse indices are integer coordinates: build them as int64 instead
        # of writing them into a float array.
        indices = np.column_stack((L.row, L.col)).astype(np.int64)
        # NOTE(review): the values keep the dtype of L.data - confirm it
        # matches the dtype of the signals multiplied with L below.
        L = tf.SparseTensor(indices, L.data, L.shape)
        self.L = tf.sparse_reorder(L)
        self.F = F  # Number of filters
        self.K = K  # Polynomial order, i.e. filter size (number of hops)
    def _inference(self, x, dropout):
        # `dropout` is part of the common interface but unused by this model.
        # tf.variable_scope, not tf.name_scope: the helpers use tf.get_variable,
        # which ignores name scopes, so both layers would otherwise request
        # the same 'weights' and 'bias' variables.
        with tf.variable_scope('gconv1'):
            N, M = x.get_shape()  # N: number of samples, M: number of features
            M = int(M)
            # Transform to Chebyshev basis
            xt0 = tf.transpose(x)  # M x N
            xt = tf.expand_dims(xt0, 0)  # 1 x M x N
            def concat(xt, x):
                """Append the M x N term x to the stack xt along axis 0."""
                x = tf.expand_dims(x, 0)  # 1 x M x N
                return tf.concat(0, [xt, x])  # K x M x N
            if self.K > 1:
                xt1 = tf.sparse_tensor_dense_matmul(self.L, xt0)
                xt = concat(xt, xt1)
            # Recurrence: next term = 2 * L * previous term - term before.
            for k in range(2, self.K):
                xt2 = 2 * tf.sparse_tensor_dense_matmul(self.L, xt1) - xt0  # M x N
                xt = concat(xt, xt2)
                xt0, xt1 = xt1, xt2
            xt = tf.transpose(xt)  # N x M x K
            xt = tf.reshape(xt, [-1,self.K])  # NM x K
            # Filter
            W = self._weight_variable([self.K, self.F])
            y = tf.matmul(xt, W)  # NM x F
            y = tf.reshape(y, [-1, M, self.F])  # N x M x F
            # Bias and non-linearity. One bias per vertex and filter; a bias
            # per filter only would have shape [1, 1, self.F].
            b = self._bias_variable([1, M, self.F])
            y += b  # N x M x F
            y = tf.nn.relu(y)
        with tf.variable_scope('fc1'):
            W = self._weight_variable([self.F*M, NCLASSES])
            b = self._bias_variable([NCLASSES])
            y = tf.reshape(y, [-1, self.F*M])
            y = tf.matmul(y, W) + b
        return y

In [None]:
class cgcnn(base_model):
    """
    Graph CNN which uses the Chebyshev approximation.

    The following are hyper-parameters of graph convolutional layers.
    They are lists, whose length is equal to the number of gconv layers.
        F: Number of features.
        K: List of polynomial orders, i.e. filter sizes or number of hops.
        p: Pooling size.
           Should be 1 (no pooling) or a power of 2 (reduction by 2 at each coarser level).
           Beware to have coarsened enough.

    L: List of Graph Laplacians. Size M x M. One per coarsening level.

    The following are hyper-parameters of fully connected layers.
    They are lists, whose length is equal to the number of fc layers.
        M: Number of features per sample, i.e. number of hidden neurons.
    
    The following are choices of implementation for various blocks.
    Each one is the name of a method of this class.
        filter: filtering operation, e.g. chebyshev5 or chebyshev2.
        brelu: bias and relu, e.g. b1relu or b2relu.
        pool: pooling, e.g. mpool1.
    """
    def __init__(self, L, F, K, p, M, filter='chebyshev5', brelu='b1relu', pool='mpool1'):
        super().__init__()
        
        # Verify the consistency w.r.t. the number of layers.
        assert len(L) >= len(F) == len(K) == len(p)
        assert np.all(np.array(p) >= 1)
        p_log2 = np.where(np.array(p) > 1, np.log2(p), 0)
        assert np.all(np.mod(p_log2, 1) == 0)  # Powers of 2.
        assert len(L) >= 1 + np.sum(p_log2)  # Enough coarsening levels for pool sizes.
        
        # Keep the useful Laplacians only: a pooling of size p skips log2(p)
        # coarsening levels before the next layer.
        j = 0
        self.L = []
        L_shape = []
        for i in range(len(p)):
            self.L.append(L[j])
            L_shape.append(L[j].shape[0])
            j += int(np.log2(p[i])) if p[i] > 1 else 0
        L = self.L
        # NOTE(review): layers without pooling share the same Laplacian
        # object, which is rescaled once per layer here. That is only correct
        # if graph.rescale_L returns a new matrix - confirm.
        for i in range(len(L)):
            L[i] = graph.rescale_L(L[i], lmax=2)
        
        # Transform the Laplacians to TF sparse matrices.
        if filter == 'chebyshev5':
            with tf.name_scope('laplacians'):
                for i in range(len(L)):
                    L[i] = L[i].tocoo()
                    data = L[i].data
                    # Sparse indices are integer coordinates: build them as
                    # int64 instead of writing them into a float array.
                    indices = np.column_stack((L[i].row, L[i].col)).astype(np.int64)
                    L[i] = tf.SparseTensor(indices, data, L[i].shape)
                    L[i] = tf.sparse_reorder(L[i])
        
        # Store attributes and bind operations.
        self.L, self.F, self.K, self.p, self.M = L, F, K, p, M
        self.filter = getattr(self, filter)
        self.brelu = getattr(self, brelu)
        self.pool = getattr(self, pool)
        
        # Print information about NN architecture.
        Ngconv = len(p)
        Nfc = len(M)
        print('NN architecture')
        print('  input: M_0 = {}'.format(L_shape[0]))
        for i in range(Ngconv):
            print('  layer {0}: cgconv{0}'.format(i+1))
            print('    representation: M_{0} * F_{0} / p_{0} = {1} * {2} / {3} = {4}'.format(
                    i+1, L_shape[i], F[i], p[i], L_shape[i]*F[i]//p[i]))
            F_last = F[i-1] if i > 0 else 1
            print('    weights: F_{1} * F_{0} * K_{0} = {2} * {3} * {4} = {5}'.format(
                    i+1, i, F_last, F[i], K[i], F_last*F[i]*K[i]))
            if brelu == 'b1relu':
                print('    biases: F_{} = {}'.format(i+1, F[i]))
            elif brelu == 'b2relu':
                print('    biases: M_{0} * F_{0} = {1} * {2} = {3}'.format(
                        i+1, L_shape[i], F[i], L_shape[i]*F[i]))
        def M_last(i):
            # Input size of fully connected layer i: the previous fc layer, or
            # the flattened output of the last graph convolutional layer.
            return M[i-1] if i > 0 else L_shape[-1] * F[-1] // p[-1]
        def lprint(i, M_i):
            print('    representation: M_{} = {}'.format(Ngconv+i+1, M_i))
            print('    weights: M_{} * M_{} = {} * {} = {}'.format(
                    Ngconv+i, Ngconv+i+1, M_last(i), M_i, M_last(i)*M_i))
            print('    biases: M_{} = {}'.format(Ngconv+i+1, M_i))
        for i in range(Nfc):
            print('  layer {}: fc{}'.format(Ngconv+i+1, i+1))
            lprint(i, M[i])
        print('  layer {}: logits (softmax)'.format(Ngconv+Nfc+1))
        lprint(Nfc, NCLASSES)

    def chebyshev2(self, x, L, Fout, K):
        """
        Filtering with Chebyshev interpolation
        Implementation: numpy.
        
        Data: x of size N x M x F
            N: number of signals
            M: number of vertices
            F: number of features per signal per vertex
        L: Laplacian handed to graph.chebyshev.
        Fout: number of output features per vertex.
        K: polynomial order.

        Returns a tensor of size N x M x Fout. The shape of x must be fully
        defined, as its dimensions are converted to Python ints.
        """
        N, M, Fin = x.get_shape()
        N, M, Fin = int(N), int(M), int(Fin)
        # Transform to Chebyshev basis
        x = tf.transpose(x, perm=[1, 2, 0])  # M x Fin x N
        x = tf.reshape(x, [M, Fin*N])  # M x Fin*N
        def chebyshev(x):
            return graph.chebyshev(L, x, K)
        x = tf.py_func(chebyshev, [x], [tf.float32])[0]  # K x M x Fin*N
        x = tf.reshape(x, [K, M, Fin, N])  # K x M x Fin x N
        x = tf.transpose(x, perm=[3,1,2,0])  # N x M x Fin x K
        x = tf.reshape(x, [N*M, Fin*K])  # N*M x Fin*K
        # Filter: Fin*Fout filters of order K, i.e. one filterbank per feature.
        W = self._weight_variable([Fin*K, Fout], regularization=False)
        x = tf.matmul(x, W)  # NM x Fout
        return tf.reshape(x, [N, M, Fout])  # N x M x Fout

    def chebyshev5(self, x, L, Fout, K):
        """
        Filtering with Chebyshev interpolation
        Implementation: TensorFlow, recursion of sparse-dense matmuls.

        Data: x of size N x M x F
            N: number of signals
            M: number of vertices
            F: number of features per signal per vertex
        L: Laplacian as a TF sparse tensor of size M x M.
        Fout: number of output features per vertex.
        K: polynomial order.

        Returns a tensor of size N x M x Fout. The shape of x must be fully
        defined, as its dimensions are converted to Python ints.
        """
        N, M, Fin = x.get_shape()
        N, M, Fin = int(N), int(M), int(Fin)
        # Transform to Chebyshev basis
        x0 = tf.transpose(x, perm=[1, 2, 0])  # M x Fin x N
        x0 = tf.reshape(x0, [M, Fin*N])  # M x Fin*N
        x = tf.expand_dims(x0, 0)  # 1 x M x Fin*N
        def concat(x, x_):
            x_ = tf.expand_dims(x_, 0)  # 1 x M x Fin*N
            return tf.concat(0, [x, x_])  # K x M x Fin*N
        if K > 1:
            x1 = tf.sparse_tensor_dense_matmul(L, x0)
            x = concat(x, x1)
        # Recurrence: next term = 2 * L * previous term - term before.
        for k in range(2, K):
            x2 = 2 * tf.sparse_tensor_dense_matmul(L, x1) - x0  # M x Fin*N
            x = concat(x, x2)
            x0, x1 = x1, x2
        x = tf.reshape(x, [K, M, Fin, N])  # K x M x Fin x N
        x = tf.transpose(x, perm=[3,1,2,0])  # N x M x Fin x K
        x = tf.reshape(x, [N*M, Fin*K])  # N*M x Fin*K
        # Filter: Fin*Fout filters of order K, i.e. one filterbank per feature.
        W = self._weight_variable([Fin*K, Fout], regularization=False)
        x = tf.matmul(x, W)  # NM x Fout
        return tf.reshape(x, [N, M, Fout])  # N x M x Fout

    def b1relu(self, x):
        """Bias and ReLU. One bias per filter."""
        N, M, F = x.get_shape()
        b = self._bias_variable([1, 1, int(F)], regularization=False)
        return tf.nn.relu(x + b)

    def b2relu(self, x):
        """Bias and ReLU. One bias per vertex per filter."""
        N, M, F = x.get_shape()
        b = self._bias_variable([1, int(M), int(F)], regularization=False)
        return tf.nn.relu(x + b)

    def mpool1(self, x, p):
        """Max pooling of size p. Should be a power of 2."""
        if p > 1:
            # A trailing axis is added so that tf.nn.max_pool, which takes 4D
            # inputs, pools along the vertex axis only.
            x = tf.expand_dims(x, 3)  # N x M x F x 1
            x = tf.nn.max_pool(x, ksize=[1,p,1,1], strides=[1,p,1,1], padding='SAME')
            return tf.squeeze(x, [3])  # N x M/p x F
        else:
            return x

    def fc(self, x, Mout, relu=True):
        """
        Fully connected layer with Mout features.

        x: 2D tensor, samples x input features.
        relu: apply a ReLU to the output unless false.
        Weights and bias are both regularized.
        """
        N, Min = x.get_shape()
        W = self._weight_variable([int(Min), Mout], regularization=True)
        b = self._bias_variable([Mout], regularization=True)
        x = tf.matmul(x, W) + b
        return tf.nn.relu(x) if relu else x

    def _inference(self, x, dropout):
        """
        Build the forward pass: graph convolutional layers, fully connected
        hidden layers, then a linear layer giving the logits.

        x: size N x M, one feature per vertex.
        dropout: passed as second argument to tf.nn.dropout after each fully
            connected hidden layer.
        """
        # Graph convolutional layers.
        x = tf.expand_dims(x, 2)  # N x M x F=1
        for i in range(len(self.p)):
            # A variable scope per layer keeps the 'weights' and 'bias'
            # variables of the layers apart.
            with tf.variable_scope('cgconv{}'.format(i+1)):
                with tf.name_scope('filter'):
                    x = self.filter(x, self.L[i], self.F[i], self.K[i])
                with tf.name_scope('bias_relu'):
                    x = self.brelu(x)
                with tf.name_scope('pooling'):
                    x = self.pool(x, self.p[i])
        
        # Fully connected hidden layers.
        N, M, F = x.get_shape()
        x = tf.reshape(x, [int(N), int(M*F)])  # N x M
        for i in range(len(self.M)):
            with tf.variable_scope('fc{}'.format(i+1)):
                x = self.fc(x, self.M[i])
                x = tf.nn.dropout(x, dropout)
        
        # Logits linear layer, i.e. softmax without normalization.
        with tf.variable_scope('logits'):
            x = self.fc(x, NCLASSES, relu=False)
        return x