In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import os
from sklearn.metrics import confusion_matrix

In [2]:
#Gene expression data
data = pd.read_csv("../../data/gene_expression_clinical.csv",index_col=0)

In [3]:
#Remove non-relavant columns
data.drop(data.columns[[60483,60484,60485,60486,60488,60489,60490,60491,60492,60493,60494,60495]],axis=1,inplace=True)
data.head()

Unnamed: 0,ENSG00000000003.13,ENSG00000000005.5,ENSG00000000419.11,ENSG00000000457.12,ENSG00000000460.15,ENSG00000000938.11,ENSG00000000971.14,ENSG00000001036.12,ENSG00000001084.9,ENSG00000001167.13,...,ENSGR0000264510.4,ENSGR0000264819.4,ENSGR0000265658.4,ENSGR0000270726.4,ENSGR0000275287.3,ENSGR0000276543.3,ENSGR0000277120.3,ENSGR0000280767.1,ENSGR0000281849.1,sample_type
X15a44c9d.7c84.4170.96a1.358dd796aa65.htseq.counts,4219,4,1070,565,133,1493,54514,2894,6042,867,...,0,0,0,0,0,0,0,0,0,0
X160aee04.df36.4e94.90c5.b01b2991ba48.htseq.counts,3428,5,821,502,537,278,103347,2307,4815,486,...,0,0,0,0,0,0,0,0,0,0
X1d86dc66.1a62.4cbc.9973.ae63ab754d6a.htseq.counts,5284,7,1403,704,119,629,98287,3840,7077,673,...,0,0,0,0,0,0,0,0,0,0
X2b8bf629.3c22.4dcb.a9a5.ec01c5099167.htseq.counts,3236,0,697,643,56,200,39678,1685,5872,477,...,0,0,0,0,0,0,0,0,0,0
X2cc2e3ce.68cd.4690.9fff.5ecf86c2f57a.htseq.counts,3051,11,984,353,70,387,36361,2870,4070,586,...,0,0,0,0,0,0,0,0,0,0


In [4]:
#Principle component analysis
from sklearn.decomposition import PCA

In [5]:
y = np.zeros((data.shape[0],2))
y[:,0] = data['sample_type'].values
y[:,1] = [1 if x==0 else 0 for x in y[:,0]]
Y = y
#Zero in the first field is benign, and zero in the second field is malignant
Y

array([[ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       ..., 
       [ 1.,  0.],
       [ 1.,  0.],
       [ 1.,  0.]])

In [6]:
pca = PCA(n_components=15) #Make a PCA object with n = 15 PCs
pca.fit(data.drop(['sample_type'],axis=1))

PCA(copy=True, iterated_power='auto', n_components=15, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [7]:
pc = pca.transform(data.drop(['sample_type'],axis=1))
pc.shape

(1114, 15)

In [8]:
#Shuffle the data set
X,Y = shuffle(pc,Y,random_state=1)

In [9]:
#Convert the dataset into train and test set
train_x, test_x, train_y, test_y = train_test_split(X,Y,test_size=0.30, random_state=415)

In [291]:
print(train_x.shape)
print(train_y.shape)
print(test_x.shape)

(779, 15)
(779, 2)
(335, 15)


In [292]:
allowed_optimizers = ['gd']
allowed_activations = ['sigmoid', 'tanh', 'softmax', 'relu', 'linear']
allowed_losses = ['rmse', 'cross_entropy']

class ModularNeuralNet:
    """
    Modular deep artificial neural network implemented in a sklearn style
    """
    
    #Class assertions
    def assertions(self):
        global allowed_optimizers, allowed_activations, allowed_losses
        assert self.loss in allowed_losses, 'Invalid loss function given'
        assert self.optimizer in allowed_optimizers, 'Invalid optimizer given'
        assert all(x in allowed_activations for x in self.activations), 'Invalid activation function used'
        assert self.epochs > 0, 'Number of epochs must be greater than 0'
    
    def __init__(self, input_dim = 15,n_class=2,hidden_nodes=[16,16,16,16],lr=0.1,epochs=10,
                activations=['relu','relu','relu','sigmoid'],loss='cross_entropy',
                optimizer='gd',batch_size=100, print_step=1,graph=False,save_model=False):
        self.n_class = n_class
        self.hidden_nodes = hidden_nodes
        self.hidden_layers = len(self.hidden_nodes)
        self.input_dim = input_dim       
        self.lr = lr
        self.epochs = epochs
        self.activations = activations
        self.loss = loss
        self.optimizer = optimizer
        self.batch_size = batch_size
        self.print_step = print_step
        self.graph = graph
        self.save_model = save_model
        self.assertions()
        
        self.model_path = os.path.join(os.getcwd(),"model\\")
        self.cost_history = []
        self.mse_history = []
        self.accuracy_history = np.empty(shape=[1],dtype=float)
        self.weights_dim = [self.input_dim] + self.hidden_nodes
        
    #Fit takes training input data and trains a neural network with user specified infrastructure
    def fit(self, train_x, train_y):
        '''
        :param x: m x p dataframe
        :return: trained weights and bias for the sdae       
        '''
        
        #Generate tensorflow variables for the weights and biases
        weights = self.weight()
        biases = self.bias()
        
        x = tf.placeholder(dtype=tf.float32,shape=[None, self.input_dim])
        W = tf.Variable(tf.zeros([self.input_dim, self.n_class]))
        b = tf.Variable(tf.zeros([self.n_class]))
        y_ = tf.placeholder(dtype=tf.float32,shape=[None,self.n_class])
        y = tf.placeholder(dtype=tf.float32,shape=[None,self.n_class])
        
        #Initialize variables
        init = tf.global_variables_initializer()
        
        #Initialize saver class
        saver = tf.train.Saver()
        
        #Call the defined model
        y = self.mlp(x,weights,biases)

        #Define the cost function and optimizer
        cost_function = self.cost(self.loss, y, y_)
        training_step = self.optimizers(self.lr,cost_function)
        
        #Initialize tensorflow session
        sess = tf.Session()
        sess.run(init)
        
        for epoch in range(self.epochs):
            sess.run(training_step, feed_dict={x:train_x, y_:train_y})
            #Current training cost
            cost = sess.run(cost_function, feed_dict={x:train_x,y_:train_y})
            self.cost_history.append(cost)
            #Test set MSE
            #pred_y = sess.run(y,feed_dict={x:test_x})
           # mse = tf.reduce_mean(tf.square(pred_y - test_y))
           # mse_ = sess.run(mse)
           # mse_history.append(mse_)
            ##Add accuracy history##
            print('epoch : ', epoch, ' - ', 'cost: ', cost)
        
        return sess.run(cost_function, feed_dict={x: train_x, y_:train_y})
        
        
    #Implements the desired cost function
    def cost(self,loss, y, y_):
        if loss == 'rmse':
            return tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(y_, y))))
        elif loss == 'cross_entropy':
            #Removed tf.reduce_mean
            return tf.losses.softmax_cross_entropy(logits=y,onehot_labels=y_)
    
    #Implements the desired cost function
    def optimizers(self, lr, loss):
        if self.optimizer == 'gd':
            return tf.train.GradientDescentOptimizer(lr).minimize(loss)
        
        elif self.optimizer == 'adam':
            return tf.train.AdamOptimizer(lr).minimize(loss)
    
    #Implements the desired activation function
    def activate(self, linear, name):
        if name == 'sigmoid':
            return tf.nn.sigmoid(linear, name='layer')
        elif name == 'softmax':
            return tf.nn.softmax(linear, name='layer')
        elif name == 'linear':
            return linear
        elif name == 'tanh':
            return tf.nn.tanh(linear, name='layer')
        elif name == 'relu':
            return tf.nn.relu(linear, name='layer')
        
    def weight(self):
        weights = {'out':tf.Variable(tf.truncated_normal([self.weights_dim[-1],self.n_class]),
                                     dtype=tf.float32)}
        for i in range(self.hidden_layers):
            var = 'h'+str(i)
            weights[var] = tf.Variable(tf.truncated_normal([self.weights_dim[i],self.weights_dim[i+1]]),
                                      dtype=tf.float32)
        return weights
    
    def bias(self):
        biases = {'out':tf.Variable(tf.truncated_normal([self.n_class]),
                                   dtype=tf.float32)}
        for i in range(self.hidden_layers):
            var = var = 'b'+str(i)
            biases[var] = tf.Variable(tf.truncated_normal([self.hidden_nodes[i]]),
                                     dtype=tf.float32)
        return biases
    
    #Define the multiple layer perceptron model
    def mlp(self, x, weights, biases):

        layer = x
        #Hidden Layers
        for i in range(self.hidden_layers):
            layer = tf.add(tf.matmul(layer,weights['h'+str(i)]),biases['b'+str(i)])
            layer = self.activate(layer,self.activations[i])
            
        #Output Layer
        out_layer = tf.add(tf.matmul(layer, weights['out']), biases['out'])
        
        return out_layer

In [299]:
#Initiate class
model = ModularNeuralNet(epochs=200,hidden_nodes=[4,8,12,16],graph=True,save_model=True)

In [300]:
model.fit(train_x, train_y)

epoch :  0  -  cost:  1.31639
epoch :  1  -  cost:  0.951491
epoch :  2  -  cost:  0.761017
epoch :  3  -  cost:  0.664189
epoch :  4  -  cost:  0.615641
epoch :  5  -  cost:  0.585224
epoch :  6  -  cost:  0.569463
epoch :  7  -  cost:  0.567379
epoch :  8  -  cost:  0.561877
epoch :  9  -  cost:  0.557706
epoch :  10  -  cost:  0.553518
epoch :  11  -  cost:  0.550464
epoch :  12  -  cost:  0.547702
epoch :  13  -  cost:  0.544501
epoch :  14  -  cost:  0.54482
epoch :  15  -  cost:  0.541845
epoch :  16  -  cost:  0.540509
epoch :  17  -  cost:  0.539085
epoch :  18  -  cost:  0.539901
epoch :  19  -  cost:  0.539355
epoch :  20  -  cost:  0.53925
epoch :  21  -  cost:  0.537284
epoch :  22  -  cost:  0.536138
epoch :  23  -  cost:  0.535015
epoch :  24  -  cost:  0.532447
epoch :  25  -  cost:  0.531383
epoch :  26  -  cost:  0.529564
epoch :  27  -  cost:  0.527776
epoch :  28  -  cost:  0.526214
epoch :  29  -  cost:  0.525201
epoch :  30  -  cost:  0.523375
epoch :  31  -  cost:

0.47609997

In [152]:
model.loss

'cross_entropy'

In [88]:
model.hidden_layers

4

In [74]:
weights = {'h'+str(i):2}

In [86]:
weights

{'out': <tf.Variable 'Variable:0' shape=(16, 2) dtype=float32_ref>}

In [94]:
i = 1
j = 0
k = 0

In [107]:
weights = {'out':tf.Variable(tf.truncated_normal([model.weights_dim[-1],model.n_class]))}
for i in range(model.hidden_layers):
    var = 'h'+str(i)
    weights[var] = tf.Variable(tf.truncated_normal([model.weights_dim[i],model.weights_dim[i+1]]))

weights

{'h0': <tf.Variable 'Variable_7:0' shape=(15, 4) dtype=float32_ref>,
 'h1': <tf.Variable 'Variable_8:0' shape=(4, 8) dtype=float32_ref>,
 'h2': <tf.Variable 'Variable_9:0' shape=(8, 12) dtype=float32_ref>,
 'h3': <tf.Variable 'Variable_10:0' shape=(12, 16) dtype=float32_ref>,
 'out': <tf.Variable 'Variable_6:0' shape=(16, 2) dtype=float32_ref>}

In [102]:
weights

{'h1': <tf.Variable 'Variable_1:0' shape=(15, 4) dtype=float32_ref>,
 'out': <tf.Variable 'Variable:0' shape=(16, 2) dtype=float32_ref>}

In [118]:
apple = [1,2,3,4,5]

In [119]:
apple.append(1)

In [120]:
apple

[1, 2, 3, 4, 5, 1]

In [122]:
apple = np.append(apple,2)

In [123]:
apple

array([1, 2, 3, 4, 5, 1, 2])

In [124]:
np.append(apple,2)

array([1, 2, 3, 4, 5, 1, 2, 2])

In [126]:
np.random.rand(5).append(2)

AttributeError: 'numpy.ndarray' object has no attribute 'append'

In [195]:
#Variables and parameters to work with tensors
learning_rate = 0.1
training_epochs = 10
cost_history = np.empty(shape=[1],dtype=float)
n_dim = 15
n_class = 2
model_path = os.path.join(os.getcwd(),"model\\")

n_hidden_1 = 16
n_hidden_2 = 16
n_hidden_3 = 16
n_hidden_4 = 16

x = tf.placeholder(tf.float32, [None, n_dim])
W = tf.Variable(tf.zeros([n_dim,n_class]))
b = tf.Variable(tf.zeros([n_class]))
y_ = tf.placeholder(tf.float32,[None,n_class])

In [199]:
def mlp(x, weights,biases):

    layer = x

    #Hidden Layers
    for i in range(model.hidden_layers):
        layer = tf.add(tf.matmul(layer,weights['h'+str(i)]),biases['b'+str(i)])
        layer = model.activate(layer,model.activations[i])
            
    #Output Layer
    out_layer = tf.add(tf.matmul(layer, weights['out']), biases['out'])
    return out_layer

In [200]:
weights = {
    'h0':tf.Variable(tf.truncated_normal([n_dim,n_hidden_1])),
    'h1':tf.Variable(tf.truncated_normal([n_hidden_1,n_hidden_2])),
    'h2':tf.Variable(tf.truncated_normal([n_hidden_2,n_hidden_3])),
    'h3':tf.Variable(tf.truncated_normal([n_hidden_3,n_hidden_4])),
    'out':tf.Variable(tf.truncated_normal([n_hidden_4,n_class]))
}
biases = {
    'b0':tf.Variable(tf.truncated_normal([n_hidden_1])),
    'b1':tf.Variable(tf.truncated_normal([n_hidden_2])),
    'b2':tf.Variable(tf.truncated_normal([n_hidden_3])),
    'b3':tf.Variable(tf.truncated_normal([n_hidden_4])),
    'out':tf.Variable(tf.truncated_normal([n_class]))
}

In [205]:
y = mlp(x, weights, biases)
y

<tf.Tensor 'Add_39:0' shape=(?, 2) dtype=float32>

In [221]:
weights['h0']

<tf.Variable 'Variable_180:0' shape=(15, 16) dtype=float32_ref>

In [187]:
layer

NameError: name 'layer' is not defined

In [271]:
learning_rate

0.1

In [285]:
model.optimizer

'gd'