In [35]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [36]:
df = pd.read_csv('./datasets/cancer.csv')
# test = pd.read_csv('./datasets/MNIST/test.csv')
df.head()

Unnamed: 0,Feature1,Feature2,Feature3,Feature4,Feature5,Feature6,Feature7,Feature8,Feature9,Label
0,5,1,1,1,2,1,3,1,1,1
1,5,4,4,5,7,10,3,2,1,1
2,3,1,1,1,2,2,3,1,1,1
3,6,8,8,1,3,4,3,7,1,1
4,4,1,1,3,2,1,3,1,1,1


In [37]:
df_norm = df[df['Label']==1]
df_norm.head()

Unnamed: 0,Feature1,Feature2,Feature3,Feature4,Feature5,Feature6,Feature7,Feature8,Feature9,Label
0,5,1,1,1,2,1,3,1,1,1
1,5,4,4,5,7,10,3,2,1,1
2,3,1,1,1,2,2,3,1,1,1
3,6,8,8,1,3,4,3,7,1,1
4,4,1,1,3,2,1,3,1,1,1


In [38]:
df_anom = df[df['Label']== -1]
df_anom.head()

Unnamed: 0,Feature1,Feature2,Feature3,Feature4,Feature5,Feature6,Feature7,Feature8,Feature9,Label
15,7,3,2,10,5,10,5,4,4,-1
28,2,5,3,3,6,7,7,5,1,-1
32,7,8,7,2,4,8,3,8,2,-1
33,10,6,6,3,4,5,3,6,1,-1
35,9,10,10,1,10,8,3,3,1,-1


In [39]:
ds_norm = df_norm.values
ds_anom = df_anom.values
print(ds_norm.shape)
print(ds_anom.shape)

(444, 10)
(39, 10)


## Train Test split

In [62]:
X_train = ds_norm[:400, :-1]
Y_train = ds_norm[:400, -1]
Y_train = Y_train.reshape(-1, 1)
print(X_train.shape)
print(Y_train.shape)

(400, 9)
(400, 1)


In [63]:
x_test = ds_norm[400:, :-1]
y_test = ds_norm[400: , -1]
y_test = y_test.reshape(-1, 1)
x_anom = ds_anom[:, :-1]
y_anom = ds_anom[:, -1]
# X_test = np.stack((X_test, ds_anom[:, :-1]), axis=0)
# Y_test = np.stack((Y_test, ds_anom[:, -1]), axis=0)
# print(X_test.shape, Y_test.shape)

In [64]:
y_anom = y_anom.reshape(-1, 1)
X_test = np.vstack((x_test,x_anom))
Y_test = np.vstack((y_test, y_anom))
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

(400, 9) (400, 1)
(83, 9) (83, 1)


## Model

In [9]:
# CHECK : Constants
omega = 1.

class ELM(object):
    def __init__(self, sess, batch_size, input_len, hidden_num, output_len):
        '''
        Args:
          sess : TensorFlow session.
          batch_size : The batch size (N)
          input_len : The length of input. (L)
          hidden_num : The number of hidden node. (K)
          output_len : The length of output. (O)
        '''
    
        self._sess = sess 
        self._batch_size = batch_size
        self._input_len = input_len
        self._hidden_num = hidden_num
        self._output_len = output_len 

        # for train
        self._x0 = tf.placeholder(tf.float32, [self._batch_size, self._input_len])
        self._t0 = tf.placeholder(tf.float32, [self._batch_size, self._output_len])

        # for test
        self._x1 = tf.placeholder(tf.float32, [None, self._input_len])
        self._t1 = tf.placeholder(tf.float32, [None, self._output_len])

        self._W = tf.Variable(
          tf.random_normal([self._input_len, self._hidden_num]),
          trainable=False, dtype=tf.float32)
        self._b = tf.Variable(
          tf.random_normal([self._hidden_num]),
          trainable=False, dtype=tf.float32)
        self._beta = tf.Variable(
          tf.zeros([self._hidden_num, self._output_len]),
          trainable=False, dtype=tf.float32)
        self._var_list = [self._W, self._b, self._beta]

        self.H0 = tf.matmul(self._x0, self._W) + self._b # N x L
        self.H0_T = tf.transpose(self.H0)

        self.H1 = tf.matmul(self._x1, self._W) + self._b # N x L
        self.H1_T = tf.transpose(self.H1)

        # beta analytic solution : self._beta_s (K x O)
        if self._input_len < self._hidden_num: # L < K
            identity = tf.constant(np.identity(self._hidden_num), dtype=tf.float32)
            self._beta_s = tf.matmul(tf.matmul(tf.matrix_inverse(
                tf.matmul(self.H0_T, self.H0) + identity/omega), 
                self.H0_T), self._t0)
          # _beta_s = (H_T*H + I/om)^(-1)*H_T*T
        else:
            identity = tf.constant(np.identity(self._batch_size), dtype=tf.float32)
            self._beta_s = tf.matmul(tf.matmul(self.H0_T, tf.matrix_inverse(
                tf.matmul(self.H0, self.H0_T)+identity/omega)), self._t0)
          # _beta_s = H_T*(H*H_T + I/om)^(-1)*T

        self._assign_beta = self._beta.assign(self._beta_s)
        self._fx0 = tf.matmul(self.H0, self._beta)
        self._fx1 = tf.matmul(self.H1, self._beta)

        self._cost = tf.reduce_mean(tf.cast(tf.losses.mean_squared_error(labels=self._t0, predictions=self._fx0), tf.float32))
                                        
        self._init = False
        self._feed = False

        # for the mnist test
        self._correct_prediction = tf.equal(tf.argmax(self._fx1,1), tf.argmax(self._t1,1))
        self._accuracy = tf.reduce_mean(tf.cast(self._correct_prediction, tf.float32))
        self._testcost = tf.reduce_mean(tf.cast(tf.losses.mean_squared_error(labels=self._t1, predictions=self._fx1), tf.float32))


    def feed(self, x, t):
        '''
        Args :
          x : input array (N x L)
          t : label array (N x O)
        '''

        if not self._init : self.init()
        self._sess.run(self._assign_beta, {self._x0:x, self._t0:t})
        print(self._sess.run(self._cost, {self._x0:x, self._t0:t}))
        self._feed = True

    def init(self):
        self._sess.run(tf.initialize_variables(self._var_list))
        self._init = True

    def test(self, x, t=None):
        if not self._feed : exit("Not feed-forward trained")
        if t is not None :
            print("Accuracy: {:.9f}".format(self._sess.run(self._accuracy, {self._x1:x, self._t1:t})))
            print(self._sess.run(self._testcost, {self._x1:x, self._t1:t}))
        else :
            return self._sess.run(self._fx1, {self._x1:x})


In [10]:
# Basic tf setting
tf.set_random_seed(2016)
sess = tf.Session()

In [11]:
## training for outlier detetcion
batch_size = 5000
hidden_num = 150
print("batch_size : {}".format(batch_size))
print("hidden_num : {}".format(hidden_num))
elm = ELM(sess, batch_size, 784, hidden_num, 784)

# one-step feed-forward training
train_x, train_y = (X_train[:batch_size], X_train[:batch_size])
print(X_train[:batch_size].shape)
elm.feed(train_x, train_y)

batch_size : 5000
hidden_num : 150
(5000, 784)
Instructions for updating:
Use `tf.variables_initializer` instead.
0.0083903605


In [12]:
elm.test(X_test, X_test)

Accuracy: 0.015157527
0.00893891
