In [1]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [2]:
import tensorflow as tf

In [3]:
def generate_random(orig_data, feature_names, data_size, random_state=None):
    '''
    Input: Data, Feature Names, Size of Random Data, optional Random State
    Output: Random Data

    Generates a random dataset with data_size rows.
    Column i is sampled independently from a uniform
    distribution whose low and high bounds are the minimum
    and maximum of orig_data[:, i].

    orig_data     : 2-D array-like, indexed column-wise as orig_data[:, i]
    feature_names : sequence of column names; only the first
                    len(feature_names) columns of orig_data are used
    data_size     : number of rows to generate
    random_state  : None (use the global numpy RNG), an int seed, or a
                    np.random.RandomState instance, for reproducible output

    Returns a pandas DataFrame whose columns follow feature_names.
    '''

    # Resolve the source of randomness; None keeps drawing from the
    # module-level numpy generator.
    if random_state is None:
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    orig_data = np.asarray(orig_data)

    df = {}
    for i, name in enumerate(feature_names):
        column = orig_data[:, i]
        df[name] = rng.uniform(low=np.min(column), high=np.max(column), size=data_size)

    # Passing columns= pins the column order to feature_names.
    random_dataset = pd.DataFrame(data=df, columns=feature_names)

    return random_dataset

### Neural Network Definition

In [4]:
def constructNetwork(num_features, num_classes, num_hidden, X, y, X_random=None, NUM_EPOCHS=10, SKIP_STEP=5, LEARNING_RATE=1e-4):
    '''
    Input: Layer Sizes, Training Data, Training Labels, optional Data to predict on
    Output: Softmax predictions for X_random, or None if X_random is not given

    Builds a network with one ReLU hidden layer and a softmax output layer
    in the default TensorFlow graph, then trains it with Adam. Every
    training step feeds the whole of (X, y); loss and accuracy are printed
    every SKIP_STEP steps.

    num_features  : width of the input placeholder
    num_classes   : width of the label placeholder and of the softmax output
    num_hidden    : number of hidden units
    X, y          : training inputs and labels fed to the placeholders
    X_random      : optional inputs to run through the trained network
    NUM_EPOCHS    : number of training steps
    SKIP_STEP     : progress is printed when the step index is a multiple of this
    LEARNING_RATE : learning rate passed to the Adam optimizer

    Returns the result of sess.run([softmax], ...): a one-element list
    holding the softmax output for X_random. Returns None when X_random
    is None. Errors raised while predicting are no longer swallowed.

    Variables are created with fixed names via tf.get_variable; the calls
    in this notebook run tf.reset_default_graph() first.
    NOTE(review): presumably a second call in the same graph would collide
    on those names -- confirm before dropping the reset.
    '''

    with tf.variable_scope("Placeholder"):
        input_tensor = tf.placeholder(dtype=tf.float32, shape=[None, num_features], name="input")
        label = tf.placeholder(dtype=tf.float32, shape=[None, num_classes], name="label")

    with tf.variable_scope("hidden_layer"):
        w = tf.get_variable(dtype=tf.float32, shape=[num_features, num_hidden], initializer=tf.random_normal_initializer(), name="weights")
        b = tf.get_variable(dtype=tf.float32, shape=[num_hidden], initializer=tf.random_normal_initializer(), name="biases")
        out = tf.nn.relu(tf.matmul(input_tensor, w) + b)

    with tf.variable_scope("output_layer"):
        w = tf.get_variable(dtype=tf.float32, shape=[num_hidden, num_classes], initializer=tf.random_normal_initializer(), name="weights")
        b = tf.get_variable(dtype=tf.float32, shape=[num_classes], initializer=tf.random_normal_initializer(), name="biases")
        softmax = tf.nn.softmax(tf.matmul(out, w) + b)

    with tf.variable_scope("loss"):
        # Cross-entropy against the softmax output; probabilities are clipped
        # to [1e-10, 1.0] so tf.log never receives 0.
        loss = tf.reduce_mean(-tf.reduce_sum(label*tf.log(tf.clip_by_value(softmax,1e-10,1.0)), reduction_indices=[1]))

    with tf.variable_scope("optimizer"):
        optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(loss)

    with tf.variable_scope("accuracy"):
        # tf.argmax replaces the deprecated tf.arg_max alias.
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(softmax, 1), tf.argmax(label, 1)), dtype=tf.float32))

    with tf.Session() as sess:

        init = tf.global_variables_initializer()
        sess.run(init)

        for i in range(1, NUM_EPOCHS+1):

            _, l, acc = sess.run([optimizer, loss, accuracy], feed_dict={input_tensor:X, label:y})

            if i % SKIP_STEP == 0:
                # One pre-formatted argument prints the same text under
                # Python 2 and Python 3.
                print('Epoch: {}\n Loss: {}\t Accuracy: {}'.format(i, l, acc))

        # Train-only call: nothing to predict on.
        if X_random is None:
            return None

        y_random = sess.run([softmax], feed_dict={input_tensor: X_random})
        return y_random

### Cancer Dataset (30 features)

In [5]:
from sklearn import datasets

In [6]:
## Getting the data
data = datasets.load_breast_cancer()
X = data.data
y = data.target
feature_names = data.feature_names
# A single pre-formatted argument prints the same text under Python 2 and 3.
print('{} {}'.format(X.shape, y.shape))

(569, 30) (569,)


In [7]:
# Encode from data.target rather than from y itself, so the result does not
# depend on how many times this cell has already been run.
y = pd.get_dummies(data.target).values
print(y.shape)

(569, 2)


In [8]:
from sklearn.preprocessing import StandardScaler
# Scale from the untouched data.data so X always has the same lineage,
# whatever X held before this cell ran.
X = StandardScaler().fit_transform(data.data)
# A single pre-formatted argument prints the same text under Python 2 and 3.
print('{} {}'.format(np.mean(X), np.std(X)))

-6.11890932377e-16 1.0


In [9]:
# Fix the numpy seed so the same random dataset is produced on every run.
np.random.seed(0)
X_random = generate_random(X, feature_names, data_size=1000).values
print(X_random.shape)

(1000, 30)


In [12]:
# Train the first network on the real data, then label the random inputs
# with its softmax output. The default graph is cleared before building.
tf.reset_default_graph()
y_random = constructNetwork(
    num_features=30,
    num_classes=2,
    num_hidden=128,
    X=X,
    y=y,
    X_random=X_random,
    NUM_EPOCHS=3000,
    SKIP_STEP=1000,
    LEARNING_RATE=1e-4
)

Epoch: 1000
 Loss: 0.715131878853	 Accuracy: 0.931458711624
Epoch: 2000
 Loss: 0.235907152295	 Accuracy: 0.978910386562
Epoch: 3000
 Loss: 0.187432423234	 Accuracy: 0.985940217972


In [21]:
# Train a second network on the random inputs, using the first network's
# softmax output (y_random[0]) as labels, then predict on the original X.
tf.reset_default_graph()
preds = constructNetwork(
    num_features=30,
    num_classes=2,
    num_hidden=128,
    X=X_random,
    y=y_random[0],
    X_random=X,
    NUM_EPOCHS=20000,
    SKIP_STEP=1000,
    LEARNING_RATE=1e-2
)

Epoch: 1000
 Loss: 0.815012216568	 Accuracy: 0.966000020504
Epoch: 2000
 Loss: 0.820866465569	 Accuracy: 0.961000025272
Epoch: 3000
 Loss: 0.809443116188	 Accuracy: 0.964999973774
Epoch: 4000
 Loss: 0.534660756588	 Accuracy: 0.977999985218
Epoch: 5000
 Loss: 0.53012150526	 Accuracy: 0.976999998093
Epoch: 6000
 Loss: 0.531763911247	 Accuracy: 0.977999985218
Epoch: 7000
 Loss: 0.531323313713	 Accuracy: 0.976000010967
Epoch: 8000
 Loss: 0.527147769928	 Accuracy: 0.976000010967
Epoch: 9000
 Loss: 0.536090373993	 Accuracy: 0.975000023842
Epoch: 10000
 Loss: 0.525169551373	 Accuracy: 0.976999998093
Epoch: 11000
 Loss: 0.524739384651	 Accuracy: 0.976999998093
Epoch: 12000
 Loss: 0.52328813076	 Accuracy: 0.976000010967
Epoch: 13000
 Loss: 0.521253705025	 Accuracy: 0.976999998093
Epoch: 14000
 Loss: 0.522794902325	 Accuracy: 0.975000023842
Epoch: 15000
 Loss: 0.368185311556	 Accuracy: 0.985000014305
Epoch: 16000
 Loss: 0.299120634794	 Accuracy: 0.989000022411
Epoch: 17000
 Loss: 0.299335926771	

In [22]:
# Share of original samples where the second network's predicted class
# matches the true class.
surrogate_accuracy = np.mean(np.equal(np.argmax(preds[0], 1), np.argmax(y, 1)))
# The double space reproduces what the Python 2 `print a, b` form emitted.
print('Accuracy on original dataset:  {}'.format(surrogate_accuracy))

Accuracy on original dataset:  0.919156414763


In [23]:
from sklearn.linear_model import LogisticRegression
# Fit a logistic regression on the random inputs, labelled with the class the
# first network assigned to them, and score it on the original data.
model = LogisticRegression()
model.fit(X_random, np.argmax(y_random[0], 1))
print(model.score(X, np.argmax(y, 1)))

0.905096660808


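The accuracy is nice: both models were fit only on random inputs labelled by the first network, yet they still score above 90% on the original dataset! o_O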