# Fitting the sklearn breast cancer dataset
An exercise in building a multilayer perceptron (MLP) by hand with TensorFlow.

## Exploratory

In [1751]:
from sklearn.datasets import load_breast_cancer
import numpy as np

# Load scikit-learn's bundled breast cancer dataset.
raw_data = load_breast_cancer()
# Show which fields the returned dataset object exposes.
raw_data.keys()


dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])

In [1752]:
raw_data.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [1753]:
raw_data.target_names

array(['malignant', 'benign'], dtype='<U9')

In [1754]:
raw_data.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [1755]:
from scipy import stats

# Per-feature summary statistics, computed down each column of the data matrix.
description = stats.describe(raw_data.data)
# Only the last expression of a cell is displayed, so the bare `description`
# line that used to sit here showed nothing and has been dropped.
raw_data.data.shape

(569, 30)

## Create computation graph

In [1756]:
import tensorflow as tf

no_of_features = raw_data.data.shape[1]

In [1757]:
def reset_graph(seed=44):
    """Reset the default TensorFlow graph and re-seed TensorFlow and NumPy.

    Args:
        seed: Integer seed applied to TensorFlow's graph-level random seed
            and to NumPy's global random generator.
    """
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    # Call the seeding function. Assigning to it (`np.random.seed = seed`)
    # replaces the function with an int, leaves NumPy unseeded and breaks
    # every later `np.random.seed(...)` call in the kernel.
    np.random.seed(seed)
    
reset_graph()

In [1758]:
def create_one_layer(from_node, no_of_neurons, name, activation=None):
    """Build one fully connected layer on top of ``from_node``.

    Args:
        from_node: 2-D tensor whose second dimension is statically known;
            it is used as the number of inputs to the layer.
        no_of_neurons: Number of output units of the layer.
        name: Name scope under which the layer's ops are created.
        activation: Optional callable applied to the pre-activation output.
            When ``None`` the raw linear output is returned.

    Returns:
        The layer output tensor: ``activation(from_node @ W + b)`` if an
        activation is given, otherwise ``from_node @ W + b``.
    """
    with tf.name_scope(name):
        no_of_inputs = int(from_node.get_shape()[1])
        initializer = tf.contrib.layers.variance_scaling_initializer()

        W = tf.Variable(initializer([no_of_inputs, no_of_neurons]), name='weights')
        # The name belongs on the Variable; passing it to tf.zeros only named
        # the initial-value op and left the bias variable with a default name.
        b = tf.Variable(tf.zeros([no_of_neurons]), name='bias')
        Z = tf.matmul(from_node, W) + b

        if activation is not None:
            return activation(Z)
        return Z


In [1759]:
# Feature matrix: one row per sample, one column per dataset feature.
input_node = tf.placeholder(tf.float32, shape=(None, no_of_features), name='input_node')
# Integer class labels, one per sample. `(None,)` declares a rank-1 tensor
# (plain `(None)` is just `None`, i.e. unknown rank), and the placeholder gets
# its own name instead of reusing 'input_node'.
results = tf.placeholder(tf.int32, shape=(None,), name='results')

In [1760]:
from tensorflow.contrib.layers import batch_norm
from tensorflow.contrib.layers import fully_connected
from tensorflow.contrib.layers import dropout

# Switches batch norm and dropout between training and inference behaviour.
is_training = tf.placeholder(tf.bool, shape=(), name='is_training')
bn_params = {
    'is_training': is_training,
    'decay': 0.99,
    'updates_collections': None
}

dropout_rate = 0.5
# tf.contrib.layers.dropout takes the probability of KEEPING a unit, not of
# dropping it. Convert explicitly so changing dropout_rate has the intended
# effect (the two only coincide at 0.5).
keep_prob = 1.0 - dropout_rate

# input -> dropout -> 300 ELU units (batch norm) -> dropout
#       -> 100 ELU units (batch norm) -> dropout -> 2 logits
input_node_drop = dropout(input_node, keep_prob=keep_prob, is_training=is_training)
nnlayer1 = fully_connected(input_node_drop, 300, scope="nnlayer1", activation_fn=tf.nn.elu,
                           normalizer_fn=batch_norm, normalizer_params=bn_params)
nnlayer1_drop = dropout(nnlayer1, keep_prob=keep_prob, is_training=is_training)
nnlayer2 = fully_connected(nnlayer1_drop, 100, scope="nnlayer2", activation_fn=tf.nn.elu,
                           normalizer_fn=batch_norm, normalizer_params=bn_params)
nnlayer2_drop = dropout(nnlayer2, keep_prob=keep_prob, is_training=is_training)
nnlayerOut = create_one_layer(nnlayer2_drop, 2, 'nnlayerOut')

# Plain alternative without batch norm or dropout, using the hand-written layer:
# nnlayer1 = create_one_layer(input_node, 300, 'nnlayer1', activation=tf.nn.elu)
# nnlayer2 = create_one_layer(nnlayer1, 100, 'nnlayer2', activation=tf.nn.elu)
# nnlayerOut = create_one_layer(nnlayer2, 2, 'nnlayerOut')

with tf.name_scope("loss"):
    # Labels are integer class ids, so the sparse variant is used on raw logits.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=results, logits=nnlayerOut)
    loss = tf.reduce_mean(xentropy, name="loss")

In [1761]:
# Training op: one Adam step (default hyperparameters) that minimizes `loss`.
with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer()
    train = optimizer.minimize(loss)


## Prepare data

In [1762]:
from sklearn.preprocessing import StandardScaler

def generate(test_ratio=0.6):
    """Draw a fresh random train/validation split of the breast cancer data.

    The samples of ``raw_data`` are shuffled with NumPy's global random
    generator. The first ``int(n * test_ratio)`` shuffled samples form the
    validation set and the rest form the training set. Features are
    standardized with statistics fitted on the training set only, and the
    same transform is applied to the validation set.

    Args:
        test_ratio: Fraction of samples placed in the validation set.

    Returns:
        Tuple ``(X_train, y_train, X_valid, y_valid)``. The label arrays are
        1-D and keep the dtype of ``raw_data.target``.

    Side effects:
        The four arrays are also published as module-level names, because
        the training cell calls ``generate()`` without using its return value.
    """
    global X_train, y_train, X_valid, y_valid

    n_samples = len(raw_data.data)
    shuffled_indices = np.random.permutation(n_samples)
    valid_size = int(n_samples * test_ratio)
    valid_indices = shuffled_indices[:valid_size]
    train_indices = shuffled_indices[valid_size:]

    # Index features and labels separately instead of concatenating them:
    # concatenation would silently turn the integer labels into floats.
    features_train = raw_data.data[train_indices]
    features_valid = raw_data.data[valid_indices]

    # Fit the scaler on the training data only. Fitting a second scaler on the
    # validation set would put both sets on different scales and leak
    # validation statistics into preprocessing.
    scaler = StandardScaler().fit(features_train)
    X_train = scaler.transform(features_train)
    X_valid = scaler.transform(features_valid)

    y_train = raw_data.target[train_indices].reshape(-1)
    y_valid = raw_data.target[valid_indices].reshape(-1)

    return X_train, y_train, X_valid, y_valid

In [1763]:
# (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
# X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
# X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
# y_train = y_train.astype(np.int32)
# y_test = y_test.astype(np.int32)
# X_valid, X_train = X_train[:5000], X_train[5000:]
# y_valid, y_train = y_train[:5000], y_train[5000:]

In [1764]:
def shuffle_batch(X, y, batch_size):
    """Yield shuffled mini-batches that together cover the data exactly once.

    Args:
        X: Array of samples, indexable by an integer index array.
        y: Array of targets aligned with ``X`` along the first axis.
        batch_size: Approximate number of samples per batch; must be >= 1.

    Yields:
        ``(X_batch, y_batch)`` pairs. The shuffled indices are split into
        ``len(X) // batch_size`` nearly equal parts (at least one part).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    rnd_idx = np.random.permutation(len(X))
    # With fewer samples than one batch the floor division gives 0, which
    # np.array_split rejects; fall back to a single batch holding everything.
    n_batches = max(1, len(X) // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]


In [1765]:
# Evaluation ops: fraction of samples whose label is the top-scoring logit.
with tf.name_scope("eval"):
    # k=1: a sample counts as correct when its label has the highest logit.
    correct = tf.nn.in_top_k(nnlayerOut, results, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [1766]:
no_of_epochs = 300
batch_size = 50
permutations = 20

# Build the initializer once. Calling tf.global_variables_initializer() inside
# the loop would add a new op to the graph on every permutation.
init_op = tf.global_variables_initializer()

with tf.Session() as sess:

    for perm in range(permutations):
        # Each permutation trains from scratch on a fresh random split.
        split = generate()
        if split is not None:
            X_train, y_train, X_valid, y_valid = split
        # NOTE(review): if generate() returns nothing, X_train / y_train /
        # X_valid / y_valid must already exist at module level; confirm that
        # generate() actually publishes them.
        sess.run(init_op)

        for epoch in range(no_of_epochs):
            for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
                sess.run(train, feed_dict={is_training: True, input_node: X_batch, results: y_batch})

        # Only the final accuracies are reported, so evaluate once after the
        # last epoch instead of after every epoch.
        acc_epoch = accuracy.eval(feed_dict={is_training: False, input_node: X_train, results: y_train})
        acc_val = accuracy.eval(feed_dict={is_training: False, input_node: X_valid, results: y_valid})

        print(epoch, "Batch accuracy: ", acc_epoch, "ValAccuracy: ", acc_val)
    

AssertionError: Do not use tf.reset_default_graph() to clear nested graphs. If you need a cleared graph, exit the nesting and create a new graph.

In [None]:
y_valid.shape

# Testing with known-good TensorFlow code