In [0]:
from google.colab import files
uploaded = files.upload()


Saving X.npy to X.npy


In [0]:
from google.colab import files
uploaded = files.upload()

Saving Y.npy to Y.npy


In [0]:
# Breast Histology Images
# Classify IDC vs non IDC images
#
# https://www.kaggle.com/simjeg/lymphoma-subtype-classification-fl-vs-cll
#
# This dataset consists of 5547 breast histology images of size 50 x 50 x 3,
# The goal is to classify cancerous images (IDC : invasive ductal carcinoma) vs non-IDC images.
#
#


import tensorflow as tf
import numpy as np
import time
import os
from sklearn.model_selection import train_test_split


def print_time(message, start_time):
    print("*****   " + message + ": {}  *****".format(time.time() - start_time))


def get_batch(all_data, all_labels, batch_size=16):
    # // means divide and result is integer, / returns float
    # rows_data = len(all_data) // batch_size
    # rows_labels = len(all_labels) // batch_size

    rtn_data = all_data.reshape(-1, batch_size, int(50*50*3))
    rnt_labels = all_labels.reshape(-1, batch_size, 2)

    return (rtn_data, rnt_labels)


def conv2d(x, weight, bias, strides=1):
    x = tf.nn.conv2d(x, weight, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, bias)

    return tf.nn.relu(x)


def maxpool2d(x, k=2):

    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')


def conv_net(x, weights, biases, dropout):

    x = tf.reshape(x, shape=[-1, 50, 50, 3])

    # Convolution layer 1
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    print("conv1.get_shape(): ", conv1.get_shape())

    # MAX POOLING
    conv1 = maxpool2d(conv1, k=2)
    print("conv1.get_shape() after maxpool : ", conv1.get_shape())

    # Convolution layer 2
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    print("conv2.get_shape(): ", conv2.get_shape())

    # MAX POOLING
    conv2 = maxpool2d(conv2, k=2)
    print("conv2.get_shape() after maxpool : ", conv2.get_shape())

    # Convolution layer 3
    conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
    print("conv3.get_shape(): ", conv3.get_shape())

    # MAX POOLING
    conv3 = maxpool2d(conv3, k=2)
    print("conv3.get_shape() after maxpool : ", conv3.get_shape())

    # Fully connected layer
    fc1 = tf.reshape(conv3, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, dropout)

    # Fully connected layer
    fc2 = tf.reshape(fc1, [-1, weights['wd2'].get_shape().as_list()[0]])
    fc2 = tf.add(tf.matmul(fc2, weights['wd2']), biases['bd2'])
    fc2 = tf.nn.relu(fc2)
    fc2 = tf.nn.dropout(fc2, dropout)

    out = tf.add(tf.matmul(fc2, weights['out']), biases['out'])

    return out


#####################################################################################################
#
#   Main
#
#
######################################################################################################
START_TIME = time.time()
path = r'C:\Users\gtune\OneDrive\document_usk\UCSC\02_SecondQuater\Deep_Learning_and_Artificial_Intelligence_with_TensorFlow\Homework\Final_Project\data'

X_data = np.load("X.npy")
Y_truth = np.load("Y.npy")

# print(X_data[0:10])
# print(X_data.shape)
# print(Y_truth[0:100])

# print(np.array(np.where(Y_truth == 1)))

train_data, test_data, train_Y, test_Y = \
    train_test_split(X_data, Y_truth, test_size=0.25, random_state=3)

print(len(np.array(np.where(train_Y == 1)).ravel()))
print(len(np.array(np.where(train_Y == 0)).ravel()))

print(train_data.shape)
print(train_Y.shape)

# architecture hyper-parameter
num_datapoints = len(train_data)
learning_rate = 0.000005   # 0.001
n_epoch = 10000
batch_size = 52  # divisor of 4160 : 1, 2, 4, 5, 8, 10, 13, 16, 20, 26, 32, 40, 52, 64, 65, 80, 104, 130, 160, 208, 260, 320, 416, 520, 832, 1040, 2080, 4160

n_input = 50*50*3
n_classes = 2   # IDC and non-IDC
dropout = 0.75

# Normalization
max_value = np.max(train_data)
print('max_value -> {}'.format(max_value))

train_data = np.array(train_data/max_value, dtype=np.float32)

# data and Y(Label) reshaped
onehot_train_Y = np.full((len(train_Y), n_classes), 0)
onehot_train_Y[np.arange(0, len(train_Y)), train_Y] = 1
train_data, onehot_train_Y = get_batch(train_data, onehot_train_Y, batch_size)

test_data = test_data.reshape(-1, n_input)
onehot_test_Y = np.full((len(test_Y), n_classes), 0)
onehot_test_Y[np.arange(0, len(test_Y)), test_Y] = 1


num_batches = num_datapoints // batch_size  # 10  # num_datapoints // batch_size

print_time("num_batches", START_TIME)

# data_x, labels_y = get_batch(X_train, onehot_labels, batch_size)

print("NO OF BATCHES:", num_batches)
print_time("NO OF BATCHES", START_TIME)


# tensorflow placeholder
X = tf.placeholder(tf.float32, [None, n_input])
Y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)

# initializer = tf.contrib.layers.variance_scaling_initializer(factor=2.0, seed=0)
# initializer2 = tf.contrib.layers.variance_scaling_initializer(factor=2.0, seed=1)
# initializer3 = tf.contrib.layers.variance_scaling_initializer(factor=2.0, seed=2)
# initializer4 = tf.contrib.layers.variance_scaling_initializer(factor=2.0, seed=3)

# wc 1,2  are filter
'''
weights = {
    'wc1': tf.Variable(initializer([5, 5, 3, 32])),
    'wc2': tf.Variable(initializer2([5, 5, 32, 32])),
    'wd1': tf.Variable(initializer3([13*13*32, 1024])),
    'out': tf.Variable(initializer4([1024, n_classes]))
}
'''
weights = {
    'wc1': tf.Variable(tf.random_normal([5, 5, 3, 36])),
    'wc2': tf.Variable(tf.random_normal([5, 5, 36, 36])),
    'wc3': tf.Variable(tf.random_normal([5, 5, 36, 36])),
    'wd1': tf.Variable(tf.random_normal([7*7*36, 576])),
    'wd2': tf.Variable(tf.random_normal([576, 1024])),
#    'wd1': tf.Variable(tf.random_normal([7 * 7 * 32, 288])),9
    'out': tf.Variable(tf.random_normal([1024, n_classes]))
}

biases = {
    'bc1': tf.Variable(tf.random_normal([36])),
    'bc2': tf.Variable(tf.random_normal([36])),
    'bc3': tf.Variable(tf.random_normal([36])),
    'bd1': tf.Variable(tf.random_normal([576])),
    'bd2': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Create the Model
model = conv_net(X, weights, biases, keep_prob)

# Define loss and optimizer
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=model, labels=Y))
train_min = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# Evaluate the model
correct_model = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_model, tf.float32))

# Initialing the variables
init = tf.global_variables_initializer()

# Set config
# os.environ["OMP_NUM_THREADS"] = "2"
# os.environ["KMP_BLOCKTIME"] = "30"
# os.environ["KMP_SETTINGS"] = "1"
# os.environ["KMP_AFFINITY"] = "granularity=fine,verbose,compact,1,0"

# Tensorflow Session

with tf.Session() as sess:

    sess.run(init)

    for epoch in range(n_epoch):
        for i in range(num_batches):
            # for _ in range(1):

            batch_x = train_data[i]
            batch_y = onehot_train_Y[i]

            # Use training data for optimization
            sess.run(train_min, feed_dict={X: batch_x, Y: batch_y, keep_prob: dropout})

        # varidate after every epoch
        batch_x = train_data[0]
        batch_y = onehot_train_Y[0]

        losscalc, accuracycalc = sess.run([loss, accuracy], feed_dict={X:batch_x, Y:batch_y, keep_prob:1.0})
        test_accuracycalc = sess.run(accuracy, feed_dict={X: test_data[0:300], Y: onehot_test_Y[0:300], keep_prob: 1.0})

        print("Epoch: %d, Loss: %0.4f, Train_Acc: %0.4f, TEST_Acc: %0.4f, Time: %0.4f" % (epoch, losscalc, accuracycalc, test_accuracycalc, time.time() - START_TIME))

        # when train accuracy is over 95%, program end
        if accuracycalc >= 0.95:
            break

    # display the accuracy of using testing data
    accuracycalc = sess.run(accuracy, feed_dict={X: test_data, Y: onehot_test_Y, keep_prob: 1.0})

    print("                  Testing accuracy: %0.4f, Time: %0.4f" % (accuracycalc, time.time() - START_TIME))


print_time("END", START_TIME)



2078
2082
(4160, 50, 50, 3)
(4160,)
max_value -> 255
*****   num_batches: 0.31189799308776855  *****
NO OF BATCHES: 80
*****   NO OF BATCHES: 0.313065767288208  *****
conv1.get_shape():  (?, 50, 50, 36)
conv1.get_shape() after maxpool :  (?, 25, 25, 36)
conv2.get_shape():  (?, 25, 25, 36)
conv2.get_shape() after maxpool :  (?, 13, 13, 36)
conv3.get_shape():  (?, 13, 13, 36)
conv3.get_shape() after maxpool :  (?, 7, 7, 36)
Epoch: 0, Loss: 8254691.5000, Train_Acc: 0.5577, TEST_Acc: 0.4867, Time: 3.6548
Epoch: 1, Loss: 5762106.5000, Train_Acc: 0.5385, TEST_Acc: 0.4833, Time: 4.8012
Epoch: 2, Loss: 4209255.0000, Train_Acc: 0.5192, TEST_Acc: 0.4800, Time: 5.9327
Epoch: 3, Loss: 3472066.5000, Train_Acc: 0.4808, TEST_Acc: 0.4867, Time: 7.0714
Epoch: 4, Loss: 3017952.5000, Train_Acc: 0.4808, TEST_Acc: 0.4600, Time: 8.2106
Epoch: 5, Loss: 2874835.7500, Train_Acc: 0.5000, TEST_Acc: 0.4733, Time: 9.3420
Epoch: 6, Loss: 2910552.0000, Train_Acc: 0.5000, TEST_Acc: 0.4800, Time: 10.4756
Epoch: 7, Los

KeyboardInterrupt: ignored