In [11]:
from google.colab import files
uploaded = files.upload()


Saving X.npy to X (1).npy


In [12]:
from google.colab import files
uploaded = files.upload()

Saving Y.npy to Y (1).npy


In [19]:
# Breast Histology Images
# Classify IDC vs non IDC images
#
# https://www.kaggle.com/simjeg/lymphoma-subtype-classification-fl-vs-cll
#
# This dataset consists of 5547 breast histology images of size 50 x 50 x 3,
# The goal is to classify cancerous images (IDC : invasive ductal carcinoma) vs non-IDC images.
#
# Lenet 5
#


import tensorflow as tf
import numpy as np
import time
import os
from sklearn.model_selection import train_test_split


def print_time(message, start_time):
    print("*****   " + message + ": {}  *****".format(time.time() - start_time))


def get_batch(all_data, all_labels, batch_size=16):
    # // means divide and result is integer, / returns float
    # rows_data = len(all_data) // batch_size
    # rows_labels = len(all_labels) // batch_size

    rtn_data = all_data.reshape(-1, batch_size, int(50*50*3))
    rnt_labels = all_labels.reshape(-1, batch_size, 2)

    return (rtn_data, rnt_labels)


def conv2d(x, weight, bias, strides=1):
    x = tf.nn.conv2d(x, weight, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, bias)

    return tf.nn.relu(x)


def maxpool2d(x, k=2):

    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')


def conv_net(x, weights, biases, dropout):

    x = tf.reshape(x, shape=[-1, 50, 50, 3])

    # Convolution layer 1
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    print("conv1.get_shape(): ", conv1.get_shape())

    # MAX POOLING
    conv1 = maxpool2d(conv1, k=2)
    print("conv1.get_shape() after maxpool : ", conv1.get_shape())

    # Convolution layer 2
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    print("conv2.get_shape(): ", conv2.get_shape())

    # MAX POOLING
    conv2 = maxpool2d(conv2, k=2)
    print("conv2.get_shape() after maxpool : ", conv2.get_shape())

    # Fully connected layer
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
   # fc1 = tf.nn.dropout(fc1, 0.75)

    # Fully connected layer 2
    # fc2 = tf.reshape(fc1, [-1, weights['wd2'].get_shape().as_list()[0]])
    # fc2 = tf.add(tf.matmul(fc2, weights['wd2']), biases['bd2'])
    # fc2 = tf.nn.relu(fc2)
    # fc2 = tf.nn.dropout(fc2, 0.75)

    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])

    return out


#####################################################################################################
#
#   Main
#
#
######################################################################################################
START_TIME = time.time()
path = r'C:\Users\gtune\OneDrive\document_usk\UCSC\02_SecondQuater\Deep_Learning_and_Artificial_Intelligence_with_TensorFlow\Homework\Final_Project\data'

X_data = np.load("X.npy")
Y_truth = np.load("Y.npy")

# print(X_data[0:10])
# print(X_data.shape)
# print(Y_truth[0:100])

# print(np.array(np.where(Y_truth == 1)))

train_data, test_data, train_Y, test_Y = \
    train_test_split(X_data, Y_truth, test_size=0.25, random_state=3)

print(len(np.array(np.where(train_Y == 1)).ravel()))
print(len(np.array(np.where(train_Y == 0)).ravel()))

print(train_data.shape)
print(train_Y.shape)
#print(train_Y[0:50])

# architecture hyper-parameter
num_datapoints = len(train_data)
learning_rate = 0.0001   # 0.001
n_epoch = 1000
batch_size = 52  # divisor of 4160 : 1, 2, 4, 5, 8, 10, 13, 16, 20, 26, 32, 40, 52, 64, 65, 80, 104, 130, 160, 208, 260, 320, 416, 520, 832, 1040, 2080, 4160

n_input = 50*50*3
n_classes = 2   # IDC and non-IDC
dropout = 0.75

# Normalization
max_value = np.max(train_data)
print('max_value -> {}'.format(max_value))

train_data = np.array(train_data/max_value, dtype=np.float32)

# data and Y(Label) reshaped
onehot_train_Y = np.full((len(train_Y), n_classes), 0)
onehot_train_Y[np.arange(0, len(train_Y)), train_Y] = 1
train_data, onehot_train_Y = get_batch(train_data, onehot_train_Y, batch_size)

test_data = test_data.reshape(-1, n_input)
onehot_test_Y = np.full((len(test_Y), n_classes), 0)
onehot_test_Y[np.arange(0, len(test_Y)), test_Y] = 1


num_batches = num_datapoints // batch_size  # 10  # num_datapoints // batch_size

print_time("num_batches", START_TIME)

# data_x, labels_y = get_batch(X_train, onehot_labels, batch_size)

print("NO OF BATCHES:", num_batches)
print_time("NO OF BATCHES", START_TIME)


# tensorflow placeholder
X = tf.placeholder(tf.float32, [None, n_input])
Y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)

# wc 1,2  are filter
weights = {
    'wc1': tf.Variable(tf.random_normal([5, 5, 3, 18])),
    'wc2': tf.Variable(tf.random_normal([5, 5, 18, 48])),
    'wd1': tf.Variable(tf.random_normal([13*13*48, 360])),
    # 'wd2': tf.Variable(tf.random_normal([360, 252])),
    'out': tf.Variable(tf.random_normal([360, n_classes]))
}

biases = {
    'bc1': tf.Variable(tf.random_normal([18])),
    'bc2': tf.Variable(tf.random_normal([48])),
    'bd1': tf.Variable(tf.random_normal([360])),
    # 'bd2': tf.Variable(tf.random_normal([252])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Create the Model
model = conv_net(X, weights, biases, keep_prob)

# Define loss and optimizer
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=model, labels=Y))
train_min = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# Evaluate the model
correct_model = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_model, tf.float32))

# Initialing the variables
init = tf.global_variables_initializer()

# Tensorflow Session

with tf.Session() as sess:

    sess.run(init)

    for epoch in range(n_epoch):
        for i in range(num_batches):
            # for _ in range(1):

            batch_x = train_data[i]
            batch_y = onehot_train_Y[i]

            # Use training data for optimization
            sess.run(train_min, feed_dict={X: batch_x, Y: batch_y, keep_prob: dropout})

        # varidate after every epoch
        batch_x = train_data[0]
        batch_y = onehot_train_Y[0]

        losscalc, accuracycalc = sess.run([loss, accuracy], feed_dict={X:batch_x, Y:batch_y, keep_prob:1.0})

        test_accuracycalc = sess.run(accuracy, feed_dict={X: test_data, Y: onehot_test_Y, keep_prob: 1.0})

        print("Epoch: %d, Loss: %0.4f, Train_Acc: %0.4f, TEST_Acc: %0.4f, Time: %0.4f" % (epoch, losscalc, accuracycalc, test_accuracycalc, time.time() - START_TIME))

        # when train accuracy is over 95%, program end
        if accuracycalc >= 0.95:
            break

    # display the accuracy of using testing data
    accuracycalc = sess.run(accuracy, feed_dict={X: test_data, Y: onehot_test_Y, keep_prob: 1.0})

    print("                  Testing accuracy: %0.4f, Time: %0.4f" % (accuracycalc, time.time() - START_TIME))

print_time("END", START_TIME)



2078
2082
(4160, 50, 50, 3)
(4160,)
max_value -> 255
*****   num_batches: 0.2580108642578125  *****
NO OF BATCHES: 80
*****   NO OF BATCHES: 0.25908398628234863  *****
conv1.get_shape():  (?, 50, 50, 18)
conv1.get_shape() after maxpool :  (?, 25, 25, 18)
conv2.get_shape():  (?, 25, 25, 48)
conv2.get_shape() after maxpool :  (?, 13, 13, 48)
Epoch: 0, Loss: 3168.0627, Train_Acc: 0.5769, TEST_Acc: 0.5876, Time: 4.6269
Epoch: 1, Loss: 2640.0820, Train_Acc: 0.6154, TEST_Acc: 0.5941, Time: 5.4745
Epoch: 2, Loss: 2372.3643, Train_Acc: 0.6154, TEST_Acc: 0.6056, Time: 6.3243
Epoch: 3, Loss: 2203.3882, Train_Acc: 0.5962, TEST_Acc: 0.6179, Time: 7.1637
Epoch: 4, Loss: 2098.2505, Train_Acc: 0.5962, TEST_Acc: 0.6301, Time: 8.0062
Epoch: 5, Loss: 2050.9756, Train_Acc: 0.5769, TEST_Acc: 0.6352, Time: 8.8451
Epoch: 6, Loss: 1952.7042, Train_Acc: 0.5769, TEST_Acc: 0.6410, Time: 9.6920
Epoch: 7, Loss: 1890.2374, Train_Acc: 0.5577, TEST_Acc: 0.6489, Time: 10.5350
Epoch: 8, Loss: 1848.7249, Train_Acc: 0.5