In [1]:
import os
import cv2
import sys
import itertools
import numpy as np
import tensorflow as tf
from configs.config import Config
from libs.networks.network_factory import get_network_byname
from libs import build_rpn, build_fast_rcnn, build_fpn

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
id_to_class = {1:'ascus', 2:'asch', 
  3:'lsil', 4:'hsil', 5:'scc',  6:'agc', 7:'trichomonas', 
 8:'candida',  9:'flora', 10:'herps', 11:'actinomyces'}
# set file of showing image and labels 
path_for_metadata = os.path.join(os.getcwd(), "labels.tsv")
path_sprite_image = os.path.join(os.getcwd(), "sprite_image.bmp")

### make sprite image and metadata

In [3]:
def create_sprite_image(images, width, height, number_padding=4): 
    """
    images: the list images for make up sprite image
    width: the number images along with width
    height: the number image along with height
    number_padding: the pixels padding along with images
    Returns a sprite image consisting of images passed as argument. 
    Images should be count x width x height"""
    if isinstance(images, list):
        images = np.array(images)
    img_h = images.shape[1] 
    img_w = images.shape[2]
    assert width * height == images.shape[0]
    spriteimage = np.ones(((img_h +2 * number_padding) * height,
                           (img_w +2 * number_padding) * width, 3)) * 255
    
    for i in range(height): 
        for j in range(width):
            this_filter = i * width + j
            if this_filter < images.shape[0]: 
                this_img = images[this_filter] 
                spriteimage[i * (img_h+2 * number_padding) + number_padding :
                            (i + 1) * (img_h+2*number_padding) - number_padding,
                            j * (img_w+2 * number_padding) + number_padding :
                            (j + 1) * (img_w+2*number_padding) - number_padding] = this_img
    return spriteimage

##### load reference image

In [4]:
sample_labels = []
sample_images = []
sample_images_name = []
train_images = []

for l in os.listdir("./images/"):
    label_path = os.path.join("./images/", l)
    if os.path.isdir(label_path):
        for i in os.listdir(label_path):
            item_image = cv2.imread(os.path.join(label_path, i))
            train_images.append(cv2.resize(item_image,(224,224)))
            item_image = cv2.resize(item_image,(28,28))
            sample_images_name.append(int(i.split(".")[0]))
            sample_images.append(item_image)
            sample_labels.append(id_to_class[int(l)])

#### shuffle the data

In [5]:
sample_images = np.array(sample_images)
sample_labels = np.array(sample_labels)
sample_images_name = np.array(sample_images_name)
train_images = np.array(train_images)
# shuffle the data
shuffle_index = np.random.permutation(sample_images.shape[0])
train_images = train_images[shuffle_index,:,:,:]
sample_images = sample_images[shuffle_index,:,:,:]
sample_labels = sample_labels[shuffle_index]
sample_images_name = sample_images_name[shuffle_index]

In [6]:
sample_images_name[0]

1130

In [7]:
sprite_image = create_sprite_image(sample_images[:39*40,:,:,:], 39, 40)
cv2.imwrite(path_sprite_image, sprite_image)

# write the corresponding labels into metadata
with open(path_for_metadata,'w') as f: 
    f.write("Index\tLabel\n") 
    for index,label in zip(sample_images_name[:39*40], sample_labels[:39*40]): 
        f.write("%d\t%s\n" % (index,label))

### Embedding the reference images 

#### setup embedding model

In [8]:
def model_fn(features,
             mode,
             params,
             config):
    net_config = params["net_config"]
    IS_TRAINING = False
    origin_image_batch = tf.cast(features, tf.float32)
    image_batch = origin_image_batch - net_config.PIXEL_MEANS
    # there is is_training means that bn is training, so it is important!
    _, share_net = get_network_byname(inputs=image_batch,
                                      config=net_config,
                                      is_training=IS_TRAINING,
                                      reuse=tf.AUTO_REUSE)
    if mode == tf.estimator.ModeKeys.PREDICT:
        predicts = {"embedding_feature": share_net["C5"]}

    return tf.estimator.EstimatorSpec(mode, predictions=predicts)

In [9]:
net_config = Config()
session_config = tf.ConfigProto()
session_config.gpu_options.allow_growth = True
session_config.allow_soft_placement = True
estimator_config = tf.estimator.RunConfig(model_dir=net_config.MODLE_DIR,
                                          session_config=session_config)

my_estimator = tf.estimator.Estimator(model_fn,
                                      params={"net_config": net_config}, 
                                      config=estimator_config)

INFO:tensorflow:Using config: {'_model_dir': './logs', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': gpu_options {
  allow_growth: true
}
allow_soft_placement: true
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f065248bf98>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [10]:
def build_reference_batch(images):
    reference_data = tf.data.Dataset.from_tensor_slices(images)
    return reference_data.batch(32)

In [11]:
single_record = my_estimator.predict(input_fn=lambda:build_reference_batch(train_images),
                                     yield_single_examples=False)

In [12]:
total_pyramid_feature = []
while True:
    try:
        pyramid_feature = next(single_record)["embedding_feature"]
        total_pyramid_feature.append(pyramid_feature)
    except:
        break

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./logs/model.ckpt-224388
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [14]:
all_pyramid_feature = np.concatenate(total_pyramid_feature, axis=0)
print(np.shape(all_pyramid_feature))
# Arrangement the reference image feature
embedding_input = all_pyramid_feature[:39*40,:, :, :]
embedding_input = np.reshape(embedding_input, (np.shape(embedding_input)[0], -1))
embedding_size = np.shape(embedding_input)
print(embedding_size)

(1563, 7, 7, 2048)
(1560, 100352)


#### setup the embedding function

In [15]:
embedding = tf.Variable(tf.zeros(embedding_size), name="test_embedding")
assignment = embedding.assign(embedding_input)

writer = tf.summary.FileWriter("embedding/")
config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
embedding_config = config.embeddings.add()
embedding_config.tensor_name = embedding.name
embedding_config.sprite.image_path = path_sprite_image
embedding_config.metadata_path = path_for_metadata
# Specify the width and height of a single thumbnail.
embedding_config.sprite.single_image_dim.extend([36, 36])
tf.contrib.tensorboard.plugins.projector.visualize_embeddings(writer, config)
sess = tf.InteractiveSession()
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
sess.run(assignment)
saver.save(sess, os.path.join("embedding","model.ckpt"), 1)

'embedding/model.ckpt-1'

### K-means cluster

In [17]:
from sklearn.cluster import KMeans
class_list = ["ascu", "asch", "lsil","hsil", "scc", "agc", "trich", "cand", "flora", "herps", "actin"]
number_list = [3, 4, 2, 4, 2, 3, 1, 2, 2, 4, 1]
sample_labels = sample_labels[:39*40]
sample_images_name = sample_images_name[:39*40]

class_to_id = {}
for key in id_to_class:
    class_to_id[id_to_class[key]] = key
sample_labels_id = np.zeros_like(sample_labels)
for i in range(np.shape(sample_labels)[0]):
    sample_labels_id[i] = class_to_id[sample_labels[i]]


In [22]:
for i in range(1, 12):
    tmp_number = number_list[i-1]
    tmp_features = embedding_input[sample_labels_id==str(i)]
    tmp_image_name = sample_images_name[sample_labels_id==str(i)]
    kmeans = KMeans(n_clusters=tmp_number, random_state=0).fit(tmp_features)
    tmp_expand_features = np.expand_dims(tmp_features, 1)
    center_expand_features = np.expand_dims(kmeans.cluster_centers_, 0)
    tmp_diff = np.sum(np.abs(tmp_expand_features-center_expand_features), 2)
    origin_shape = tmp_diff.shape
    tmp_diff = np.reshape(tmp_diff, (-1))
    sort_index = np.argsort(tmp_diff)
    concat_name_list = []
    for j in range(3):
        concat_name_list.append(tmp_image_name[sort_index[j]//tmp_number + 1])
    print(concat_name_list)

[845, 1075, 257]
[278, 910, 93]
[752, 974, 617]
[293, 1629, 738]
[761, 659, 1049]
[1059, 552, 112]
[998, 587, 537]
[1207, 1002, 1346]
[568, 1100, 1448]
[884, 1080, 1213]
[1248, 730, 328]
