In [1]:
# Data from https://www.kaggle.com/c/human-protein-atlas-image-classification/data

In [2]:
import numpy as np
import tensorflow as tf

import scipy.io as sio
from os import path
import matplotlib.pyplot as plt

from PIL import Image
import glob

In [3]:
# Fix the seed of NumPy's global random generator, which the later
# np.random.shuffle call on the file list draws from.
np.random.seed(1)

In [4]:
# Prefix used for every generated record file name.
sample_names = 'hpa'
# Directory the .tfrecord files are written to.
record_dir = path.join('..', '..', 'data', '2d_data', 'tf_records', 'dataset_1', 'ground_truths')
# Directory holding the source images.
dataset_dir = path.join('..', '..', 'data', '2d_data', 'real_data', 'hpa_cells', 'train')
# glob returns entries in arbitrary, filesystem-dependent order. Sorting gives
# the seeded shuffle a fixed starting order, so the same subset of files is
# selected on every machine.
files = sorted(glob.glob(path.join(dataset_dir, '*')))

# Maximum number of images to convert.
NUM_SAMPLES = 10000

# Size each image is resized to; the tuple is reversed before it is handed to
# PIL's resize.
obj_dims = (648, 486)

In [5]:
# Shuffle the file list in place using NumPy's global random generator, then
# keep at most NUM_SAMPLES entries.
# NOTE(review): this cell mutates `files`, so re-running it reshuffles the
# already-truncated list instead of the full directory listing.
np.random.shuffle(files)
files = files[:NUM_SAMPLES]

In [6]:
def normalize(im):
    """
    Linearly rescales im so that its minimum maps to 0 and its maximum to 1.

    Integer (and boolean) input is promoted to float64 before any arithmetic so
    that the subtraction cannot wrap around in a narrow integer type. Floating
    point input keeps its dtype.

    A constant image has no range to stretch; it is returned as an array of
    zeros instead of the NaNs that a 0/0 division would produce.

    Args:
        im: array-like of numbers. Must contain at least one element.

    Returns:
        Floating point ndarray with the same shape as im.

    Raises:
        ValueError: if im is empty (raised by np.min).
    """
    im = np.asarray(im)
    if not np.issubdtype(im.dtype, np.inexact):
        im = im.astype(np.float64)

    im_min = np.min(im)
    im_range = np.max(im) - im_min
    if im_range == 0:
        # Every pixel has the same value: avoid dividing by zero.
        return np.zeros_like(im)
    return (im - im_min) / im_range

def _create_example(plane):
    """
    Wraps a numpy array in a tf.train.Example.

    The array is flattened with ravel() and stored as a float list under the
    single feature key 'plane'.
    """
    flat_values = plane.ravel()
    float_list = tf.train.FloatList(value=flat_values)
    features = tf.train.Features(
        feature={'plane': tf.train.Feature(float_list=float_list)}
    )
    return tf.train.Example(features=features)

In [7]:
# PIL's resize takes (width, height), so obj_dims is reversed. A plain tuple of
# ints is passed because that is the documented argument type.
resize_dims = tuple(int(d) for d in reversed(obj_dims))

# Create the output directory (and any missing parents) up front so the first
# writer does not fail on a fresh checkout; this is a no-op if it exists.
tf.io.gfile.makedirs(record_dir)

# Iterate over the files that are actually available rather than
# range(NUM_SAMPLES), which raises IndexError when the dataset directory holds
# fewer than NUM_SAMPLES images.
for i, image_file in enumerate(files[:NUM_SAMPLES]):
    # The context manager releases the source file handle once the resized
    # copy has been turned into an array.
    with Image.open(image_file) as image:
        sample = np.asarray(image.resize(resize_dims))

    sample = normalize(sample)

    # One record file per sample, numbered with a zero-padded 5-digit index.
    record_file = path.join(record_dir, sample_names + '-%.5d' % i + '.tfrecord')
    with tf.io.TFRecordWriter(record_file) as writer:
        tf_example = _create_example(sample)
        writer.write(tf_example.SerializeToString())