In [1]:
import glob
import matplotlib.pyplot as plt
import os
import tensorflow as tf

# develop the input pipeline

In [4]:
def preprocess_image(filename, coords):
    # source: https://keras.io/examples/vision/siamese_network/
    target_shape = (200,200)
    image_string = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(image_string, channels=3)
    image = tf.convert_to_tensor(image)
    x = tf.math.maximum(tf.cast(coords[0], tf.int32),0)
    y = tf.math.maximum(tf.cast(coords[1], tf.int32),0)
    w = tf.math.minimum(tf.cast(coords[2], tf.int32),tf.subtract(tf.shape(image)[1],x))
    h = tf.math.minimum(tf.cast(coords[3], tf.int32),tf.subtract(tf.shape(image)[0],y))
    image = tf.slice(image, [y,x,0], [h,w,-1] )
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, target_shape)
    return image


def decode_csv(line, video):
    record_defaults = [-1.]*9
    parsed_line = tf.io.decode_csv(line, record_defaults)
    features = tf.stack(tf.concat((parsed_line,[video]),0))  # Stack features so that you can later vectorize forward prop., etc.
    return features

def get_filename(video, frame):
    # preprocess tensors
    frame = tf.strings.as_string(tf.cast(frame, tf.int32))
    video = tf.strings.as_string(tf.cast(video, tf.int32))
    # get the leading zeros
    num_zeros_frame = tf.subtract(6,tf.strings.length(frame))
    leading_zeros_frame = tf.repeat("0",num_zeros_frame,0)
    leading_zeros_frame = tf.strings.reduce_join(leading_zeros_frame)
    num_zeros_video = tf.subtract(2,tf.strings.length(video))
    leading_zeros_video = tf.repeat("0",num_zeros_video,0)
    leading_zeros_video = tf.strings.reduce_join(leading_zeros_video)    
    # get the filename
    filename = tf.add("data/train/MOT16-", leading_zeros_video)
    filename = tf.add(filename, video)
    filename = tf.add(filename,"/img1/")
    filename = tf.add(filename, leading_zeros_frame)
    filename = tf.add(filename, frame)
    filename = tf.add(filename, ".jpg")
#     for i in filename:
#         print(i)
    return filename

def get_object(gt_line):
    frame = gt_line[0]
    id_ = gt_line[1]
    video = gt_line[-1]
    coords = gt_line[2:6]
    
#     video = 2
    filename = get_filename(video, frame)
    image = preprocess_image(filename, coords)
#     return image,tf.cast(video,tf.int32),tf.cast(frame,tf.int32),tf.cast(id_,tf.int32)
    return image,video,frame,id_

def read_gt(filename):
    video = tf.strings.split(filename,os.sep)[2]
    video = tf.strings.substr(video, 6, 2)
    video = tf.strings.to_number(video, tf.float32)
    gt = tf.data.TextLineDataset(filename).map(lambda x: decode_csv(x,video))
    return gt

In [None]:
# dataset = tf.data.Dataset.from_tensor_slices(["data/train/MOT16-02/gt/gt.txt","data/train/MOT16-04/gt/gt.txt"])
train_filepaths = glob.glob("data/train/*/gt/gt.txt")
valid_filepaths = glob.glob("data/train/*/gt/gt.txt")
dataset = tf.data.Dataset.from_tensor_slices(train_filepaths)
dataset = dataset.flat_map(read_gt)
anchor = dataset.map(get_object)
# dataset = dataset.batch(2)

for i,anchor in enumerate(anchor.skip(100).take(10)):
    print("Image ",i)
    positive_dataset = dataset.filter(lambda x: (anchor[1]==x[-1]) and (anchor[2]!=x[0]) and (anchor[3]==x[1])) # same video, diff frame, same id_
#     positive_dataset = tf.data.experimental.sample_from_datasets([positive_dataset])
    positive_dataset.shuffle(100)
    positive_dataset = positive_dataset.take(1)
    positive = positive_dataset.map(get_object)
    negative_dataset = dataset.filter(lambda x: not ((anchor[1]==x[-1]) and (anchor[3]==x[1]))) # not (same video and same id_)
#     negative_dataset = tf.data.experimental.sample_from_datasets([negative_dataset])
    negative_dataset = negative_dataset.shuffle(100)
    negative_dataset = negative_dataset.take(1)
    negative = negative_dataset.map(get_object)
    print("anchor")
    plt.imshow(anchor[0])
    plt.show()
    print("positive")
    plt.imshow(positive.__iter__().__next__()[0])
    plt.show()
    print("negative")
    plt.imshow(negative.__iter__().__next__()[0])
    plt.show()





# make the input pipeline as a dataset

In [15]:
def triplet_gen(stop):
    train_filepaths = glob.glob("data/train/*/gt/gt.txt")
    valid_filepaths = glob.glob("data/train/*/gt/gt.txt")
    dataset = tf.data.Dataset.from_tensor_slices(train_filepaths)
    dataset = dataset.flat_map(read_gt)
    anchors = dataset.map(get_object)
    i = 0
    for anchor in anchors: 
        if(i>=stop): break
        positive_dataset = dataset.filter(lambda x: (anchor[1]==x[-1]) and (anchor[2]!=x[0]) and (anchor[3]==x[1])) # same video, diff frame, same id_
        positive_dataset.shuffle(100)
        positive_dataset = positive_dataset.take(1)
        positive = positive_dataset.map(get_object)
        negative_dataset = dataset.filter(lambda x: not ((anchor[1]==x[-1]) and (anchor[3]==x[1]))) # not (same video and same id_)
        negative_dataset = negative_dataset.shuffle(100)
        negative_dataset = negative_dataset.take(1)
        negative = negative_dataset.map(get_object)
        yield anchor[0],positive.__iter__().__next__()[0],negative.__iter__().__next__()[0]
        i += 1


# TODO: If this dataset is too slow, I could simply make the positive and negative datasets this way
# the positive is just the anchor + 1, and the negative is the anchors shuffled 
triplet_dataset = tf.data.Dataset.from_generator(triplet_gen, args=[100], output_types=tf.float32, output_shapes = (3, 200, 200, 3) )


In [16]:
triplet_dataset = triplet_dataset.batch(10)
for i in triplet_dataset:
    print(tf.shape(i))

tf.Tensor([ 10   3 200 200   3], shape=(5,), dtype=int32)
tf.Tensor([ 10   3 200 200   3], shape=(5,), dtype=int32)
tf.Tensor([ 10   3 200 200   3], shape=(5,), dtype=int32)
tf.Tensor([ 10   3 200 200   3], shape=(5,), dtype=int32)
tf.Tensor([ 10   3 200 200   3], shape=(5,), dtype=int32)
tf.Tensor([ 10   3 200 200   3], shape=(5,), dtype=int32)
tf.Tensor([ 10   3 200 200   3], shape=(5,), dtype=int32)
tf.Tensor([ 10   3 200 200   3], shape=(5,), dtype=int32)
tf.Tensor([ 10   3 200 200   3], shape=(5,), dtype=int32)
tf.Tensor([ 10   3 200 200   3], shape=(5,), dtype=int32)


# other codes

In [675]:
#source: https://stackoverflow.com/questions/49525056/tensorflow-python-reading-2-files/49548224#49548224

feature_names = ['f1','f2','f3','f4','f5']
record_defaults = [[""], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]]


def decode_csv(line):
    parsed_line = tf.decode_csv(line, record_defaults)
    label =  parsed_line[-1]      # label is the last element of the list
    del parsed_line[-1]           # delete the last element from the list
    del parsed_line[0]            # even delete the first element bcz it is assumed NOT to be a feature
    features = tf.stack(parsed_line)  # Stack features so that you can later vectorize forward prop., etc.
    #label = tf.stack(label)          #NOT needed. Only if more than 1 column makes the label...
    batch_to_return = features, label
    return batch_to_return

filenames = tf.placeholder(tf.string, shape=[None])
dataset5 = tf.data.Dataset.from_tensor_slices(filenames)
dataset5 = dataset5.flat_map(lambda filename: tf.data.TextLineDataset(filename).skip(1).map(decode_csv))
dataset5 = dataset5.shuffle(buffer_size=1000)
dataset5 = dataset5.batch(7)
iterator5 = dataset5.make_initializable_iterator()
next_element5 = iterator5.get_next()

# Initialize `iterator` with training data. 
training_filenames = ["train_data1.csv", #TODO: I know wwhich ones
                      "train_data2.csv"]

# Initialize `iterator` with validation data.
validation_filenames = ["dev_data1.csv"] #TODO: I know w=which ones

with tf.Session() as sess:
    # Train 2 epochs. Then validate train set. Then validate dev set.
    for _ in range(2):     
        sess.run(iterator5.initializer, feed_dict={filenames: training_filenames})
        while True:
            try:
                features, labels = sess.run(next_element5)
                # Train...
                print("(train) features: ")
                print(features)
                print("(train) labels: ")
                print(labels)  
            except tf.errors.OutOfRangeError:
                print("Out of range error triggered (looped through training set 1 time)")
                break

    # Validate (cost, accuracy) on train set
    print("\nDone with the first iterator\n")

    sess.run(iterator5.initializer, feed_dict={filenames: validation_filenames})
    while True:
        try:
            features, labels = sess.run(next_element5)
            # Validate (cost, accuracy) on dev set
            print("(dev) features: ")
            print(features)
            print("(dev) labels: ")
            print(labels)
        except tf.errors.OutOfRangeError:
            print("Out of range error triggered (looped through dev set 1 time only)")
            break 

AttributeError: module 'tensorflow' has no attribute 'placeholder'