In [8]:
import os
import numpy as np

import tensorflow as tf
import tensorflow_addons as tfa
# Turn on memory growth for every GPU that TensorFlow reports, then print the
# TensorFlow version so it is recorded in the cell output.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
print(tf.__version__)
# Short alias for the tf.data autotune constant.
AUTOTUNE = tf.data.experimental.AUTOTUNE

import deepcell
# Changed from before due to new placement of Track, concat_tracks
from deepcell_tracking.utils import load_trks
from deepcell.data.tracking import Track, concat_tracks
##############
from sklearn.model_selection import train_test_split
from deepcell.utils.data_utils import reshape_movie
from deepcell.utils.transform_utils import erode_edges
from deepcell.data import split_dataset
from deepcell_toolbox.processing import normalize, histogram_normalization

import spektral

2.8.0


In [3]:
import json
def load_img_dict(file):
    """Load a three-level nested JSON object and convert every key to ``int``.

    JSON object keys are always strings, so the three levels of keys are cast
    back to integers after loading. Elsewhere in this notebook the result is
    indexed as ``good_imgs[b][f][c]``.

    Args:
        file: Path of the JSON file to read. Its top-level value must be an
            object whose values are objects whose values are objects.

    Returns:
        dict: ``{int: {int: {int: value}}}``; leaf values are left exactly as
        ``json.load`` decoded them.

    Raises:
        ValueError: If any key at the three levels is not an integer string.
    """
    # The context manager guarantees the handle is closed, even if decoding
    # fails (the handle used to be left open).
    with open(file) as json_file:
        raw = json.load(json_file)

    # Build a fresh dict instead of rebinding entries of the one being
    # iterated over.
    return {
        int(k1): {
            int(k2): {int(k3): leaf for k3, leaf in level3.items()}
            for k2, level3 in level2.items()
        }
        for k1, level2 in raw.items()
    }
def load_img_idx_dict(file):
    """Load a flat JSON object and convert its top-level keys to ``int``.

    Args:
        file: Path of the JSON file to read. Its top-level value must be an
            object.

    Returns:
        dict: ``{int: value}``; values are left exactly as ``json.load``
        decoded them.

    Raises:
        ValueError: If a top-level key is not an integer string.
    """
    # The context manager guarantees the handle is closed, even if decoding
    # fails (the handle used to be left open).
    with open(file) as json_file:
        raw = json.load(json_file)
    return {int(key): value for key, value in raw.items()}

In [4]:
# Load the pruning dictionaries (appearances / blank / border) for the train
# and val splits in one pass. The targets on the left are listed in the same
# order as the paths on the right.
(train_good_imgs, train_blank_imgs, train_border_imgs,
 val_good_imgs, val_blank_imgs, val_border_imgs) = map(load_img_dict, (
    '../dataset_pruning/train_appearances_dict.json',
    '../dataset_pruning/train_blank_dict.json',
    '../dataset_pruning/train_border_dict.json',
    '../dataset_pruning/val_appearances_dict.json',
    '../dataset_pruning/val_blank_dict.json',
    '../dataset_pruning/val_border_dict.json',
))

In [None]:
# Since the images are written to disk one at a time, we can modify the write function to write an image to disk
# only when it is "good." The dictionary of good images can be passed in as an argument. The function uses
# track.appearances, the NumPy array, to write the file. Since the dictionary's keys correspond to the indices of
# the array, we can use it to determine which images to add.

# Based on this line in the Track class, 'appearances = np.zeros(batch_shape + appearance_shape, dtype='float32')',
# it seems the track.appearances object does not have any batch/cells/frames pattern, but we should check.

In [2]:
import argparse
import os

import numpy as np
import tensorflow as tf

from deepcell_tracking.trk_io import load_trks
from deepcell_tracking.utils import get_max_cells
from deepcell.data.tracking import Track
# Might want to import this just to get the functions it uses
from deepcell.utils.tfrecord_utils import write_tracking_dataset_to_tfr

def get_arg_parser():
    """Build the command-line parser for the TFRecord preparation script.

    Returns:
        argparse.ArgumentParser: Parser exposing ``--data-path``,
        ``--appearance-dim``, ``--distance-threshold`` and ``--crop-mode``.
    """
    arg_parser = argparse.ArgumentParser()

    arg_parser.add_argument(
        '--data-path',
        default='/training/tracking-nuclear',
        help='Path to the training data.',
    )

    # The remaining options differ only in flag name, type and default.
    typed_options = (
        ('--appearance-dim', int, 64),
        ('--distance-threshold', int, 64),
        ('--crop-mode', str, 'fixed'),
    )
    for flag, value_type, default_value in typed_options:
        arg_parser.add_argument(flag, type=value_type, default=default_value)

    return arg_parser

In [None]:
def write_tracking_dataset_to_tfr(track,
                                  filename,
                                  good_imgs,
                                  target_max_cells=168,
                                  verbose=True):
    """Write the "good" appearance crops of ``track`` to a TFRecord file.

    Every entry ``track.appearances[b, f, c]`` whose ``good_imgs[b][f][c]`` is
    not ``-1`` is serialized as a single-feature example (key ``'app'``) and
    appended to ``<filename>.tfrecord``. A companion ``<filename>.csv`` stores
    one ``key,ndims`` row per feature, which is the metadata needed to parse
    the records back.

    NOTE(review): this definition shadows the function of the same name
    imported from ``deepcell.utils.tfrecord_utils`` earlier in the notebook.
    ``create_tracking_example`` is not defined or imported in the visible
    cells -- presumably it also comes from ``deepcell.utils.tfrecord_utils``;
    confirm it is in scope before running.

    Args:
        track: Object exposing an ``appearances`` array whose first three axes
            are iterated as ``(b, f, c)``.
        filename (str): Output path without extension.
        good_imgs: Nested mapping indexed as ``good_imgs[b][f][c]``; the value
            ``-1`` marks an image to skip. Every ``(b, f, c)`` index of the
            array must be present, otherwise the lookup raises.
        target_max_cells (int): Unused by this implementation (nothing is
            padded); kept so the signature stays call-compatible.
        verbose (bool): If true, print how many records were written.

    Returns:
        int: Number of examples written to the TFRecord file.
    """
    # Local import: ``csv`` is not imported by any visible notebook cell.
    import csv

    filename_tfr = filename + '.tfrecord'
    filename_csv = filename + '.csv'

    app = track.appearances
    n_batches, n_frames, n_cells = app.shape[:3]

    count = 0

    # The context manager closes the TFRecord file even if serialization of
    # one of the examples raises.
    with tf.io.TFRecordWriter(filename_tfr) as tfr_writer:
        for b in range(n_batches):
            for f in range(n_frames):
                for c in range(n_cells):
                    if good_imgs[b][f][c] == -1:
                        continue

                    example = create_tracking_example({'app': app[b, f, c]})

                    if example is not None:
                        tfr_writer.write(example.SerializeToString())
                        count += 1

    if verbose:
        print(f'Wrote {count} elements to TFRecord')

    # Metadata is derived from the array itself rather than from the last
    # example written, so it is still available when no image qualified.
    # Each example is one (b, f, c) slice, i.e. the array minus its first
    # three axes.
    dataset_dims = {'app': len(app.shape) - 3}

    # newline='' is what the csv module expects; without it extra blank rows
    # appear on platforms that translate line endings.
    with open(filename_csv, 'w', newline='') as csv_file:
        csv.writer(csv_file).writerows(
            [key, ndims] for key, ndims in dataset_dims.items()
        )

    return count

In [11]:
# There is probably no reason to use the argument parser here -- it only exists because the
# original script was meant to be run from the command line.
# args = get_arg_parser().parse_args([])

# train_trks = load_trks(os.path.join(args.data_path, 'train.trks'))
# Only the validation split is loaded. The directory is hard-coded to the same value as the
# parser's --data-path default.
val_trks = load_trks(os.path.join('/training/tracking-nuclear', 'val.trks'))

# max_cells = max([get_max_cells(train_trks['y']), get_max_cells(val_trks['y'])])

# NOTE(review): if this loop is revived, replace the set literal {'train', 'val'} with a list
# or tuple. Sets have no guaranteed iteration order, so zip() could pair a split name with
# the wrong data.
# for split, trks in zip({'train', 'val'}, [train_trks, val_trks]):
#     print('Preparing {} as tf record'.format(split))

#     with tf.device('/cpu:0'):
#         tracks = Track(tracked_data=trks,
#                        appearance_dim=args.appearance_dim,
#                        distance_threshold=args.distance_threshold,
#                        crop_mode=args.crop_mode)

#         write_tracking_dataset_to_tfr(tracks, target_max_cells=max_cells, filename=split)

In [13]:
# Build the Track object for the validation split.
# NOTE(review): appearance_dim=32 differs from the --appearance-dim parser default of 64; confirm which is intended.
val_tracks = Track(tracked_data=val_trks, appearance_dim=32, distance_threshold=64, crop_mode='fixed')

100%|███████████████████████████████████████████| 27/27 [02:50<00:00,  6.30s/it]
100%|███████████████████████████████████████████| 27/27 [03:51<00:00,  8.57s/it]
2022-08-05 00:29:38.413052: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-05 00:29:39.427545: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10415 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:09:00.0, compute capability: 6.1


In [14]:
# Inspect the appearances array shape; write_tracking_dataset_to_tfr loops over its first three axes.
val_tracks.appearances.shape

(27, 71, 277, 32, 32, 1)

In [5]:
# Memory usage dropped sharply (from at least 70% to about 40%) once the elements were written to the TFRecord,
# and dropped again when the val split was written to TFRecords (down to 14% at the end).
# Debugging leftover: %tb re-displays the most recent traceback.
%tb

SystemExit: 2

In [5]:
# NOTE(review): `tracks` is only assigned inside the commented-out train/val loop, so this cell raises
# NameError on a fresh kernel; `val_tracks` is the object this notebook actually builds.
type(tracks.appearances)

NameError: name 'tracks' is not defined

In [7]:
# NOTE(review): this cell needs `args` (its parse_args line is commented out) and `val_trks` from another
# cell, so it fails on a fresh kernel. It also rebinds `val_tracks`, which another cell builds with
# explicit values (appearance_dim=32) -- confirm which configuration is intended.
val_tracks = Track(tracked_data=val_trks, appearance_dim=args.appearance_dim, distance_threshold=args.distance_threshold, crop_mode=args.crop_mode)

NameError: name 'val_trks' is not defined

In [18]:
# Scratch cell: prints a fixed Hindi string; nothing else in the visible notebook depends on it.
print('नमस्ते। मैं अब बिहारी यंत्र बन गया हूँ।')

नमस्ते। मैं अब बिहारी यंत्र बन गया हूँ।


In [None]:
def parse_tracking_example(example, dataset_ndims,
                           dtype=tf.float32):
    """Parse one serialized tracking example into feature and label dicts.

    Args:
        example: The serialized example handed to
            ``tf.io.parse_single_example``.
        dataset_ndims (dict): Dataset metadata. Entries whose key contains
            ``'shape'`` (e.g. ``'adj_shape'``) give the dense size of a sparse
            feature; every other entry maps a feature key to its number of
            dimensions. The dictionary is not modified.
        dtype (tf dtype): Dtype used to decode the dense (non-sparse)
            features.

    Returns:
        tuple: ``(X_dict, y_dict)``. Keys are the long feature names
        (``'appearances'``, ``'centroids'``, ...). Features listed in
        ``X_names`` go to ``X_dict``; every other parsed feature goes to
        ``y_dict``.
    """
    # Work on a shallow copy: the '*_shape' entries are popped below, and
    # popping them from the caller's dict would break a second call made
    # with the same metadata.
    dataset_ndims = dict(dataset_ndims)

    # Keys returned as model inputs; any other key is treated as a label.
    X_names = ['app', 'cent', 'morph', 'adj']

    # Keys stored as sparse tensors (value + per-axis index features).
    sparse_names = ['adj', 'temp_adj']

    full_name_dict = {'app': 'appearances',
                      'cent': 'centroids',
                      'morph': 'morphologies',
                      'adj': 'adj_matrices',
                      'temp_adj': 'temporal_adj_matrices'}

    # Recreate the example structure
    data = {}
    shape_strings_dict = {}
    shapes_dict = {}

    # Split the '<name>_shape' entries out of the metadata ...
    for key in dataset_ndims:
        if 'shape' in key:
            new_key = '_'.join(key.split('_')[0:-1])
            shapes_dict[new_key] = dataset_ndims[key]

    # ... so that only real feature keys remain in ``dataset_ndims``.
    for key in shapes_dict:
        dataset_ndims.pop('{}_shape'.format(key))

    for key in dataset_ndims:
        if key in sparse_names:
            data[key] = tf.io.SparseFeature(value_key='{}_val'.format(key),
                                            index_key=['{}_ind_{}'.format(key, i)
                                                       for i in range(dataset_ndims[key])],
                                            size=shapes_dict[key],
                                            dtype=tf.float32)
        else:
            # Dense features are stored as a serialized tensor (bytes string).
            data[key] = tf.io.FixedLenFeature([], tf.string)

        # One scalar int64 feature per axis holds the stored shape.
        shape_strings = ['{}_shape_{}'.format(key, i)
                         for i in range(dataset_ndims[key])]
        shape_strings_dict[key] = shape_strings

        for ss in shape_strings:
            data[ss] = tf.io.FixedLenFeature([], tf.int64)

    # Get data
    content = tf.io.parse_single_example(example, data)

    X_dict = {}
    y_dict = {}

    for key in dataset_ndims:

        # Get the feature and reshape
        if key in sparse_names:
            value = content[key]
        else:
            shape = [content[ss] for ss in shape_strings_dict[key]]
            value = tf.io.parse_tensor(content[key], out_type=dtype)
            value = tf.reshape(value, shape=shape)

        if key in X_names:
            X_dict[full_name_dict[key]] = value
        else:
            y_dict[full_name_dict[key]] = value

    return X_dict, y_dict