
# Setup


## Imports


In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

from functools import partial
import logging
import pathlib
from pathlib import Path
from pprint import pprint
import sys
from typing import *
import yaml
from yaml import YAMLObject

import humanize
from matplotlib import pyplot as plt, cm
import numpy as np
import pandas as pd
from pymicro.file import file_utils
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import utils
from tensorflow.keras import optimizers
from tensorflow.keras import callbacks
from tensorflow.keras import losses

from tomo2seg import modular_unet
from tomo2seg.volume_img_segm import VolumeImgSegmSequence
from tomo2seg.logger import logger
from tomo2seg import data, viz
from tomo2seg.data import Volume, ModelPaths
from tomo2seg.metadata import Metadata

In [3]:
# Shared NumPy random generator, built from a fixed seed for reproducibility.
random_state = np.random.RandomState(seed=42)

In [4]:
# Let the tomo2seg logger emit everything from DEBUG level upwards.
logger.setLevel(logging.DEBUG)

In [5]:
# Report the TensorFlow version and the GPUs it can see before building anything.
logger.debug(f"{tf.__version__=}")
logger.info(f"Num GPUs Available: {len(tf.config.list_physical_devices('GPU'))}\nThis should be 2 on R790-TOMO.")
logger.debug(f"Both here should return 2 devices...\n{tf.config.list_physical_devices('GPU')=}\n{tf.config.list_logical_devices('GPU')=}")

# xla auto-clustering optimization (see: https://www.tensorflow.org/xla#auto-clustering)
# this seems to break the training, so it is explicitly switched off
tf.config.optimizer.set_jit(False)

# get a distribution strategy to use both gpus (see https://www.tensorflow.org/guide/distributed_training)
strategy = tf.distribute.MirroredStrategy()
# Second name for the same object: any cell that refers to the strategy as
# `mirrored_strategy` then resolves on a fresh kernel instead of raising NameError.
mirrored_strategy = strategy

DEBUG::tomo2seg::{<ipython-input-5-05ac3d1186aa>:<module>:001}::[2020-11-19::10:16:27.304]
tf.__version__='2.2.0'

INFO::tomo2seg::{<ipython-input-5-05ac3d1186aa>:<module>:002}::[2020-11-19::10:16:27.309]
Num GPUs Available: 0
This should be 2 on R790-TOMO.

DEBUG::tomo2seg::{<ipython-input-5-05ac3d1186aa>:<module>:003}::[2020-11-19::10:16:27.364]
Both here should return 2 devices...
tf.config.list_physical_devices('GPU')=[]
tf.config.list_logical_devices('GPU')=[]

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


# Data

In [6]:
# Select which volume / labels version this notebook works on.
# The aliases keep the rest of the notebook independent of the chosen dataset:
# switching datasets only requires changing the two imported constants.
from tomo2seg.datasets import (
    VOLUME_COMPOSITE_V1 as VOLUME_NAME_VERSION,
    VOLUME_COMPOSITE_V1_LABELS_REFINED3 as LABELS_VERSION,
)

# VOLUME_NAME_VERSION unpacks into a (name, version) pair.
volume_name, volume_version = VOLUME_NAME_VERSION
labels_version = LABELS_VERSION

logger.info(f"{volume_name=} {volume_version=} {labels_version=}")

INFO::tomo2seg::{<ipython-input-6-e97f5958400b>:<module>:009}::[2020-11-19::10:16:28.409]
volume_name='PA66GF30' volume_version='v1' labels_version='refined3'



In [7]:
# Metadata/paths objects

## Volume
# Build the volume descriptor through `Volume.with_check` and log it.
volume = Volume.with_check(name=volume_name, version=volume_version)
logger.info(f"{volume=}")

def _read_raw(path_: Path, volume_: Volume):
    """Read a raw volume file laid out as described by ``volume_``'s metadata.

    Args:
        path_: location of the raw file on disk.
        volume_: volume whose ``metadata.dtype`` and ``metadata.dimensions``
            give the data type and dimensions to read the file with.

    Returns:
        Whatever ``file_utils.HST_read`` (from pymicro) returns for that file.
    """
    # Use the volume that was passed in, not the notebook-level `volume`, so the
    # reader is correct for any volume it gets bound to.
    metadata = volume_.metadata
    return file_utils.HST_read(
        str(path_),  # it doesn't accept paths...
        # pre-loaded kwargs
        autoparse_filename=False,  # the file names are not properly formatted
        data_type=metadata.dtype,
        dims=metadata.dimensions,
        verbose=True,
    )

# Reader pre-bound to this notebook's volume: callers only pass a path.
read_raw = partial(_read_raw, volume_=volume)

logger.info("Loading data from disk.")

## Data
voldata = read_raw(volume.data_path)
logger.debug(f"{voldata.shape=}")

# Cut the train / validation partitions out of the full volume.
voldata_train, voldata_val = (
    partition.get_volume_partition(voldata)
    for partition in (volume.train_partition, volume.val_partition)
)
logger.debug(f"{voldata_train.shape=} {voldata_val.shape=}")

# The full volume is not needed under this name anymore.
del voldata

## Labels
vollabels = read_raw(volume.versioned_labels_path(labels_version))
logger.debug(f"{vollabels.shape=}")

# Same partitioning, applied to the label volume.
vollabels_train, vollabels_val = (
    partition.get_volume_partition(vollabels)
    for partition in (volume.train_partition, volume.val_partition)
)
logger.debug(f"{vollabels_train.shape=} {vollabels_val.shape=}")

del vollabels

DEBUG::tomo2seg::{data.py:with_check:214}::[2020-11-17::15:55:28.938]
vol=Volume(name='PA66GF30', version='v1', _metadata=None)

ERROR::tomo2seg::{data.py:with_check:232}::[2020-11-17::15:55:28.940]
Missing file: /home/users/jcasagrande/projects/tomo2seg/data/PA66GF30.v1/PA66GF30.v1.labels.raw

Missing file: /home/users/jcasagrande/projects/tomo2seg/data/PA66GF30.v1/PA66GF30.v1.weights.raw

DEBUG::tomo2seg::{data.py:metadata:171}::[2020-11-17::15:55:28.942]
Loading metadata from `/home/users/jcasagrande/projects/tomo2seg/data/PA66GF30.v1/PA66GF30.v1.metadata.yml`.

INFO::tomo2seg::{<ipython-input-7-d495b0b61438>:<module>:007}::[2020-11-17::15:55:28.948]
volume=Volume(name='PA66GF30', version='v1', _metadata=Volume.Metadata(dimensions=[1300, 1040, 1900], dtype='uint8', labels=[0, 1, 2], labels_names={0: 'matrix', 1: 'fiber', 2: 'porosity'}, set_partitions={'train': {'x_range': [0, 1299], 'y_range': [0, 1039], 'z_range': [0, 1299], 'alias': 'train'}, 'val': {'x_range': [0, 1299], 'y_rang

In [8]:
# NOTE(review): unconditional stop — under "Run All" nothing below this cell
# executes. Presumably a deliberate guard against launching the training by
# accident; confirm, then remove it or give the exception an explanatory message.
raise Exception()

Exception: 

## Data Sequences

In [None]:
# Global batch size = per-replica batch size x number of replicas kept in sync
# by the distribution strategy.
batch_size_per_replica = 16
batch_size = batch_size_per_replica * strategy.num_replicas_in_sync
crop_size = 224  # multiple of 16 (requirement of a 4-level u-net)
n_geometric_augmentations = 3
slice_axes = (0, 1, 2,)
# Label values come from the volume's metadata.
labels_list = volume.metadata.labels

# Training sequence: shuffled, augmented crops drawn along all three axes of
# the training partition loaded above.
train_generator = VolumeImgSegmSequence(
    source_volume=voldata_train,
    label_volume=vollabels_train,
    labels=labels_list,
    axes=slice_axes,
    batch_size=batch_size,
    shuffle=True,
    normalization_const=255,
    n_geometric_augmentations=n_geometric_augmentations,
    random_state=42,
    crop_size=crop_size,
)

# Validation sequence: fixed order, no augmentation, axis 2 only, on the
# validation partition.
val_generator = VolumeImgSegmSequence(
    source_volume=voldata_val,
    label_volume=vollabels_val,
    labels=labels_list,
    axes=(2,),
    batch_size=10,
    shuffle=False,
    normalization_const=255,
    n_geometric_augmentations=0,
    crop_size=crop_size,
)

# Model

In [None]:
# Network hyper-parameters: builder function, width of the first level and
# the (square, single-channel) input shape derived from the crop size.
model_generator_function = modular_unet.u_net
nb_filters_0 = 12
input_shape = (crop_size, crop_size, 1)

# Name of this run and the paths object built from it.
model_name = "dryrun-03"
model_paths = ModelPaths(model_name)
logger.info("Model paths object: %s", model_paths)

# lr = 4e-3
# n_epochs = 30


In [None]:
# Checkpoint written by the autosave callback (it appends ".hdf5" to the base path).
autosaved_model_file = str(model_paths.autosaved_model_path) + '.hdf5'

# Create (or reload) and compile the model inside the distribution strategy scope.
with strategy.scope():
    if pathlib.Path(autosaved_model_file).exists():
        # A previous run left a checkpoint behind: resume from it.
        logger.warning("An autosaved model already exists, loading it instead of creating a new one!")
        model = keras.models.load_model(autosaved_model_file)

    else:
        # Refuse to silently replace a model that was already defined under this name.
        assert not model_paths.model_path.exists(), f"Please delete '{model_paths.model_path}' to redefine it."

        model = model_generator_function(input_shape, nb_filters_0=nb_filters_0, output_channels=3, name=model_name)
        model.save(filepath=model_paths.model_path, overwrite=False)

    # Adam with its default learning rate; the training cells set the learning
    # rate explicitly before each round.
    optimizer = optimizers.Adam()
    loss_func = losses.categorical_crossentropy

    model.compile(loss=loss_func, optimizer=optimizer)
    # Save again so the file on disk holds the compiled model.
    model.save(model_paths.model_path)

In [None]:
# write the model summary in a file, one summary line per text line
with model_paths.summary_path.open("w") as summary_file:
    model.summary(
        print_fn=lambda line: summary_file.write(line + "\n"),
        line_length=140,
    )

# same for the architecture (trailing `;` hides the cell output)
utils.plot_model(model, show_shapes=True, to_file=model_paths.architecture_plot_path);



# Callbacks

In [None]:

# Checkpoint callback: writes the model to the autosave file, keeping only the
# best one according to the validation loss.
autosave_cb = callbacks.ModelCheckpoint(
    filepath=str(model_paths.autosaved_model_path) + ".hdf5",
    monitor="val_loss",
    mode="auto",
    save_best_only=True,
    verbose=0,
)

# Plateau callback bound to the current `model` and to the autosave checkpoint file.
# NOTE(review): `ReduceLROnPlateauBacktrack` is not imported in any cell visible
# here — confirm where it is defined and import it alongside the other imports.
reduce_lr_cb = ReduceLROnPlateauBacktrack(
    model,
    str(model_paths.autosaved_model_path) + ".hdf5",
    monitor="val_loss",
    factor=0.5,
    patience=5,
    verbose=1,
    mode="auto",
    min_delta=0,
    min_lr=1e-5,
)  # no trailing comma: it would turn the callback into a 1-tuple

# CSV log of the training metrics; the file is rewritten (not appended to)
# each time a training starts.
logger_cb = callbacks.CSVLogger(
    filename=str(model_paths.logger_path),
    separator=",",
    append=False,
)

def get_logspace_lr_cb(min_lr, max_lr, n_epochs, epoch_zero=0):
    """Build a scheduler callback that sweeps the learning rate on a log scale.

    Despite their names, ``min_lr`` and ``max_lr`` are base-10 *exponents*:
    they are forwarded unchanged to ``np.logspace``, so the sweep goes from
    ``10 ** min_lr`` to ``10 ** max_lr`` in ``n_epochs`` steps.

    Args:
        min_lr: exponent of the first learning rate.
        max_lr: exponent of the last learning rate.
        n_epochs: number of epochs (and learning rates) in the sweep.
        epoch_zero: epoch number at which the sweep starts.

    Returns:
        A ``tf.keras.callbacks.LearningRateScheduler`` wrapping the schedule.

    Raises:
        AssertionError: from the schedule function, when it is asked for an
            epoch outside ``[epoch_zero, epoch_zero + n_epochs)``.
    """
    schedule = np.logspace(min_lr, max_lr, n_epochs)

    def schedule_function(epoch, lr):
        step = epoch - epoch_zero
        # A negative step would silently index the schedule from its end.
        assert step >= 0, "Schedule has not started yet!"
        assert step < n_epochs, "Schedule is over!"
        return schedule[step]

    return tf.keras.callbacks.LearningRateScheduler(schedule_function)

# Range-test schedule: 30 epochs, learning rate swept from 10**-4.5 to 10**-1.
lr_range_test_schedule_cb = get_logspace_lr_cb(min_lr=-4.5, max_lr=-1, n_epochs=30)

In [None]:
# unique, counts = np.unique(train_labels, return_counts=True)
# class_freqs = dict(zip(unique, counts))
# total = sum(class_freqs.values())
# class_freqs = {k: v / total for k, v in class_freqs.items()}
# class_freqs
# class_freqs_inv = {k: 1. / v for k, v in class_freqs.items()}
# class_freqs_inv
# import tensorflow as tf
# from tensorflow.python.keras import backend as K
# weights = [class_freqs_inv[i] for i in range(len(class_freqs_inv))]

# def weighted_cross_entropy(y_true, y_pred):
#   Kweights = K.constant(weights)
#   if not K.is_keras_tensor(y_pred):
#     y_pred = K.constant(y_pred)
#   y_true = K.cast(y_true, y_pred.dtype)
#   return K.categorical_crossentropy(y_true, y_pred) * K.sum(y_true * y_pred, axis=-1)

# Summary before training

In [None]:
# Things that are used after the training but that should also appear in the
# pre-training summary.

## Metadata

## Volume slices

## Generator samples

# Learning rate range test

In [None]:
# Callbacks for the learning-rate range test: the scheduled sweep is used
# here and the plateau callback is left out.
cb = [
    autosave_cb,
    logger_cb,
    callbacks.TerminateOnNaN(),
    lr_range_test_schedule_cb,
]

# Set the training sequence's `force_shorter_epoch` attribute for the range test.
train_generator.force_shorter_epoch = 100

In [None]:
# Run the range test: one epoch per learning rate of the 30-step schedule.
history_lr_rate_range_test = model.fit(
    x=train_generator,
    epochs=30,
    callbacks=cb,
    validation_data=val_generator,
    validation_steps=100,
    use_multiprocessing=False,
    verbose=1,
)

In [None]:
# Loss curves of the range test against the swept learning rates (log x-axis).
plt.subplots(figsize=(10, 10))
plt.tight_layout()
viz.display_training_curves(
    history_lr_rate_range_test.history['loss'],
    history_lr_rate_range_test.history['val_loss'],
    'loss',
    111,
    x=np.logspace(-4.5, -1, 30),
)
plt.xscale('log')

# Training


In [None]:
# Reload the checkpoint written by the autosave callback, inside the
# distribution strategy scope like the other `load_model` calls in this notebook.
with strategy.scope():
    model = tf.keras.models.load_model(str(model_paths.autosaved_model_path) + ".hdf5")

In [None]:
# Use the backend that ships with TensorFlow: the model is a tf.keras model,
# and the later training cells import `K` from `tensorflow.keras` as well.
from tensorflow.keras import backend as K

# Starting learning rate for this training round.
lr = 0.001
K.set_value(model.optimizer.learning_rate, lr)

# NOTE(review): `reduce_lr_cb` was constructed with the `model` object that
# existed before the checkpoint was reloaded; presumably it should be rebuilt
# with the reloaded model, as the later rounds do — confirm.
cb = [
    autosave_cb,
    logger_cb,
    callbacks.TerminateOnNaN(),
    reduce_lr_cb,
]

train_generator.force_shorter_epoch = 100

In [None]:
# First training round: epoch counter resumes at 30 and runs up to epoch 40.
history = model.fit(
    x=train_generator,
    initial_epoch=30,
    epochs=40,
    callbacks=cb,
    validation_data=val_generator,
    validation_steps=100,
    use_multiprocessing=False,
    verbose=1,
)

In [None]:
# Training vs. validation loss of the first training round.
plt.subplots(figsize=(10, 10))
plt.tight_layout()
viz.display_training_curves(
    history.history['loss'],
    history.history['val_loss'],
    'loss',
    111,
)

# Training 2nd round

In [None]:
# Display half of the optimizer's current learning rate (value is only shown, not assigned).
model.optimizer.learning_rate / 2

In [None]:
from tensorflow.keras import backend as K

# Plateau callback for the second round, rebuilt around the current `model`.
reduce_lr_cb = ReduceLROnPlateauBacktrack(
    model,
    str(model_paths.autosaved_model_path) + ".hdf5",
    monitor="val_loss",
    mode="auto",
    factor=0.5,
    patience=4,
    min_delta=0,
    min_lr=1e-5,
    verbose=1,
)

# Starting learning rate for the second round.
lr = 1.5773934e-05
K.set_value(model.optimizer.learning_rate, lr)

cb = [
    autosave_cb,
    logger_cb,
    callbacks.TerminateOnNaN(),
    reduce_lr_cb,
]

train_generator.force_shorter_epoch = 300

In [None]:
# Second training round: epoch counter resumes at 40 and runs up to epoch 55.
history_2 = model.fit(
    x=train_generator,
    initial_epoch=40,
    epochs=55,
    callbacks=cb,
    validation_data=val_generator,
    validation_steps=100,
    use_multiprocessing=False,
    verbose=1,
)

In [None]:
# Reload the autosaved checkpoint inside the distribution strategy scope.
# `strategy` is the name the MirroredStrategy is created under in the setup cell.
with strategy.scope():
    model = tf.keras.models.load_model(str(model_paths.autosaved_model_path) + ".hdf5")

In [None]:
from tensorflow.keras import backend as K

# Plateau callback for the next round, rebuilt around the freshly reloaded `model`.
reduce_lr_cb = ReduceLROnPlateauBacktrack(
    model,
    str(model_paths.autosaved_model_path) + ".hdf5",
    monitor="val_loss",
    mode="auto",
    factor=2./3,
    patience=3,
    min_delta=0,
    min_lr=1e-7,
    verbose=1,
)

# Starting learning rate for this round.
lr = 1e-05
K.set_value(model.optimizer.learning_rate, lr)

cb = [
    autosave_cb,
    logger_cb,
    callbacks.TerminateOnNaN(),
    reduce_lr_cb,
]

# Reset the training sequence's `force_shorter_epoch` attribute to None.
train_generator.force_shorter_epoch = None

In [None]:
# Persist the current model to the run's model path.
model.save(model_paths.model_path)