In [1]:
import deepinterpolation as de
import sys
from shutil import copyfile
import os
from deepinterpolation.generic import JsonSaver, ClassLoader
import datetime
from typing import Any, Dict
import pathlib
import sys

Create a timestamp-based identifier for this run; it is used for record-keeping.


In [2]:
# Take the wall-clock time once and turn it into a minute-resolution tag
# (for example "2021_09_16_03_02") that labels everything this run produces.
now = datetime.datetime.now()
run_uid = "{:%Y_%m_%d_%H_%M}".format(now)

Initialize the meta-parameter objects.

In [3]:
# One independent, initially empty dictionary per component. The following
# cells fill them in and they are later written out as json files.
training_param: Dict[str, Any] = {}
generator_param: Dict[str, Any] = {}
network_param: Dict[str, Any] = {}
generator_test_param: Dict[str, Any] = {}

An epoch is defined here as a fixed number of batches pulled from the dataset. Because our datasets are VERY large, we often cannot
go through the entirety of the data, so we define an epoch slightly differently than usual.

In [4]:
# Number of batches that count as one "epoch" in this notebook; the value is
# shared by the training generator and the trainer configuration below.
steps_per_epoch = 10

These are the parameters used for the validation (test) generator. Here the test is done on the beginning of the data, but
this can be a separate file.

In [5]:
# Settings for the validation ("test") generator, filled in a single pass.
# The key order is left untouched because this dictionary is dumped to json.
generator_test_param.update(
    {
        # Kind of collection the object is looked up in
        "type": "generator",
        # Name of the object inside that collection
        "name": "MDAEphysGenerator",
        # How many frames are supplied on each side of the predicted frame
        "pre_post_frame": 30,
        "train_path": os.path.join("..", "data", "PAIRED_KAMPFF", "c14"),
        "batch_size": 100,
        # Frame window reserved for validation (the start of the data)
        "start_frame": 100,
        "end_frame": 1999,
        # How many frames are left out on each side of the predicted frame
        "pre_post_omission": 1,
        # Epochs are not relevant when testing, so no step count is needed;
        # -1 deactivates it.
        "steps_per_epoch": -1,
    }
)

These are the parameters used for the main data generator.

In [6]:
# Settings for the main (training) data generator. It reads the same
# recording as the validation generator but starts at frame 2000, i.e. right
# after the validation window.
generator_param.update(
    {
        "type": "generator",
        # Batches per epoch, taken from the shared notebook-level setting
        "steps_per_epoch": steps_per_epoch,
        "name": "MDAEphysGenerator",
        "pre_post_frame": 30,
        "train_path": os.path.join("..", "data", "PAIRED_KAMPFF", "c14"),
        "batch_size": 100,
        "start_frame": 2000,
        # NOTE(review): -1 presumably means "up to the end of the data" - confirm
        "end_frame": -1,
        "pre_post_omission": 1,
    }
)

These are the parameters used for the network topology.

In [7]:
# Select the network: "type" picks the collection, "name" picks the
# topology inside that collection.
network_param.update(
    {
        "type": "network",
        "name": "unet_single_ephys_256",
    }
)

These are the parameters used for the training process.

In [8]:
# Trainer configuration, written in one pass with the original key order.
training_param.update(
    {
        "type": "trainer",
        "name": "core_trainer",
        "run_uid": run_uid,
        # Copied from the validation generator settings
        "batch_size": generator_test_param["batch_size"],
        "steps_per_epoch": steps_per_epoch,
        # Interval, in epochs, at which the model may be saved during training
        "period_save": 25,
        "nb_gpus": 0,
        "apply_learning_decay": 0,
        # Number of passes over the entire data; too many iterations will
        # cause overfitting on noise
        "nb_times_through_data": 1,
        "learning_rate": 0.0001,
        # Copied from the validation generator settings
        "pre_post_frame": generator_test_param["pre_post_frame"],
        "loss": "mean_absolute_error",
        # Threads used for data-generator loading; useful when loading is
        # slower than training
        "nb_workers": 1,
    }
)

# Tag identifying this model: <network name>_<loss>_<run uid>
training_param["model_string"] = "_".join(
    (
        network_param["name"],
        training_param["loss"],
        training_param["run_uid"],
    )
)

Where to store ongoing training progress.

In [9]:
# Every run gets its own folder under "ephys". model_string already ends with
# run_uid, so the timestamp appears twice in the folder name.
job_folder_name = "_".join((training_param["model_string"], run_uid))
jobdir = os.path.join("ephys", job_folder_name)
training_param["output_dir"] = jobdir

# Create the folder (including parents); an existing folder is only reported.
try:
    os.makedirs(jobdir)
except FileExistsError:
    print("folder already exists")

Here we create all the json files that are fed to the training. They are used for record-keeping as well as input to the training process.

In [10]:
# Destination of each json description inside the job folder.
path_training = os.path.join(jobdir, "training.json")
path_generator = os.path.join(jobdir, "generator.json")
path_test_generator = os.path.join(jobdir, "test_generator.json")
path_network = os.path.join(jobdir, "network.json")

# Write every parameter dictionary to its file, in the original order
# (training, generator, test generator, network).
for params, json_path in (
    (training_param, path_training),
    (generator_param, path_generator),
    (generator_test_param, path_test_generator),
    (network_param, path_network),
):
    json_obj = JsonSaver(params)
    json_obj.save_json(json_path)

Here we create all objects for training.

In [11]:
# We find the generator obj in the collection using the json file
generator_obj = ClassLoader(path_generator)
generator_test_obj = ClassLoader(path_test_generator)

# We find the network obj in the collection using the json file
network_obj = ClassLoader(path_network)

# We find the training obj in the collection using the json file
trainer_obj = ClassLoader(path_training)

# We build the generators object. This will, among other things, calculate normalizing parameters.
train_generator = generator_obj.find_and_build()(path_generator)
test_generator = generator_test_obj.find_and_build()(path_test_generator)

# We build the network object. This will, among other things, calculate normalizing parameters.
network_callback = network_obj.find_and_build()(path_network)

# We build the training object.
training_class = trainer_obj.find_and_build()(train_generator, test_generator, network_callback, path_training)



Start training. This can take a very long time.

In [12]:
# Start training with the trainer object built in the previous cell.
training_class.run()

Epoch 1/32399
Epoch 2/32399
Epoch 3/32399
Epoch 4/32399
Epoch 5/32399
Epoch 6/32399
Epoch 7/32399
Epoch 8/32399
Epoch 9/32399
Epoch 10/32399
Epoch 11/32399
Epoch 12/32399
Epoch 13/32399
Epoch 14/32399
Epoch 15/32399
Epoch 16/32399
Epoch 17/32399
Epoch 18/32399
Epoch 19/32399
Epoch 20/32399
Epoch 21/32399
Epoch 22/32399
Epoch 23/32399
Epoch 24/32399
Epoch 25/32399
Epoch 00025: val_loss improved from inf to 0.29633, saving model to ephys\unet_single_ephys_256_mean_absolute_error_2021_09_16_03_02_2021_09_16_03_02\2021_09_16_03_02_unet_single_ephys_256_mean_absolute_error_2021_09_16_03_02-0025-0.2963.h5
Epoch 26/32399
Epoch 27/32399
Epoch 28/32399
Epoch 29/32399
Epoch 30/32399
Epoch 31/32399
Epoch 32/32399
Epoch 33/32399
Epoch 34/32399
Epoch 35/32399
Epoch 36/32399
Epoch 37/32399
Epoch 38/32399
Epoch 39/32399
Epoch 40/32399
Epoch 41/32399
Epoch 42/32399
Epoch 43/32399
Epoch 44/32399
Epoch 45/32399
Epoch 46/32399
Epoch 47/32399
Epoch 48/32399
Epoch 49/32399
Epoch 50/32399
Epoch 00050: val_l

KeyboardInterrupt: 

Finalize and save output of the training.

In [None]:
# Finalize the run; per the accompanying note, this saves the training output.
training_class.finalize()