diff --git a/2019-asr-resnet-lace-cnn/LACE/DataAug/lace_specaug_logmel+d+dd.config b/2019-asr-resnet-lace-cnn/LACE/DataAug/lace_specaug_logmel+d+dd.config new file mode 100644 index 00000000..66c9c715 --- /dev/null +++ b/2019-asr-resnet-lace-cnn/LACE/DataAug/lace_specaug_logmel+d+dd.config @@ -0,0 +1,314 @@ +#!crnn/rnn.py +# kate: syntax python; +# see also file:///u/zeyer/setups/quaero-en/training/quaero-train11/50h/ann/2015-07-29--lstm-gt50/config-train/dropout01.3l.n500.custom_lstm.adam.lr1e_3.config + +import os +import numpy +from subprocess import check_output + +# task +use_tensorflow = True +task = "train" +device = "gpu" +multiprocessing = True +update_on_device = True + +_cf_cache = {} + +def cf(filename): + """Cache manager""" + if filename in _cf_cache: + return _cf_cache[filename] + if check_output(["hostname"]).strip().decode("utf8") in ["sulfid", "zink", "cobalt", "niob"]: + print("use local file: %s" % filename) + return filename # for debugging + cached_fn = check_output(["cf", filename]).strip().decode("utf8") + assert os.path.exists(cached_fn) + _cf_cache[filename] = cached_fn + return cached_fn + +# data +context_window = 1 + +window = 1 # +feature_dim = 64 # LogMel 64-dim +channel_num = 3 +num_inputs = feature_dim * channel_num * window +num_outputs = 9001 # CART labels +EpochSplit = 6 + +def get_sprint_dataset(data): + assert data in ["train", "cv"] + epochSplit = {"train": EpochSplit, "cv": 1} + + # see /u/tuske/work/ASR/switchboard/corpus/readme + # and zoltans mail https://mail.google.com/mail/u/0/#inbox/152891802cbb2b40 + files = {} + files["config"] = "config/training.config" + files["corpus"] = "/u/corpora/speech/switchboard-1/xml/swb1-all/swb1-all.corpus.gz" + files["segments"] = "dependencies/seg_%s" % {"train":"train", "cv":"cv_head3000"}[data] + files["features"] = "/u/bozheniuk/setups/switchboard/feature_extraction/cluster_setup/logmel64_30/data/logmel.train.bundle" + files["lexicon"] = "/u/tuske/work/ASR/switchboard/corpus/train.lex.v1_0_3.ci.gz" + files["alignment"] = "dependencies/tuske__2016_01_28__align.combined.train" + files["cart"] = "/u/tuske/work/ASR/switchboard/initalign/data/%s" % {9001: "cart-9000"}[num_outputs] + for k, v in sorted(files.items()): + assert os.path.exists(v), "%s %r does not exist" % (k, v) + estimated_num_seqs = {"train": 227047, "cv": 3000} # wc -l segment-file + + # features: /u/tuske/work/ASR/switchboard/feature.extraction/gt40_40/data/gt.train.* + args = [ + "--config=" + files["config"], + lambda: "--*.corpus.file=" + cf(files["corpus"]), + lambda: "--*.corpus.segments.file=" + cf(files["segments"]), + {"train": "--*.corpus.segment-order-shuffle=true", "cv": "--*.corpus.segment-order-sort-by-time-length=true"}[data], + "--*.state-tying.type=cart", + lambda: "--*.state-tying.file=" + cf(files["cart"]), + "--*.trainer-output-dimension=%i" % num_outputs, + lambda: "--*.lexicon.file=" + cf(files["lexicon"]), + lambda: "--*.alignment-cache-path=" + cf(files["alignment"]), + lambda: "--*.feature-cache-path=" + cf(files["features"]), + #"--*.mean-normalization.file=dependencies/setup-base/step254-hybrid-mlp-ibm-cmllr/mlp.4/train_mlp/normalize_layer1/mean", + #"--*.variance-normalization.file=dependencies/setup-base/step254-hybrid-mlp-ibm-cmllr/mlp.4/train_mlp/normalize_layer1/std", + "--*.log-channel.file=log/crnn.sprint.train-dataset.xml", + "--*.window-size=1", + "--*.trainer-output-dimension=%i" % num_outputs + ] + return { + "class": "ExternSprintDataset", "sprintTrainerExecPath": "sprint-executables/nn-trainer", + 
"sprintConfigStr": args, + "partitionEpoch": epochSplit[data], + "estimated_num_seqs": estimated_num_seqs[data] // (epochSplit[data] or 1), + } + +cache_size = "0" +# network +# (also defined by num_inputs & num_outputs) +dropout = 0.05 +ldropout = 0.05 +L2 = 0.1 + +# bn params +masked_time = True +fused = True +axes = ["f"] + +bn_momentum = 0.997 +bn_epsilon = 1e-5 + +cur_feat_dim = feature_dim +network = {} +_last = "data" + +def add_sequential_layer(name, d, from_=None): + global _last, network + assert "from" not in d + if from_ is not None: + d["from"] = from_ + else: + d["from"] = [_last] + assert name not in network + network[name] = d + _last = name + +# data augmentation +def summary(name, x): + """ + :param str name: + :param tf.Tensor x: (batch,time,feature) + """ + import tensorflow as tf + # tf.summary.image wants [batch_size, height, width, channels], + # we have (batch, time, feature). + img = tf.expand_dims(x, axis=3) # (batch,time,feature,1) + img = tf.transpose(img, [0, 2, 1, 3]) # (batch,feature,time,1) + tf.summary.image(name, img, max_outputs=10) + tf.summary.scalar("%s_max_abs" % name, tf.reduce_max(tf.abs(x))) + mean = tf.reduce_mean(x) + tf.summary.scalar("%s_mean" % name, mean) + stddev = tf.sqrt(tf.reduce_mean(tf.square(x - mean))) + tf.summary.scalar("%s_stddev" % name, stddev) + tf.summary.histogram("%s_hist" % name, tf.reduce_max(tf.abs(x), axis=2)) + + +def _mask(x, axis, pos, max_amount): + """ + :param tf.Tensor x: (batch,time,feature) + :param int axis: + :param tf.Tensor pos: (batch,) + :param int max_amount: inclusive + """ + import tensorflow as tf + ndim = x.get_shape().ndims + n_batch = tf.shape(x)[0] + dim = tf.shape(x)[axis] + amount = tf.random_uniform(shape=(n_batch,), minval=1, maxval=max_amount + 1, dtype=tf.int32) + pos2 = tf.minimum(pos + amount, dim) + idxs = tf.expand_dims(tf.range(0, dim), 0) # (1,dim) + pos_bc = tf.expand_dims(pos, 1) # (batch,1) + pos2_bc = tf.expand_dims(pos2, 1) # (batch,1) + cond = tf.logical_and(tf.greater_equal(idxs, pos_bc), tf.less(idxs, pos2_bc)) # (batch,dim) + cond = tf.reshape(cond, [tf.shape(x)[i] if i in (0, axis) else 1 for i in range(ndim)]) + from TFUtil import where_bc + x = where_bc(cond, 0.0, x) + return x + + +def random_mask(x, axis, min_num, max_num, max_dims): + """ + :param tf.Tensor x: (batch,time,feature) + :param int axis: + :param int|tf.Tensor min_num: + :param int|tf.Tensor max_num: inclusive + :param int max_dims: inclusive + """ + import tensorflow as tf + n_batch = tf.shape(x)[0] + num = tf.random_uniform(shape=(n_batch,), minval=min_num, maxval=max_num + 1, dtype=tf.int32) + # https://github.com/tensorflow/tensorflow/issues/9260 + # https://timvieira.github.io/blog/post/2014/08/01/gumbel-max-trick-and-weighted-reservoir-sampling/ + z = -tf.log(-tf.log(tf.random_uniform((n_batch, tf.shape(x)[axis]), 0, 1))) + _, indices = tf.nn.top_k(z, tf.reduce_max(num)) + _, x = tf.while_loop( + cond=lambda i, _: tf.less(i, tf.reduce_max(num)), + body=lambda i, x: ( + i + 1, + tf.where( + tf.less(i, num), + _mask(x, axis=axis, pos=indices[:, i], max_amount=max_dims), + x)), + loop_vars=(0, x)) + return x + + +def random_warp(x, std, scale): + """ + :param tf.Tensor x: (batch,time,dim) + :param (float,float) std: + :param (float,float) scale: + :rtype: tf.Tensor + :return: x transformed + """ + import tensorflow as tf + from TFUtil import create_random_warp_flow_2d, dense_image_warp + x = tf.expand_dims(x, axis=-1) + flow = create_random_warp_flow_2d(tf.shape(x)[:-1], std=std, scale=scale) + x = 
dense_image_warp(x, flow=flow) + x = tf.squeeze(x, axis=-1) + return x + + +def transform(x, network): + import tensorflow as tf + def get_masked(): + x_masked = x + x_masked = random_mask(x_masked, axis=1, min_num=1, max_num=3, max_dims=40) + x_masked = random_mask(x_masked, axis=2, min_num=1, max_num=2, max_dims=30) + return x_masked + x = network.cond_on_train(get_masked, lambda: x) + return x + + +def jump_net(prefix, inputs, filters, dilation_rate, data_format, conv_time_dim=False): + if data_format == 'channels_first': + NCHW = True + else: + NCHW = False + filter_size = (3, 3) + strides = (1, 1) + padding = "SAME" + use_frequency = False + add_sequential_layer("%s_c1" % prefix, {"class": "conv", "n_out": filters, "filter_size": filter_size, "auto_use_channel_first": NCHW, + "strides": strides, "dilation_rate": (dilation_rate, 1), "padding": padding, "activation": None, "with_bias": False, "dropout": dropout, + "forward_weights_init": "xavier", "L2": L2}) + add_sequential_layer("%s_bn1" % prefix, {"class": "batch_norm", "masked_time": masked_time, "axes": axes, "fused": fused, "momentum": bn_momentum, "epsilon": bn_epsilon}) + add_sequential_layer("%s_y1" % prefix, {"class": "activation", "activation": "relu", "batch_norm": False}) + add_sequential_layer("%s_c2" % prefix, {"class": "conv", "n_out": filters, "filter_size": filter_size, "auto_use_channel_first": NCHW, + "strides": strides, "dilation_rate": (dilation_rate, 1), "padding": padding, "activation": None, "with_bias": False, "dropout": dropout, + "forward_weights_init": "xavier", "L2": L2}) + + add_sequential_layer("%s_p" % prefix, {"class": "combine", "kind": "add"}, from_=["%s_c2" % prefix, inputs]) + add_sequential_layer("%s_bn2" % prefix, {"class": "batch_norm", "masked_time": masked_time, "axes": axes, "fused": fused, "momentum": bn_momentum, "epsilon": bn_epsilon}) + add_sequential_layer("%s_o" % prefix, {"class": "activation", "activation": "relu", "batch_norm": False}) + return "%s_o" % prefix + + +def jump_block(prefix, inputs, jump_net_num, filters, data_format, dilation_rate, attention=False, conv_time_dim=False): + if data_format == 'channels_first': + NCHW = True + else: + NCHW = False + filter_size = (3, 3) + strides = (1, 1) + padding = "SAME" + use_frequency = False + add_sequential_layer("%s_c" % prefix, {"class": "conv", "n_out": filters, "filter_size": filter_size, "auto_use_channel_first": NCHW, + "strides": strides, "dilation_rate": dilation_rate, "padding": padding, "activation": None, "with_bias": False, "dropout": dropout, + "forward_weights_init": "xavier", "L2": L2}) + # doing strides + add_sequential_layer("%s_c_strides" % prefix, {"class": "slice", "axis": "s:1", "slice_step": 2}) + + dilation_rate *= 2 + # print("dilation_rate: ", dilation_rate) + inputs = "%s_c_strides" % prefix + for i in range(1, jump_net_num + 1): + inputs = jump_net("%s_net%i" % (prefix, i + 1), inputs, filters, dilation_rate, data_format=data_format, conv_time_dim=conv_time_dim) + return inputs + + +def lacea(inputs): + block_sizes = [2, 2, 2, 2] + filters = 128 + data_format = 'channels_last' + conv_time_dim = True + dilation_rate = 1 + attention = False + + for i, num_blocks in enumerate(block_sizes): + inputs = jump_block("block%i" % (i + 1), inputs, num_blocks, filters, data_format, dilation_rate, attention=attention, conv_time_dim=conv_time_dim) + filters *= 2 + dilation_rate *= 2 + return + +add_sequential_layer("split", {"class": "split_dims", "axis": "f", "dims": (channel_num, feature_dim)}) # output: (batch, time, 
window = 61, feature = 40, channel = 1) +add_sequential_layer("swap_axes", {"class": "swap_axes", "axis1": "s:1", "axis2": "f"}) +add_sequential_layer("data_aug", {"class": "eval", "eval": "self.network.get_config().typed_value('transform')(source(0), network=self.network)"}) +lacea(inputs="data_aug") +add_sequential_layer("out_pool", {"class": "reduce", "mode": "avg", "axes": "s:1", "keep_dims": False}) +add_sequential_layer("output", {"class": "softmax", "loss": "ce", "L2": L2, "n_out": num_outputs, "loss_opts": {"focal_loss_factor": 2.0}, "dropout": ldropout}) + +train = get_sprint_dataset("train") +dev = get_sprint_dataset("cv") + +############## debug stuff +debug_print_layer_output_template = True # useful for debugging +#debug_print_layer_output_sizes = True +#debug_print_layer_output_shape = True # might be useful for debugging +#debug_shell_in_runner = True +log_batch_size = True +tf_log_memory_usage = True +############## debug stuff + +# trainer +batching = "random" +batch_size = 24 * (150) +max_seqs = 500 +chunking = "150:150" +truncation = -1 +num_epochs = 120 +gradient_clip = 0 +gradient_noise = 0.1 +momentum = 0.99 +learning_rate = 5e-6 +learning_rate_file = "newbob.data" +learning_rate_control = "newbob_multi_epoch" +learning_rate_control_relative_error_relative_lr = True +newbob_multi_num_epochs = 6 +newbob_multi_update_interval = 1 +model = "net-model/network" + +cleanup_old_models = True +store_metadata_mod_step = None + +# log +log = "log/crnn.train.log" +log_verbosity = 5 diff --git a/2019-asr-resnet-lace-cnn/LACE/DataAug/lacefs_specaug_gt.config b/2019-asr-resnet-lace-cnn/LACE/DataAug/lacefs_specaug_gt.config new file mode 100644 index 00000000..1e0959ee --- /dev/null +++ b/2019-asr-resnet-lace-cnn/LACE/DataAug/lacefs_specaug_gt.config @@ -0,0 +1,306 @@ +#!crnn/rnn.py +# kate: syntax python; +# see also file:///u/zeyer/setups/quaero-en/training/quaero-train11/50h/ann/2015-07-29--lstm-gt50/config-train/dropout01.3l.n500.custom_lstm.adam.lr1e_3.config + +import os +import numpy +from subprocess import check_output + +# task +use_tensorflow = True +task = "train" +device = "gpu" +multiprocessing = True +update_on_device = True + +_cf_cache = {} + +def cf(filename): + """Cache manager""" + if filename in _cf_cache: + return _cf_cache[filename] + if check_output(["hostname"]).strip().decode("utf8") in ["sulfid", "zink", "cobalt", "niob"]: + print("use local file: %s" % filename) + return filename # for debugging + cached_fn = check_output(["cf", filename]).strip().decode("utf8") + assert os.path.exists(cached_fn) + _cf_cache[filename] = cached_fn + return cached_fn + +# data +context_window = 1 +window = 1 # 30-1-30 +feature_dim = 40 # Gammatone 40-dim +channel_num = 1 +num_inputs = feature_dim * channel_num * window +num_outputs = 9001 # CART labels +EpochSplit = 6 + +def get_sprint_dataset(data): + assert data in ["train", "cv"] + epochSplit = {"train": EpochSplit, "cv": 1} + # see /u/tuske/work/ASR/switchboard/corpus/readme + # and zoltans mail https://mail.google.com/mail/u/0/#inbox/152891802cbb2b40 + files = {} + files["config"] = "config/training.config" + files["corpus"] = "/u/corpora/speech/switchboard-1/xml/swb1-all/swb1-all.corpus.gz" + files["segments"] = "dependencies/seg_%s" % {"train":"train", "cv":"cv_head3000"}[data] + files["features"] = "/u/tuske/work/ASR/switchboard/feature.extraction/gt40_40/data/gt.train.bundle" + files["lexicon"] = "/u/tuske/work/ASR/switchboard/corpus/train.lex.v1_0_3.ci.gz" + files["alignment"] = 
"dependencies/tuske__2016_01_28__align.combined.train" + files["cart"] = "/u/tuske/work/ASR/switchboard/initalign/data/%s" % {9001: "cart-9000"}[num_outputs] + for k, v in sorted(files.items()): + assert os.path.exists(v), "%s %r does not exist" % (k, v) + estimated_num_seqs = {"train": 227047, "cv": 3000} # wc -l segment-file + args = [ + "--config=" + files["config"], + lambda: "--*.corpus.file=" + cf(files["corpus"]), + lambda: "--*.corpus.segments.file=" + cf(files["segments"]), + {"train": "--*.corpus.segment-order-shuffle=true", "cv": "--*.corpus.segment-order-sort-by-time-length=true"}[data], + "--*.state-tying.type=cart", + lambda: "--*.state-tying.file=" + cf(files["cart"]), + "--*.trainer-output-dimension=%i" % num_outputs, + lambda: "--*.lexicon.file=" + cf(files["lexicon"]), + lambda: "--*.alignment-cache-path=" + cf(files["alignment"]), + lambda: "--*.feature-cache-path=" + cf(files["features"]), + "--*.log-channel.file=log/crnn.sprint.train-dataset.xml", + "--*.window-size=1", + "--*.trainer-output-dimension=%i" % num_outputs + ] + return { + "class": "ExternSprintDataset", "sprintTrainerExecPath": "sprint-executables/nn-trainer", + "sprintConfigStr": args, + "partitionEpoch": epochSplit[data], + "estimated_num_seqs": estimated_num_seqs[data] // (epochSplit[data] or 1), + } +cache_size = "0" +# network +# (also defined by num_inputs & num_outputs) +dropout = 0.05 +ldropout = 0.05 +L2 = 0.1 + +# bn params +masked_time = False +fused = True +axes = ["f"] + +bn_momentum = 0.997 +bn_epsilon = 1e-5 + +cur_feat_dim = feature_dim +network = {} +_last = "data" + +def add_sequential_layer(name, d, from_=None): + global _last, network + assert "from" not in d + if from_ is not None: + d["from"] = from_ + else: + d["from"] = [_last] + assert name not in network + network[name] = d + _last = name + +# data augmentation +def summary(name, x): + """ + :param str name: + :param tf.Tensor x: (batch,time,feature) + """ + import tensorflow as tf + # tf.summary.image wants [batch_size, height, width, channels], + # we have (batch, time, feature). 
+ img = tf.expand_dims(x, axis=3) # (batch,time,feature,1) + img = tf.transpose(img, [0, 2, 1, 3]) # (batch,feature,time,1) + tf.summary.image(name, img, max_outputs=10) + tf.summary.scalar("%s_max_abs" % name, tf.reduce_max(tf.abs(x))) + mean = tf.reduce_mean(x) + tf.summary.scalar("%s_mean" % name, mean) + stddev = tf.sqrt(tf.reduce_mean(tf.square(x - mean))) + tf.summary.scalar("%s_stddev" % name, stddev) + tf.summary.histogram("%s_hist" % name, tf.reduce_max(tf.abs(x), axis=2)) + + +def _mask(x, axis, pos, max_amount): + """ + :param tf.Tensor x: (batch,time,feature) + :param int axis: + :param tf.Tensor pos: (batch,) + :param int max_amount: inclusive + """ + import tensorflow as tf + ndim = x.get_shape().ndims + n_batch = tf.shape(x)[0] + dim = tf.shape(x)[axis] + amount = tf.random_uniform(shape=(n_batch,), minval=1, maxval=max_amount + 1, dtype=tf.int32) + pos2 = tf.minimum(pos + amount, dim) + idxs = tf.expand_dims(tf.range(0, dim), 0) # (1,dim) + pos_bc = tf.expand_dims(pos, 1) # (batch,1) + pos2_bc = tf.expand_dims(pos2, 1) # (batch,1) + cond = tf.logical_and(tf.greater_equal(idxs, pos_bc), tf.less(idxs, pos2_bc)) # (batch,dim) + cond = tf.reshape(cond, [tf.shape(x)[i] if i in (0, axis) else 1 for i in range(ndim)]) + from TFUtil import where_bc + x = where_bc(cond, 0.0, x) + return x + + +def random_mask(x, axis, min_num, max_num, max_dims): + """ + :param tf.Tensor x: (batch,time,feature) + :param int axis: + :param int|tf.Tensor min_num: + :param int|tf.Tensor max_num: inclusive + :param int max_dims: inclusive + """ + import tensorflow as tf + n_batch = tf.shape(x)[0] + num = tf.random_uniform(shape=(n_batch,), minval=min_num, maxval=max_num + 1, dtype=tf.int32) + # https://github.com/tensorflow/tensorflow/issues/9260 + # https://timvieira.github.io/blog/post/2014/08/01/gumbel-max-trick-and-weighted-reservoir-sampling/ + z = -tf.log(-tf.log(tf.random_uniform((n_batch, tf.shape(x)[axis]), 0, 1))) + _, indices = tf.nn.top_k(z, tf.reduce_max(num)) + _, x = tf.while_loop( + cond=lambda i, _: tf.less(i, tf.reduce_max(num)), + body=lambda i, x: ( + i + 1, + tf.where( + tf.less(i, num), + _mask(x, axis=axis, pos=indices[:, i], max_amount=max_dims), + x)), + loop_vars=(0, x)) + return x + + +def random_warp(x, std, scale): + """ + :param tf.Tensor x: (batch,time,dim) + :param (float,float) std: + :param (float,float) scale: + :rtype: tf.Tensor + :return: x transformed + """ + import tensorflow as tf + from TFUtil import create_random_warp_flow_2d, dense_image_warp + x = tf.expand_dims(x, axis=-1) + flow = create_random_warp_flow_2d(tf.shape(x)[:-1], std=std, scale=scale) + x = dense_image_warp(x, flow=flow) + x = tf.squeeze(x, axis=-1) + return x + + +def transform(x, network): + import tensorflow as tf + def get_masked(): + x_masked = x + x_masked = random_mask(x_masked, axis=1, min_num=0, max_num=2, max_dims=10) + x_masked = random_mask(x_masked, axis=2, min_num=0, max_num=1, max_dims=10) + return x_masked + x = network.cond_on_train(get_masked, lambda: x) + return x + + +def jump_net(prefix, inputs, filters, dilation_rate, data_format, conv_time_dim=False): + if data_format == 'channels_first': + NCHW = True + else: + NCHW = False + filter_size = (3, 3) + strides = (1, 1) + #dilation_rate = (1,) + padding = "SAME" + use_frequency = False + add_sequential_layer("%s_c1" % prefix, {"class": "conv", "n_out": filters, "filter_size": filter_size, "auto_use_channel_first": NCHW, + "strides": strides, "dilation_rate": (dilation_rate, 1), "padding": padding, "activation": None, 
"with_bias": False, "dropout": dropout, + "forward_weights_init": "xavier", "L2": L2}) + add_sequential_layer("%s_bn1" % prefix, {"class": "batch_norm", "masked_time": masked_time, "axes": axes, "fused": fused, "momentum": bn_momentum, "epsilon": bn_epsilon}) + add_sequential_layer("%s_y1" % prefix, {"class": "activation", "activation": "relu", "batch_norm": False}) + add_sequential_layer("%s_c2" % prefix, {"class": "conv", "n_out": filters, "filter_size": filter_size, "auto_use_channel_first": NCHW, + "strides": strides, "dilation_rate": (dilation_rate, 1), "padding": padding, "activation": None, "with_bias": False, "dropout": dropout, + "forward_weights_init": "xavier", "L2": L2}) + + add_sequential_layer("%s_p" % prefix, {"class": "combine", "kind": "add"}, from_=["%s_c2" % prefix, inputs]) + add_sequential_layer("%s_bn2" % prefix, {"class": "batch_norm", "masked_time": masked_time, "axes": axes, "fused": fused, "momentum": bn_momentum, "epsilon": bn_epsilon}) + add_sequential_layer("%s_o" % prefix, {"class": "activation", "activation": "relu", "batch_norm": False}) + return "%s_o" % prefix + + +def jump_block(prefix, inputs, jump_net_num, filters, data_format, dilation_rate, attention=False, conv_time_dim=False): + if data_format == 'channels_first': + NCHW = True + else: + NCHW = False + filter_size = (3, 3) + strides = (1, 1) + padding = "SAME" + use_frequency = False + add_sequential_layer("%s_c" % prefix, {"class": "conv", "n_out": filters, "filter_size": filter_size, "auto_use_channel_first": NCHW, + "strides": strides, "dilation_rate": dilation_rate, "padding": padding, "activation": None, "with_bias": False, "dropout": dropout, + "forward_weights_init": "xavier", "L2": L2}) + # doing strides + add_sequential_layer("%s_c_strides" % prefix, {"class": "slice", "axis": "s:1", "slice_step": 2}) + + dilation_rate *= 2 + inputs = "%s_c_strides" % prefix + for i in range(1, jump_net_num + 1): + inputs = jump_net("%s_net%i" % (prefix, i + 1), inputs, filters, dilation_rate, data_format=data_format, conv_time_dim=conv_time_dim) + return inputs + + +def lacea(inputs): + block_sizes = [2, 2, 2, 2] + filters = 128 + data_format = 'channels_last' + conv_time_dim = True + dilation_rate = 1 + attention = False + + for i, num_blocks in enumerate(block_sizes): + inputs = jump_block("block%i" % (i + 1), inputs, num_blocks, filters, data_format, dilation_rate, attention=attention, conv_time_dim=conv_time_dim) + filters *= 2 + dilation_rate *= 2 + return + +add_sequential_layer("split", {"class": "split_dims", "axis": "f", "dims": (channel_num, feature_dim)}) # output: (batch, time, window = 61, feature = 40, channel = 1) +add_sequential_layer("swap_axes", {"class": "swap_axes", "axis1": "s:1", "axis2": "f"}) +add_sequential_layer("data_aug", {"class": "eval", "eval": "self.network.get_config().typed_value('transform')(source(0), network=self.network)"}) +lacea(inputs="data_aug")add_sequential_layer("out_pool", {"class": "reduce", "mode": "avg", "axes": "s:1", "keep_dims": False}) +add_sequential_layer("output", {"class": "softmax", "loss": "ce", "L2": L2, "n_out": num_outputs, "loss_opts": {"focal_loss_factor": 2.0}, "dropout": ldropout}) + +train = get_sprint_dataset("train") +dev = get_sprint_dataset("cv") + +############## debug stuff +debug_print_layer_output_template = True # useful for debugging +#debug_print_layer_output_sizes = True +#debug_print_layer_output_shape = True # might be useful for debugging +#debug_shell_in_runner = True +log_batch_size = True +tf_log_memory_usage = True 
+############## debug stuff + +# trainer +batching = "random" +batch_size = 32 * (150) +max_seqs = 500 +chunking = "150:150" +truncation = -1 +num_epochs = 120 +gradient_clip = 0 +gradient_noise = 0.1 +momentum = 0.99 +learning_rate = 5e-6 +learning_rate_file = "newbob.data" +learning_rate_control = "newbob_multi_epoch" +learning_rate_control_relative_error_relative_lr = True +newbob_multi_num_epochs = 6 +newbob_multi_update_interval = 1 +model = "net-model/network" + +cleanup_old_models = True +store_metadata_mod_step = None + +# log +log = "log/crnn.train.log" +log_verbosity = 5
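For reference, a minimal standalone sketch (not part of either config) of the SpecAugment-style time/frequency masking that transform()/random_mask() apply in both configs. It avoids the RETURNN-specific TFUtil.where_bc helper by using a multiplicative 0/1 keep-mask; the fixed positions, widths, and shapes are illustrative assumptions, and it targets the same TF1.x API (tf.random_uniform, tf.Session) the configs use.

import tensorflow as tf  # TF1.x graph mode, matching the tf.random_uniform usage above

def mask_band(x, axis, pos, width):
    # Zero out indices [pos, pos+width) along `axis` with a broadcastable keep-mask,
    # a simplified stand-in for _mask()/where_bc() in the configs.
    dim = tf.shape(x)[axis]
    idxs = tf.range(dim)
    keep = tf.cast(tf.logical_or(idxs < pos, idxs >= pos + width), x.dtype)
    shape = [1] * x.get_shape().ndims
    shape[axis] = -1
    return x * tf.reshape(keep, shape)

# Dummy (batch, time, feature) input, e.g. 64-dim log-mel as in the first config.
x = tf.random_uniform((2, 100, 64))
x = mask_band(x, axis=1, pos=20, width=30)  # one time mask, cf. random_mask(..., axis=1, ...)
x = mask_band(x, axis=2, pos=10, width=8)   # one frequency mask, cf. random_mask(..., axis=2, ...)

with tf.Session() as sess:
    print(sess.run(tf.reduce_mean(x)))  # mean is lower than for the unmasked input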