Commit f5f36fa: LACE dataaug configs
squarenabla committed Nov 22, 2019
1 parent 1259529 commit f5f36fa
Showing 2 changed files with 620 additions and 0 deletions.
2019-asr-resnet-lace-cnn/LACE/DataAug/lace_specaug_logmel+d+dd.config
314 changes: 314 additions & 0 deletions
@@ -0,0 +1,314 @@
#!crnn/rnn.py
# kate: syntax python;
# see also file:///u/zeyer/setups/quaero-en/training/quaero-train11/50h/ann/2015-07-29--lstm-gt50/config-train/dropout01.3l.n500.custom_lstm.adam.lr1e_3.config

import os
import numpy
from subprocess import check_output

# task
use_tensorflow = True
task = "train"
device = "gpu"
multiprocessing = True
update_on_device = True

_cf_cache = {}

def cf(filename):
  """Cache manager"""
  if filename in _cf_cache:
    return _cf_cache[filename]
  if check_output(["hostname"]).strip().decode("utf8") in ["sulfid", "zink", "cobalt", "niob"]:
    print("use local file: %s" % filename)
    return filename  # for debugging
  cached_fn = check_output(["cf", filename]).strip().decode("utf8")
  assert os.path.exists(cached_fn)
  _cf_cache[filename] = cached_fn
  return cached_fn

# data
context_window = 1

window = 1
feature_dim = 64 # LogMel 64-dim
channel_num = 3  # per the filename (+d+dd): presumably static + delta + double-delta
num_inputs = feature_dim * channel_num * window
num_outputs = 9001 # CART labels
EpochSplit = 6
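# EpochSplit = 6: RETURNN partitions one pass over the training corpus into 6
# sub-epochs (via "partitionEpoch" below), so checkpointing and learning-rate
# control happen 6 times per full corpus pass.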

def get_sprint_dataset(data):
  assert data in ["train", "cv"]
  epochSplit = {"train": EpochSplit, "cv": 1}

  # see /u/tuske/work/ASR/switchboard/corpus/readme
  # and Zoltan's mail https://mail.google.com/mail/u/0/#inbox/152891802cbb2b40
  files = {}
  files["config"] = "config/training.config"
  files["corpus"] = "/u/corpora/speech/switchboard-1/xml/swb1-all/swb1-all.corpus.gz"
  files["segments"] = "dependencies/seg_%s" % {"train": "train", "cv": "cv_head3000"}[data]
  files["features"] = "/u/bozheniuk/setups/switchboard/feature_extraction/cluster_setup/logmel64_30/data/logmel.train.bundle"
  files["lexicon"] = "/u/tuske/work/ASR/switchboard/corpus/train.lex.v1_0_3.ci.gz"
  files["alignment"] = "dependencies/tuske__2016_01_28__align.combined.train"
  files["cart"] = "/u/tuske/work/ASR/switchboard/initalign/data/%s" % {9001: "cart-9000"}[num_outputs]
  for k, v in sorted(files.items()):
    assert os.path.exists(v), "%s %r does not exist" % (k, v)
  estimated_num_seqs = {"train": 227047, "cv": 3000}  # wc -l segment-file

  # features: /u/tuske/work/ASR/switchboard/feature.extraction/gt40_40/data/gt.train.*
  args = [
    "--config=" + files["config"],
    lambda: "--*.corpus.file=" + cf(files["corpus"]),
    lambda: "--*.corpus.segments.file=" + cf(files["segments"]),
    {"train": "--*.corpus.segment-order-shuffle=true", "cv": "--*.corpus.segment-order-sort-by-time-length=true"}[data],
    "--*.state-tying.type=cart",
    lambda: "--*.state-tying.file=" + cf(files["cart"]),
    "--*.trainer-output-dimension=%i" % num_outputs,
    lambda: "--*.lexicon.file=" + cf(files["lexicon"]),
    lambda: "--*.alignment-cache-path=" + cf(files["alignment"]),
    lambda: "--*.feature-cache-path=" + cf(files["features"]),
    #"--*.mean-normalization.file=dependencies/setup-base/step254-hybrid-mlp-ibm-cmllr/mlp.4/train_mlp/normalize_layer1/mean",
    #"--*.variance-normalization.file=dependencies/setup-base/step254-hybrid-mlp-ibm-cmllr/mlp.4/train_mlp/normalize_layer1/std",
    "--*.log-channel.file=log/crnn.sprint.train-dataset.xml",
    "--*.window-size=1",
  ]
  return {
    "class": "ExternSprintDataset", "sprintTrainerExecPath": "sprint-executables/nn-trainer",
    "sprintConfigStr": args,
    "partitionEpoch": epochSplit[data],
    "estimated_num_seqs": estimated_num_seqs[data] // (epochSplit[data] or 1),
  }
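# Note: the lambda-wrapped entries in "sprintConfigStr" are evaluated lazily by
# RETURNN when the Sprint sub-process is launched, presumably so that the cf()
# cache transfer runs on the actual compute node rather than at config-parse time.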

cache_size = "0"
# network
# (also defined by num_inputs & num_outputs)
dropout = 0.05
ldropout = 0.05
L2 = 0.1

# bn params
masked_time = True
fused = True
axes = ["f"]

bn_momentum = 0.997
bn_epsilon = 1e-5

cur_feat_dim = feature_dim
network = {}
_last = "data"

def add_sequential_layer(name, d, from_=None):
  global _last, network
  assert "from" not in d
  if from_ is not None:
    d["from"] = from_
  else:
    d["from"] = [_last]
  assert name not in network
  network[name] = d
  _last = name

# data augmentation
def summary(name, x):
  """
  :param str name:
  :param tf.Tensor x: (batch,time,feature)
  """
  import tensorflow as tf
  # tf.summary.image wants [batch_size, height, width, channels],
  # we have (batch, time, feature).
  img = tf.expand_dims(x, axis=3)  # (batch,time,feature,1)
  img = tf.transpose(img, [0, 2, 1, 3])  # (batch,feature,time,1)
  tf.summary.image(name, img, max_outputs=10)
  tf.summary.scalar("%s_max_abs" % name, tf.reduce_max(tf.abs(x)))
  mean = tf.reduce_mean(x)
  tf.summary.scalar("%s_mean" % name, mean)
  stddev = tf.sqrt(tf.reduce_mean(tf.square(x - mean)))
  tf.summary.scalar("%s_stddev" % name, stddev)
  tf.summary.histogram("%s_hist" % name, tf.reduce_max(tf.abs(x), axis=2))
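# summary() is a TensorBoard helper for inspecting feature tensors; it is not
# called anywhere in this config, but can be dropped into transform() below to
# visualize the masked features during training.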


def _mask(x, axis, pos, max_amount):
  """
  :param tf.Tensor x: (batch,time,feature)
  :param int axis:
  :param tf.Tensor pos: (batch,)
  :param int max_amount: inclusive
  """
  import tensorflow as tf
  ndim = x.get_shape().ndims
  n_batch = tf.shape(x)[0]
  dim = tf.shape(x)[axis]
  amount = tf.random_uniform(shape=(n_batch,), minval=1, maxval=max_amount + 1, dtype=tf.int32)
  pos2 = tf.minimum(pos + amount, dim)
  idxs = tf.expand_dims(tf.range(0, dim), 0)  # (1,dim)
  pos_bc = tf.expand_dims(pos, 1)  # (batch,1)
  pos2_bc = tf.expand_dims(pos2, 1)  # (batch,1)
  cond = tf.logical_and(tf.greater_equal(idxs, pos_bc), tf.less(idxs, pos2_bc))  # (batch,dim)
  cond = tf.reshape(cond, [tf.shape(x)[i] if i in (0, axis) else 1 for i in range(ndim)])
  from TFUtil import where_bc
  x = where_bc(cond, 0.0, x)
  return x
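# _mask zeroes, per sequence, a random span [pos, pos + amount) along `axis`,
# with amount drawn uniformly from [1, max_amount] and clipped at the axis
# length. E.g. for axis=1 (time), pos=5 and a drawn amount of 3, frames 5..7
# of that sequence are zeroed across all features.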


def random_mask(x, axis, min_num, max_num, max_dims):
  """
  :param tf.Tensor x: (batch,time,feature)
  :param int axis:
  :param int|tf.Tensor min_num:
  :param int|tf.Tensor max_num: inclusive
  :param int max_dims: inclusive
  """
  import tensorflow as tf
  n_batch = tf.shape(x)[0]
  num = tf.random_uniform(shape=(n_batch,), minval=min_num, maxval=max_num + 1, dtype=tf.int32)
  # https://github.com/tensorflow/tensorflow/issues/9260
  # https://timvieira.github.io/blog/post/2014/08/01/gumbel-max-trick-and-weighted-reservoir-sampling/
  z = -tf.log(-tf.log(tf.random_uniform((n_batch, tf.shape(x)[axis]), 0, 1)))
  _, indices = tf.nn.top_k(z, tf.reduce_max(num))
  _, x = tf.while_loop(
    cond=lambda i, _: tf.less(i, tf.reduce_max(num)),
    body=lambda i, x: (
      i + 1,
      tf.where(
        tf.less(i, num),
        _mask(x, axis=axis, pos=indices[:, i], max_amount=max_dims),
        x)),
    loop_vars=(0, x))
  return x
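# random_mask draws num ~ U[min_num, max_num] masks per sequence. The Gumbel-max
# trick (top-k over -log(-log(U))) samples the mask start positions without
# replacement; the while_loop applies max(num) masks batch-wide, and the
# tf.where(tf.less(i, num), ...) guard leaves a sequence untouched once its own
# num masks have been placed.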


def random_warp(x, std, scale):
  """
  :param tf.Tensor x: (batch,time,dim)
  :param (float,float) std:
  :param (float,float) scale:
  :rtype: tf.Tensor
  :return: x transformed
  """
  import tensorflow as tf
  from TFUtil import create_random_warp_flow_2d, dense_image_warp
  x = tf.expand_dims(x, axis=-1)
  flow = create_random_warp_flow_2d(tf.shape(x)[:-1], std=std, scale=scale)
  x = dense_image_warp(x, flow=flow)
  x = tf.squeeze(x, axis=-1)
  return x
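# random_warp treats the (time, feature) plane as an image and displaces it with
# a smooth random flow field, i.e. SpecAugment-style warping. It is defined here
# but not invoked by transform() below, which applies masking only.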


def transform(x, network):
  import tensorflow as tf
  def get_masked():
    x_masked = x
    x_masked = random_mask(x_masked, axis=1, min_num=1, max_num=3, max_dims=40)  # time masks
    x_masked = random_mask(x_masked, axis=2, min_num=1, max_num=2, max_dims=30)  # frequency masks
    return x_masked
  x = network.cond_on_train(get_masked, lambda: x)
  return x
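# cond_on_train builds a tf.cond on the network's train flag, so the masking is
# active during training only: 1-3 time masks of up to 40 frames and 1-2
# frequency masks of up to 30 bins per sequence; evaluation sees the features
# unmodified.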


def jump_net(prefix, inputs, filters, dilation_rate, data_format, conv_time_dim=False):
  NCHW = (data_format == 'channels_first')
  filter_size = (3, 3)
  strides = (1, 1)
  padding = "SAME"
  use_frequency = False
  add_sequential_layer("%s_c1" % prefix, {
    "class": "conv", "n_out": filters, "filter_size": filter_size, "auto_use_channel_first": NCHW,
    "strides": strides, "dilation_rate": (dilation_rate, 1), "padding": padding, "activation": None,
    "with_bias": False, "dropout": dropout, "forward_weights_init": "xavier", "L2": L2})
  add_sequential_layer("%s_bn1" % prefix, {
    "class": "batch_norm", "masked_time": masked_time, "axes": axes, "fused": fused,
    "momentum": bn_momentum, "epsilon": bn_epsilon})
  add_sequential_layer("%s_y1" % prefix, {"class": "activation", "activation": "relu", "batch_norm": False})
  add_sequential_layer("%s_c2" % prefix, {
    "class": "conv", "n_out": filters, "filter_size": filter_size, "auto_use_channel_first": NCHW,
    "strides": strides, "dilation_rate": (dilation_rate, 1), "padding": padding, "activation": None,
    "with_bias": False, "dropout": dropout, "forward_weights_init": "xavier", "L2": L2})

  add_sequential_layer("%s_p" % prefix, {"class": "combine", "kind": "add"}, from_=["%s_c2" % prefix, inputs])
  add_sequential_layer("%s_bn2" % prefix, {
    "class": "batch_norm", "masked_time": masked_time, "axes": axes, "fused": fused,
    "momentum": bn_momentum, "epsilon": bn_epsilon})
  add_sequential_layer("%s_o" % prefix, {"class": "activation", "activation": "relu", "batch_norm": False})
  return "%s_o" % prefix


def jump_block(prefix, inputs, jump_net_num, filters, data_format, dilation_rate, attention=False, conv_time_dim=False):
  NCHW = (data_format == 'channels_first')
  filter_size = (3, 3)
  strides = (1, 1)
  padding = "SAME"
  use_frequency = False
  add_sequential_layer("%s_c" % prefix, {
    "class": "conv", "n_out": filters, "filter_size": filter_size, "auto_use_channel_first": NCHW,
    "strides": strides, "dilation_rate": dilation_rate, "padding": padding, "activation": None,
    "with_bias": False, "dropout": dropout, "forward_weights_init": "xavier", "L2": L2})
  # downsample the frequency axis (s:1) by a factor of 2 via strided slicing
  add_sequential_layer("%s_c_strides" % prefix, {"class": "slice", "axis": "s:1", "slice_step": 2})

  dilation_rate *= 2
  # print("dilation_rate: ", dilation_rate)
  inputs = "%s_c_strides" % prefix
  for i in range(1, jump_net_num + 1):
    inputs = jump_net("%s_net%i" % (prefix, i + 1), inputs, filters, dilation_rate,
                      data_format=data_format, conv_time_dim=conv_time_dim)
  return inputs
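# A jump block first maps to the new filter count with a dilated 3x3 conv,
# halves the frequency axis with the strided slice, and then stacks
# jump_net_num residual units at twice the incoming time dilation. The
# `attention` and `conv_time_dim` arguments are accepted but unused here.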


def lacea(inputs):
  block_sizes = [2, 2, 2, 2]
  filters = 128
  data_format = 'channels_last'
  conv_time_dim = True
  dilation_rate = 1
  attention = False

  for i, num_blocks in enumerate(block_sizes):
    inputs = jump_block("block%i" % (i + 1), inputs, num_blocks, filters, data_format, dilation_rate,
                        attention=attention, conv_time_dim=conv_time_dim)
    filters *= 2
    dilation_rate *= 2
  return
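# lacea stacks four jump blocks of two residual units each with 128, 256, 512
# and 1024 filters; every block halves the frequency axis and doubles the time
# dilation, so the temporal receptive field grows exponentially with depth (the
# layer-wise context expansion that gives LACE its name) while the frame rate
# stays constant. Attention is disabled (and unused) in this config.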

add_sequential_layer("split", {"class": "split_dims", "axis": "f", "dims": (channel_num, feature_dim)}) # output: (batch, time, window = 61, feature = 40, channel = 1)
add_sequential_layer("swap_axes", {"class": "swap_axes", "axis1": "s:1", "axis2": "f"})
add_sequential_layer("data_aug", {"class": "eval", "eval": "self.network.get_config().typed_value('transform')(source(0), network=self.network)"})
lacea(inputs="data_aug")
add_sequential_layer("out_pool", {"class": "reduce", "mode": "avg", "axes": "s:1", "keep_dims": False})
add_sequential_layer("output", {"class": "softmax", "loss": "ce", "L2": L2, "n_out": num_outputs, "loss_opts": {"focal_loss_factor": 2.0}, "dropout": ldropout})

train = get_sprint_dataset("train")
dev = get_sprint_dataset("cv")

############## debug stuff
debug_print_layer_output_template = True # useful for debugging
#debug_print_layer_output_sizes = True
#debug_print_layer_output_shape = True # might be useful for debugging
#debug_shell_in_runner = True
log_batch_size = True
tf_log_memory_usage = True
############## debug stuff

# trainer
batching = "random"
batch_size = 24 * 150  # frames per batch: up to 24 chunks of 150 frames
max_seqs = 500
chunking = "150:150"
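# chunking "150:150" cuts every sequence into non-overlapping 150-frame chunks
# (chunk size : chunk step), matching the per-batch frame budget above.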
truncation = -1
num_epochs = 120
gradient_clip = 0
gradient_noise = 0.1
momentum = 0.99
learning_rate = 5e-6
learning_rate_file = "newbob.data"
learning_rate_control = "newbob_multi_epoch"
learning_rate_control_relative_error_relative_lr = True
newbob_multi_num_epochs = 6
newbob_multi_update_interval = 1
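# "newbob_multi_epoch" scales the learning rate based on the relative change of
# the cross-validation score over newbob_multi_num_epochs = 6 sub-epochs, i.e.
# one full corpus pass given EpochSplit = 6.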
model = "net-model/network"

cleanup_old_models = True
store_metadata_mod_step = None

# log
log = "log/crnn.train.log"
log_verbosity = 5