diff --git a/platalea/asr.py b/platalea/asr.py
index 1f2a99f..b25af66 100644
--- a/platalea/asr.py
+++ b/platalea/asr.py
@@ -98,6 +98,7 @@ def val_loss():
     optimizer = create_optimizer(config, net_parameters)
     scheduler = create_scheduler(config, optimizer, data)
 
+    results = []
     with open("result.json", "w") as out:
         best_score = -np.inf
         for epoch in range(1, config['epochs']+1):
@@ -109,12 +110,13 @@ def val_loss():
                 loss.backward()
                 nn.utils.clip_grad_norm_(net.parameters(), config['max_norm'])
                 optimizer.step()
+                cost += Counter({'cost': loss.item(), 'N': 1})
+                average_loss = cost['cost'] / cost['N']
                 if 'opt' not in config.keys() or config['opt'] == 'adam':
                     scheduler.step()
-                cost += Counter({'cost': loss.item(), 'N': 1})
                 if j % 100 == 0:
                     logging.info("train {} {} {}".format(
-                        epoch, j, cost['cost'] / cost['N']))
+                        epoch, j, average_loss))
                 if j % 400 == 0:
                     logging.info("valid {} {} {}".format(epoch, j, val_loss()))
             with torch.no_grad():
@@ -124,7 +126,9 @@ def val_loss():
                 else:
                     result = platalea.score.score_asr(net, data['val'].dataset)
                 net.train()
+            result['average_loss'] = average_loss
             result['epoch'] = epoch
+            results.append(result)
             json.dump(result, out)
             print('', file=out, flush=True)
             if 'epsilon_decay' in config.keys():
@@ -148,7 +152,7 @@ def val_loss():
     if 'epsilon_decay' in config.keys():
         # Save full model for inference
         torch.save(net, 'net.best.pt')
-
+    return results
 
 def get_default_config(hidden_size_factor=1024):
     fd = D.Flickr8KData
diff --git a/platalea/basic.py b/platalea/basic.py
index 557099a..7acc2ec 100644
--- a/platalea/basic.py
+++ b/platalea/basic.py
@@ -99,10 +99,12 @@ def val_loss(net):
 
     debug_logging_active = logging.getLogger().isEnabledFor(logging.DEBUG)
 
+    loss_value = None
+    results = []
     with open("result.json", "w") as out:
         for epoch in range(1, config['epochs']+1):
             cost = Counter()
-            for j, item in enumerate(data['train'], start=1): # check reshuffling
+            for j, item in enumerate(data['train'], start=1):  # check reshuffling
                 wandb_step_output = {
                     "epoch": epoch,
                 }
@@ -115,15 +117,16 @@ def val_loss(net):
                 scheduler.step()
                 loss_value = loss.item()
                 cost += Counter({'cost': loss_value, 'N': 1})
+                average_loss = cost['cost'] / cost['N']
 
                 # logging
                 wandb_step_output["step loss"] = loss_value
                 wandb_step_output["last_lr"] = scheduler.get_last_lr()[0]
                 if j % 100 == 0:
-                    logging.info("train %d %d %f", epoch, j, cost['cost'] / cost['N'])
+                    logging.info("train %d %d %f", epoch, j, average_loss)
                 else:
                     if debug_logging_active:
-                        logging.debug("train %d %d %f %f", epoch, j, cost['cost'] / cost['N'], loss_value)
+                        logging.debug("train %d %d %f %f", epoch, j, average_loss, loss_value)
                 if not config.get('validate_on_cpu'):
                     if j % 400 == 0:
                         validation_loss = val_loss(net)
@@ -161,15 +164,20 @@ def val_loss(net):
             if config.get('score_on_cpu') and platalea.hardware._device == 'cpu' and previous_device != 'cpu':
                 platalea.hardware.set_device(previous_device)
 
-            result['epoch'] = epoch
-            json.dump(result, out)
-            print('', file=out, flush=True)
             if config.get('validate_on_cpu'):
                 # only add it here (for wandb), because json.dump doesn't like tensor values
                 result["validation loss"] = validation_loss
+
+            result['average_loss'] = average_loss
+            result['epoch'] = epoch
+            results.append(result)
+            json.dump(result, out)
+            print('', file=out, flush=True)
             wandb.log(result)
 
+    return results
+
 
 DEFAULT_CONFIG = dict(SpeechEncoder=dict(conv=dict(in_channels=39, out_channels=64, kernel_size=6, stride=2,
                                                    padding=0, bias=False),
diff --git a/platalea/basicvq.py b/platalea/basicvq.py
index 0ff0b5c..259607e 100644
--- a/platalea/basicvq.py
+++ b/platalea/basicvq.py
@@ -83,6 +83,7 @@ def val_loss():
         config['min_lr'] = 1e-6
     scheduler = create_scheduler(config, optimizer, data)
 
+    results = []
     with open("result.json", "w") as out:
         for epoch in range(1, config['epochs']+1):
             cost = Counter()
@@ -94,16 +95,19 @@ def val_loss():
                 optimizer.step()
                 scheduler.step()
                 cost += Counter({'cost': loss.item(), 'N':1})
+                average_loss = cost['cost'] / cost['N']
                 if j % 100 == 0:
-                    logging.info("train {} {} {}".format(epoch, j, cost['cost']/cost['N']))
+                    logging.info("train {} {} {}".format(epoch, j, average_loss))
                 if j % 400 == 0:
                     logging.info("valid {} {} {}".format(epoch, j, val_loss()))
             result = platalea.score.score(net, data['val'].dataset)
+            result['average_loss'] = average_loss
             result['epoch'] = epoch
+            results.append(result)
             print(json.dumps(result), file=out, flush=True)
             logging.info("Saving model in net.{}.pt".format(epoch))
             torch.save(net, "net.{}.pt".format(epoch))
-
+    return results
 
 DEFAULT_CONFIG = dict(SpeechEncoder=dict(SpeechEncoderBottom=dict(conv=dict(in_channels=39, out_channels=64, kernel_size=6, stride=2, padding=0, bias=False),
                                                                   rnn= dict(input_size=64, hidden_size=1024, num_layers=2,
diff --git a/platalea/experiments/flickr8k/asr.py b/platalea/experiments/flickr8k/asr.py
index e4d7c99..4a8266c 100644
--- a/platalea/experiments/flickr8k/asr.py
+++ b/platalea/experiments/flickr8k/asr.py
@@ -39,4 +39,4 @@
                       l2_regularization=args.l2_regularization,)
 
 logging.info('Training')
-M.experiment(net, data, run_config, slt=data['train'].dataset.is_slt())
+result = M.experiment(net, data, run_config, slt=data['train'].dataset.is_slt())
diff --git a/platalea/experiments/flickr8k/basic.py b/platalea/experiments/flickr8k/basic.py
index 7297bbe..ec396a1 100644
--- a/platalea/experiments/flickr8k/basic.py
+++ b/platalea/experiments/flickr8k/basic.py
@@ -49,4 +49,4 @@ epochs=args.epochs,
                       l2_regularization=args.l2_regularization)
 
 logging.info('Training')
-M.experiment(net, data, run_config)
+result = M.experiment(net, data, run_config)
diff --git a/platalea/experiments/flickr8k/mtl_asr.py b/platalea/experiments/flickr8k/mtl_asr.py
index ed926c9..f938ec4 100644
--- a/platalea/experiments/flickr8k/mtl_asr.py
+++ b/platalea/experiments/flickr8k/mtl_asr.py
@@ -89,4 +89,4 @@
          dict(name='ASR', net=net.SpeechTranscriber, data=data, eval=scorer)]
 
 logging.info('Training')
-M.experiment(net, tasks, run_config)
+result = M.experiment(net, tasks, run_config)
diff --git a/platalea/experiments/flickr8k/mtl_st.py b/platalea/experiments/flickr8k/mtl_st.py
index 11b6d9a..5a30d2a 100644
--- a/platalea/experiments/flickr8k/mtl_st.py
+++ b/platalea/experiments/flickr8k/mtl_st.py
@@ -73,4 +73,4 @@
          dict(name='ST', net=net.SpeechText, data=data, eval=score_speech_text)]
 
 logging.info('Training')
-M.experiment(net, tasks, run_config)
+result = M.experiment(net, tasks, run_config)
diff --git a/platalea/experiments/flickr8k/pip_seq.py b/platalea/experiments/flickr8k/pip_seq.py
index 90ef567..0553c0f 100644
--- a/platalea/experiments/flickr8k/pip_seq.py
+++ b/platalea/experiments/flickr8k/pip_seq.py
@@ -87,6 +87,6 @@
                        l2_regularization=args.l2_regularization)
 
 logging.info('Training text-image')
-M2.experiment(net, data, run_config)
+result = M2.experiment(net, data, run_config)
 copyfile('result.json', 'result_text_image.json')
 copy_best('.', 'result_text_image.json', 'ti.best.pt')
diff --git a/platalea/experiments/flickr8k/text_image.py b/platalea/experiments/flickr8k/text_image.py
index 53a5093..aa9e9a1 100644
--- a/platalea/experiments/flickr8k/text_image.py
+++ b/platalea/experiments/flickr8k/text_image.py
@@ -36,4 +36,4 @@
                       l2_regularization=args.l2_regularization,)
 
 logging.info('Training')
-M.experiment(net, data, run_config)
+result = M.experiment(net, data, run_config)
diff --git a/platalea/experiments/flickr8k/transformer.py b/platalea/experiments/flickr8k/transformer.py
index d6abda6..53f04d0 100644
--- a/platalea/experiments/flickr8k/transformer.py
+++ b/platalea/experiments/flickr8k/transformer.py
@@ -97,4 +97,4 @@ def __new__(cls, value):
 logged_config['encoder_config'].pop('SpeechEncoder')  # Object info is redundant in log.
 
 logging.info('Training')
-M.experiment(net, data, run_config, wandb_project='platalea_transformer', wandb_log=logged_config)
+result = M.experiment(net, data, run_config, wandb_project='platalea_transformer', wandb_log=logged_config)
diff --git a/platalea/mtl.py b/platalea/mtl.py
index 0279d71..3b2a7f4 100644
--- a/platalea/mtl.py
+++ b/platalea/mtl.py
@@ -106,7 +106,8 @@ def experiment(net, tasks, config):
         t['net'].train()
         t['optimizer'] = create_optimizer(config, t['net'].parameters())
         t['scheduler'] = create_scheduler(config, t['optimizer'], t['data'])
-
+
+    results = []
     with open("result.json", "w") as out:
         for epoch in range(1, config['epochs']+1):
             for t in tasks:
@@ -122,10 +123,10 @@ def experiment(net, tasks, config):
                 t['optimizer'].step()
                 t['scheduler'].step()
                 t['cost'] += Counter({'cost': loss.item(), 'N': 1})
+                t['average_loss'] = t['cost']['cost'] / t['cost']['N']
                 if j % 100 == 0:
                     logging.info("train {} {} {} {}".format(
-                        t['name'], epoch, j,
-                        t['cost']['cost'] / t['cost']['N']))
+                        t['name'], epoch, j, t['average_loss']))
                 if j % 400 == 0:
                     logging.info("valid {} {} {} {}".format(
                         t['name'], epoch, j,
@@ -138,9 +139,14 @@ def experiment(net, tasks, config):
                 result[t['name']] = t['eval'](t['net'],
                                               t['data']['val'].dataset)
             net.train()
+            for t in tasks:
+                result[t['name']].update({'average_loss': t['average_loss']})
             result['epoch'] = epoch
+            results.append(result)
             json.dump(result, out)
             print('', file=out, flush=True)
             # Saving model
             logging.info("Saving model in net.{}.pt".format(epoch))
             torch.save(net, "net.{}.pt".format(epoch))
+
+    return results
diff --git a/platalea/text_image.py b/platalea/text_image.py
index b623e38..443bf67 100644
--- a/platalea/text_image.py
+++ b/platalea/text_image.py
@@ -79,9 +79,11 @@ def val_loss():
     optimizer = create_optimizer(config, net_parameters)
     scheduler = create_scheduler(config, optimizer, data)
 
+    results = []
     with open("result.json", "w") as out:
         for epoch in range(1, config['epochs']+1):
             cost = Counter()
+            average_loss = None
             for j, item in enumerate(data['train'], start=1):
                 item = {key: value.to(_device) for key, value in item.items()}
                 loss = net.cost(item)
@@ -90,17 +92,21 @@ def val_loss():
                 optimizer.step()
                 scheduler.step()
                 cost += Counter({'cost': loss.item(), 'N': 1})
+                average_loss = cost['cost'] / cost['N']
                 if j % 100 == 0:
                     logging.info("train {} {} {}".format(
-                        epoch, j, cost['cost']/cost['N']))
+                        epoch, j, average_loss))
                 if j % 400 == 0:
                     logging.info("valid {} {} {}".format(epoch, j, val_loss()))
             result = platalea.score.score_text_image(net, data['val'].dataset)
+            result['average_loss'] = average_loss
             result['epoch'] = epoch
+            results.append(result)
             json.dump(result, out)
             print('', file=out, flush=True)
             logging.info("Saving model in net.{}.pt".format(epoch))
             torch.save(net, "net.{}.pt".format(epoch))
+    return results
 
 
 def get_default_config(hidden_size_factor=1024):
diff --git a/tests/test_experiments.py b/tests/test_experiments.py
index a5ec3e4..32c173f 100644
--- a/tests/test_experiments.py
+++ b/tests/test_experiments.py
@@ -1,14 +1,15 @@
 import unittest.mock
 from flickr1d import __path__ as flickr1d_path
+import pandas
 
 flickr1d_path = flickr1d_path[-1]
 
 
 def test_config():
     with unittest.mock.patch('sys.argv', ['[this gets ignored]', '--epochs=2',
                                           '--flickr8k_meta=thisandthat.json',
-                                        '--audio_features_fn=mfcc_features.pt',
-                                        f'--flickr8k_root={flickr1d_path}',
-                                        '--lr_scheduler=noam', '-v']):
+                                          '--audio_features_fn=mfcc_features.pt',
+                                          f'--flickr8k_root={flickr1d_path}',
+                                          '--lr_scheduler=noam', '-v']):
         from platalea.experiments.config import get_argument_parser
         args = get_argument_parser()
@@ -21,6 +22,12 @@ def test_config():
 
 
 def test_transformer_experiment():
+    expected = [{'epoch': 1,
+                 'medr': 1.5,
+                 'recall': {1: 0.5, 5: 1.0, 10: 1.0},
+                 'average_loss': 0.5153712034225464,
+                 }]
+
     with unittest.mock.patch('sys.argv', ['[this gets ignored]', '--epochs=1',
                                           '-c', f'{flickr1d_path}/config.yml',
                                           f'--flickr8k_root={flickr1d_path}',
@@ -29,74 +36,173 @@ def test_transformer_experiment():
                                           '--trafo_d_model=4',
                                           '--trafo_feedforward_dim=4']):
         import platalea.experiments.flickr8k.transformer
+        result = platalea.experiments.flickr8k.transformer.result
+
+        _assert_nested_almost_equal(result, expected)
 
 
 def test_basic_experiment():
+    expected = [{'epoch': 1,
+                 'medr': 1.5,
+                 'recall': {1: 0.5, 5: 1.0, 10: 1.0},
+                 'average_loss': 0.41894787549972534
+                 }]
+
     with unittest.mock.patch('sys.argv', ['[this gets ignored]', '--epochs=1',
                                           '-c', f'{flickr1d_path}/config.yml',
                                           f'--flickr8k_root={flickr1d_path}',
                                           '--hidden_size_factor=4']):
         import platalea.experiments.flickr8k.basic
+        result = platalea.experiments.flickr8k.basic.result
+
+        _assert_nested_almost_equal(result, expected)
 
 
 def test_mtl_asr_experiment():
+    expected = [
+        {'ASR': {'cer': {'CER': 6.791171477079796,
+                         'Cor': 0,
+                         'Del': 0,
+                         'Ins': 3411,
+                         'Sub': 589},
+                 'average_loss': 4.440168142318726,
+                 'wer': {'Cor': 0,
+                         'Del': 118,
+                         'Ins': 0,
+                         'Sub': 10,
+                         'WER': 1.0}},
+         'SI': {'medr': 1.5,
+                'recall': {1: 0.5,
+                           5: 1.0,
+                           10: 1.0},
+                'average_loss': 0.3971380218863487},
+         'epoch': 1}
+    ]
+
     with unittest.mock.patch('sys.argv', ['[this gets ignored]', '--epochs=1',
                                           '-c', f'{flickr1d_path}/config.yml',
                                           f'--flickr8k_root={flickr1d_path}',
                                           '--hidden_size_factor=4']):
         import platalea.experiments.flickr8k.mtl_asr
+        result = platalea.experiments.flickr8k.mtl_asr.result
+
+        _assert_nested_almost_equal(result, expected)
 
 
 def test_mtl_st_experiment():
+    expected = [
+        {'SI': {'medr': 2.0, 'recall': {1: 0.4, 5: 1.0, 10: 1.0}, 'average_loss': 0.3906550034880638},
+         'ST': {'medr': 6.0, 'recall': {1: 0.0, 5: 0.5, 10: 1.0}, 'average_loss': 0.37090546637773514},
+         'epoch': 1},
+    ]
+
     with unittest.mock.patch('sys.argv', ['[this gets ignored]', '--epochs=1',
                                           '-c', f'{flickr1d_path}/config.yml',
                                           f'--flickr8k_root={flickr1d_path}',
                                           '--hidden_size_factor=4']):
         import platalea.experiments.flickr8k.mtl_st
+        result = platalea.experiments.flickr8k.mtl_st.result
+
+        _assert_nested_almost_equal(result, expected)
 
 
 def test_asr_experiment():
+    expected = [
+        {'cer': {'CER': 6.784380305602716,
+                 'Cor': 4,
+                 'Del': 0,
+                 'Ins': 3411,
+                 'Sub': 585},
+         'epoch': 1,
+         'average_loss': 4.3757164478302,
+         'wer': {'Cor': 0,
+                 'Del': 118,
+                 'Ins': 0,
+                 'Sub': 10,
+                 'WER': 1.0}},
+    ]
+
     with unittest.mock.patch('sys.argv', ['[this gets ignored]', '--epochs=1',
                                           '-c', f'{flickr1d_path}/config.yml',
                                           f'--flickr8k_root={flickr1d_path}',
-                                          '--hidden_size_factor=4']):
+                                          '--hidden_size_factor=4',
+                                          '--epsilon_decay=0.001']):
         import platalea.experiments.flickr8k.asr
-        # save output of this experiment to serve as input for pip_ind and pip_seq
+        result = platalea.experiments.flickr8k.asr.result
+
+        _assert_nested_almost_equal(result, expected)
 
 
 def test_text_image_experiment():
+    expected = [{
+        'epoch': 1,
+        'medr': 1.5,
+        'recall': {1: 0.5, 5: 1.0, 10: 1.0},
+        'average_loss': 0.3847378194332123,
+    }]
+
     with unittest.mock.patch('sys.argv', ['[this gets ignored]', '--epochs=1',
                                           '-c', f'{flickr1d_path}/config.yml',
                                           f'--flickr8k_root={flickr1d_path}',
                                           '--hidden_size_factor=4']):
         import platalea.experiments.flickr8k.text_image
-        # save output of this experiment to serve as input for pip_ind
+        result = platalea.experiments.flickr8k.text_image.result
+
+        _assert_nested_almost_equal(result, expected)
 
 
 def test_pip_ind_experiment():
+    expected = {
+        'ranks': [2, 2, 2, 2, 2, 1, 1, 1, 1, 1],
+        'recall': {
+            1: [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+            5: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+            10: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]}
+    }
+
     with unittest.mock.patch('sys.argv', ['[this gets ignored]', '--epochs=1',
                                           '-c', f'{flickr1d_path}/config.yml',
                                           f'--flickr8k_root={flickr1d_path}',
                                           '--hidden_size_factor=4',
-                                        # '--asr_model_dir={asr_out_path}'
-                                        # '--text_image_model_dir={text_image_out_path}'
+                                          # '--asr_model_dir={asr_out_path}'
+                                          # '--text_image_model_dir={text_image_out_path}'
                                           ]):
         import platalea.experiments.flickr8k.pip_ind
+        result = platalea.experiments.flickr8k.pip_ind.result
+
+        _assert_nested_almost_equal(result, expected)
+
 
 def test_pip_seq_experiment():
+    expected = [{'medr': 1.5, 'recall': {1: 0.5, 5: 1.0, 10: 1.0},
+                 'average_loss': 0.3918714001774788,
+                 'epoch': 1}]
+
     with unittest.mock.patch('sys.argv', ['[this gets ignored]', '--epochs=1',
                                           '-c', f'{flickr1d_path}/config.yml',
                                           f'--flickr8k_root={flickr1d_path}',
                                           '--hidden_size_factor=4',
                                           '--pip_seq_no_beam_decoding',
-                                        # '--asr_model_dir={asr_out_path}'
+                                          # '--asr_model_dir={asr_out_path}'
                                           ]):
         import platalea.experiments.flickr8k.pip_seq
+        result = platalea.experiments.flickr8k.pip_seq.result
+
+        _assert_nested_almost_equal(result, expected)
+
+
+def _assert_nested_almost_equal(a, b):
+    """
+    Asserts that 2 nested objects are approximately equal.
+    The check is done using pandas functions.
+    By default, pandas uses an absolute tolerance of 1e-8 and a relative tolerance of 1e-5 for any numeric comparison.
+    """
+    pandas.testing.assert_series_equal(pandas.Series(a), pandas.Series(b))
diff --git a/tox.ini b/tox.ini
index 7b7e24a..68ebe4a 100644
--- a/tox.ini
+++ b/tox.ini
@@ -14,6 +14,7 @@ deps =
    python-Levenshtein
    git+https://github.com/spokenlanguage/flickr1d.git#egg=flickr1d
    wandb
+   pandas
commands =
    wandb off
    coverage run -m pytest
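With this patch, experiment() returns the list of per-epoch result dicts that were previously only written to result.json, and the experiment scripts bind that return value to result (the new tests read it back as, e.g., platalea.experiments.flickr8k.basic.result). A minimal usage sketch, not part of the patch, assuming a retrieval-style run such as platalea.basic where each epoch dict carries 'epoch', 'average_loss' and 'recall' (as in the expected values in the tests), and that net, data and run_config are built as in the experiment scripts:

    import platalea.basic as M

    # results is a list with one dict per epoch, e.g.
    # {'epoch': 1, 'medr': 1.5, 'recall': {1: 0.5, 5: 1.0, 10: 1.0}, 'average_loss': 0.42}
    results = M.experiment(net, data, run_config)

    # hypothetical selection criterion: keep the epoch with the highest recall@10
    best = max(results, key=lambda r: r['recall'][10])
    print('best epoch:', best['epoch'],
          'recall@10:', best['recall'][10],
          'average loss:', best['average_loss'])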