Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

63 test results #78

Merged
merged 9 commits into from
Mar 3, 2021
10 changes: 7 additions & 3 deletions platalea/asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def val_loss():
optimizer = create_optimizer(config, net_parameters)
scheduler = create_scheduler(config, optimizer, data)

results = []
with open("result.json", "w") as out:
best_score = -np.inf
for epoch in range(1, config['epochs']+1):
Expand All @@ -109,12 +110,13 @@ def val_loss():
loss.backward()
nn.utils.clip_grad_norm_(net.parameters(), config['max_norm'])
optimizer.step()
cost += Counter({'cost': loss.item(), 'N': 1})
average_loss = cost['cost'] / cost['N']
if 'opt' not in config.keys() or config['opt'] == 'adam':
scheduler.step()
cost += Counter({'cost': loss.item(), 'N': 1})
if j % 100 == 0:
logging.info("train {} {} {}".format(
epoch, j, cost['cost'] / cost['N']))
epoch, j, average_loss))
if j % 400 == 0:
logging.info("valid {} {} {}".format(epoch, j, val_loss()))
with torch.no_grad():
Expand All @@ -124,7 +126,9 @@ def val_loss():
else:
result = platalea.score.score_asr(net, data['val'].dataset)
net.train()
result['average_loss'] = average_loss
result['epoch'] = epoch
results.append(result)
json.dump(result, out)
print('', file=out, flush=True)
if 'epsilon_decay' in config.keys():
Expand All @@ -148,7 +152,7 @@ def val_loss():
if 'epsilon_decay' in config.keys():
# Save full model for inference
torch.save(net, 'net.best.pt')

return results

def get_default_config(hidden_size_factor=1024):
fd = D.Flickr8KData
Expand Down
20 changes: 14 additions & 6 deletions platalea/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,12 @@ def val_loss(net):

debug_logging_active = logging.getLogger().isEnabledFor(logging.DEBUG)

loss_value = None
results = []
with open("result.json", "w") as out:
for epoch in range(1, config['epochs']+1):
cost = Counter()
for j, item in enumerate(data['train'], start=1): # check reshuffling
for j, item in enumerate(data['train'], start=1): # check reshuffling
wandb_step_output = {
"epoch": epoch,
}
Expand All @@ -115,15 +117,16 @@ def val_loss(net):
scheduler.step()
loss_value = loss.item()
cost += Counter({'cost': loss_value, 'N': 1})
average_loss = cost['cost'] / cost['N']

# logging
wandb_step_output["step loss"] = loss_value
wandb_step_output["last_lr"] = scheduler.get_last_lr()[0]
if j % 100 == 0:
logging.info("train %d %d %f", epoch, j, cost['cost'] / cost['N'])
logging.info("train %d %d %f", epoch, j, average_loss)
else:
if debug_logging_active:
logging.debug("train %d %d %f %f", epoch, j, cost['cost'] / cost['N'], loss_value)
logging.debug("train %d %d %f %f", epoch, j, average_loss, loss_value)
if not config.get('validate_on_cpu'):
if j % 400 == 0:
validation_loss = val_loss(net)
Expand Down Expand Up @@ -161,15 +164,20 @@ def val_loss(net):
if config.get('score_on_cpu') and platalea.hardware._device == 'cpu' and previous_device != 'cpu':
platalea.hardware.set_device(previous_device)

result['epoch'] = epoch
json.dump(result, out)
print('', file=out, flush=True)

if config.get('validate_on_cpu'):
# only add it here (for wandb), because json.dump doesn't like tensor values
result["validation loss"] = validation_loss

result['average_loss'] = average_loss
result['epoch'] = epoch
results.append(result)
json.dump(result, out)
print('', file=out, flush=True)
wandb.log(result)

return results


DEFAULT_CONFIG = dict(SpeechEncoder=dict(conv=dict(in_channels=39, out_channels=64, kernel_size=6, stride=2, padding=0,
bias=False),
Expand Down
8 changes: 6 additions & 2 deletions platalea/basicvq.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def val_loss():
config['min_lr'] = 1e-6
scheduler = create_scheduler(config, optimizer, data)

results = []
with open("result.json", "w") as out:
for epoch in range(1, config['epochs']+1):
cost = Counter()
Expand All @@ -94,16 +95,19 @@ def val_loss():
optimizer.step()
scheduler.step()
cost += Counter({'cost': loss.item(), 'N':1})
average_loss = cost['cost'] / cost['N']
if j % 100 == 0:
logging.info("train {} {} {}".format(epoch, j, cost['cost']/cost['N']))
logging.info("train {} {} {}".format(epoch, j, average_loss))
if j % 400 == 0:
logging.info("valid {} {} {}".format(epoch, j, val_loss()))
result = platalea.score.score(net, data['val'].dataset)
result['average_loss'] = average_loss
result['epoch'] = epoch
results.append(result)
print(json.dumps(result), file=out, flush=True)
logging.info("Saving model in net.{}.pt".format(epoch))
torch.save(net, "net.{}.pt".format(epoch))

return results

DEFAULT_CONFIG = dict(SpeechEncoder=dict(SpeechEncoderBottom=dict(conv=dict(in_channels=39, out_channels=64, kernel_size=6, stride=2, padding=0, bias=False),
rnn= dict(input_size=64, hidden_size=1024, num_layers=2,
Expand Down
2 changes: 1 addition & 1 deletion platalea/experiments/flickr8k/asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@
l2_regularization=args.l2_regularization,)

logging.info('Training')
M.experiment(net, data, run_config, slt=data['train'].dataset.is_slt())
result = M.experiment(net, data, run_config, slt=data['train'].dataset.is_slt())
2 changes: 1 addition & 1 deletion platalea/experiments/flickr8k/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,4 @@
epochs=args.epochs, l2_regularization=args.l2_regularization)

logging.info('Training')
M.experiment(net, data, run_config)
result = M.experiment(net, data, run_config)
2 changes: 1 addition & 1 deletion platalea/experiments/flickr8k/mtl_asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,4 @@
dict(name='ASR', net=net.SpeechTranscriber, data=data, eval=scorer)]

logging.info('Training')
M.experiment(net, tasks, run_config)
result = M.experiment(net, tasks, run_config)
2 changes: 1 addition & 1 deletion platalea/experiments/flickr8k/mtl_st.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,4 @@
dict(name='ST', net=net.SpeechText, data=data, eval=score_speech_text)]

logging.info('Training')
M.experiment(net, tasks, run_config)
result = M.experiment(net, tasks, run_config)
2 changes: 1 addition & 1 deletion platalea/experiments/flickr8k/pip_seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,6 @@
l2_regularization=args.l2_regularization)

logging.info('Training text-image')
M2.experiment(net, data, run_config)
result = M2.experiment(net, data, run_config)
copyfile('result.json', 'result_text_image.json')
copy_best('.', 'result_text_image.json', 'ti.best.pt')
2 changes: 1 addition & 1 deletion platalea/experiments/flickr8k/text_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@
l2_regularization=args.l2_regularization,)

logging.info('Training')
M.experiment(net, data, run_config)
result = M.experiment(net, data, run_config)
2 changes: 1 addition & 1 deletion platalea/experiments/flickr8k/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,4 @@ def __new__(cls, value):
logged_config['encoder_config'].pop('SpeechEncoder') # Object info is redundant in log.

logging.info('Training')
M.experiment(net, data, run_config, wandb_project='platalea_transformer', wandb_log=logged_config)
result = M.experiment(net, data, run_config, wandb_project='platalea_transformer', wandb_log=logged_config)
12 changes: 9 additions & 3 deletions platalea/mtl.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ def experiment(net, tasks, config):
t['net'].train()
t['optimizer'] = create_optimizer(config, t['net'].parameters())
t['scheduler'] = create_scheduler(config, t['optimizer'], t['data'])


results = []
with open("result.json", "w") as out:
for epoch in range(1, config['epochs']+1):
for t in tasks:
Expand All @@ -122,10 +123,10 @@ def experiment(net, tasks, config):
t['optimizer'].step()
t['scheduler'].step()
t['cost'] += Counter({'cost': loss.item(), 'N': 1})
t['average_loss'] = t['cost']['cost'] / t['cost']['N']
if j % 100 == 0:
logging.info("train {} {} {} {}".format(
t['name'], epoch, j,
t['cost']['cost'] / t['cost']['N']))
t['name'], epoch, j, t['average_loss']))
if j % 400 == 0:
logging.info("valid {} {} {} {}".format(
t['name'], epoch, j,
Expand All @@ -138,9 +139,14 @@ def experiment(net, tasks, config):
result[t['name']] = t['eval'](t['net'],
t['data']['val'].dataset)
net.train()
for t in tasks:
result[t['name']].update({'average_loss': t['average_loss']})
result['epoch'] = epoch
results.append(result)
json.dump(result, out)
print('', file=out, flush=True)
# Saving model
logging.info("Saving model in net.{}.pt".format(epoch))
torch.save(net, "net.{}.pt".format(epoch))

return results
8 changes: 7 additions & 1 deletion platalea/text_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,11 @@ def val_loss():
optimizer = create_optimizer(config, net_parameters)
scheduler = create_scheduler(config, optimizer, data)

results = []
with open("result.json", "w") as out:
for epoch in range(1, config['epochs']+1):
cost = Counter()
average_loss = None
for j, item in enumerate(data['train'], start=1):
item = {key: value.to(_device) for key, value in item.items()}
loss = net.cost(item)
Expand All @@ -90,17 +92,21 @@ def val_loss():
optimizer.step()
scheduler.step()
cost += Counter({'cost': loss.item(), 'N': 1})
average_loss = cost['cost'] / cost['N']
if j % 100 == 0:
logging.info("train {} {} {}".format(
epoch, j, cost['cost']/cost['N']))
epoch, j, average_loss))
if j % 400 == 0:
logging.info("valid {} {} {}".format(epoch, j, val_loss()))
result = platalea.score.score_text_image(net, data['val'].dataset)
result['average_loss'] = average_loss
result['epoch'] = epoch
results.append(result)
json.dump(result, out)
print('', file=out, flush=True)
logging.info("Saving model in net.{}.pt".format(epoch))
torch.save(net, "net.{}.pt".format(epoch))
return results


def get_default_config(hidden_size_factor=1024):
Expand Down
Loading