After submitting all jobs with `source slurm/whisper_decode_video_slurm_wrapper.sh`, use this notebook to print the results of all decoding runs. It will load the decoding WER / BLEU scores and print them in a convinient table.

In [9]:
import os
# NOTE: select checkpoint to run
checkpoint='whisper-flamingo_en-x_small.pt'

noises =[1000, 0] # clean, 0
modalities = ['asr', 'avsr']

# select the beam size
# beam = 1
beam = 15

# root = '../decode/models/checkpoint/'
root = '../decode/models/'
langs = ['en', 'el', 'es', 'fr', 'it', 'pt', 'ru'] 

noise_fn = 'muavic'
# noise_fn = 'lrs3'

results = {lang: {modality: {noise: 0 for noise in noises} for modality in modalities} for lang in langs}
for lang in langs:
    for noise in noises:
        for modality in modalities:
            try:
                file = 'wer.368862' if lang == 'en' else 'bleu.368862'
                print(os.path.join(root, checkpoint, lang, 'test', modality, 'snr-{}'.format(noise), 'visible-0', 'beam-{}'.format(beam), noise_fn, file))
                with open(os.path.join(root, checkpoint, lang, 'test', modality, 'snr-{}'.format(noise), 'visible-0', 'beam-{}'.format(beam), noise_fn, file)) as f:                    
                    first_line = f.readline().strip('\n')
                    prefix = 5 if lang == 'en' else 6
                    results[lang][modality][noise] = round(float(first_line[prefix:]), 1)
            except:
                continue

../decode/models/whisper-flamingo_en-x_small.pt/en/test/asr/snr-1000/visible-0/beam-15/muavic/wer.368862
../decode/models/whisper-flamingo_en-x_small.pt/en/test/avsr/snr-1000/visible-0/beam-15/muavic/wer.368862
../decode/models/whisper-flamingo_en-x_small.pt/en/test/asr/snr-0/visible-0/beam-15/muavic/wer.368862
../decode/models/whisper-flamingo_en-x_small.pt/en/test/avsr/snr-0/visible-0/beam-15/muavic/wer.368862
../decode/models/whisper-flamingo_en-x_small.pt/el/test/asr/snr-1000/visible-0/beam-15/muavic/bleu.368862
../decode/models/whisper-flamingo_en-x_small.pt/el/test/avsr/snr-1000/visible-0/beam-15/muavic/bleu.368862
../decode/models/whisper-flamingo_en-x_small.pt/el/test/asr/snr-0/visible-0/beam-15/muavic/bleu.368862
../decode/models/whisper-flamingo_en-x_small.pt/el/test/avsr/snr-0/visible-0/beam-15/muavic/bleu.368862
../decode/models/whisper-flamingo_en-x_small.pt/es/test/asr/snr-1000/visible-0/beam-15/muavic/bleu.368862
../decode/models/whisper-flamingo_en-x_small.pt/es/test/av

In [10]:
# Extract languages
languages = list(results.keys())

# Extract results
audio_clean = [results[lang][modalities[0]][noises[0]] for lang in languages]
audio_visual_clean = [results[lang][modalities[1]][noises[0]] for lang in languages]
audio_babble_lrs3 = [results[lang][modalities[0]][noises[1]] for lang in languages]
audio_visual_babble_lrs3 = [results[lang][modalities[1]][noises[1]] for lang in languages]

# Print table headers
print(checkpoint)
# Print language row
print('Languages ', end='')
for lang in languages:
    print(lang + ' ', end='')
print()

# Print Audio-Clean row
print('Audio-Clean ', end='')
for val in audio_clean:
    print(str(val) + ' ', end='')
print()

# Print Audio-Visual-Clean row
print('Audio-Visual-Clean ', end='')
for val in audio_visual_clean:
    print(str(val) + ' ', end='')
print()

# Print Audio-Babble row
print('Audio-Babble-LRS3 ', end='')
for val in audio_babble_lrs3:
    print(str(val) + ' ', end='')
print()

# Print Audio-Visual-Babble row
print('Audio-Visual-Babble-LRS3 ', end='')
for val in audio_visual_babble_lrs3:
    print(str(val) + ' ', end='')
print()


whisper-flamingo_en-x_small.pt
Languages en el es fr it pt ru 
Audio-Clean 0 0 0 0 0 0 0 
Audio-Visual-Clean 2.0 22.7 27.0 24.7 20.7 21.3 15.5 
Audio-Babble-LRS3 0 0 0 0 0 0 0 
Audio-Visual-Babble-LRS3 10.6 18.9 22.1 21.1 17.1 18.3 13.1 
