In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu, corpus_bleu
from nsd_access import NSDAccess
import os, sys
from tqdm import tqdm

2022-04-21 13:54:13.022645: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/software/spack/spack-0.11.2/opt/spack/linux-rhel7-x86_64/gcc-5.4.0/openmpi-1.10.7-jdc7f4fjdq5roxhadufd6h66xkwuytss/lib:/usr/local/software/spack/spack-0.11.2/opt/spack/linux-rhel7-x86_64/gcc-4.8.5/gcc-5.4.0-fis24ggupugiobii56fesif2y3qulpdr/lib64:/usr/local/software/spack/spack-0.11.2/opt/spack/linux-rhel7-x86_64/gcc-4.8.5/gcc-5.4.0-fis24ggupugiobii56fesif2y3qulpdr/lib:/usr/local/Cluster-Apps/cuda/8.0/lib64:/usr/local/Cluster-Apps/cuda/8.0/lib:/usr/local/software/global/lib:/usr/local/Cluster-Apps/vgl/2.5.1/64/lib:/usr/local/software/slurm/current/lib
2022-04-21 13:54:13.022713: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Which trained model and checkpoint epoch to evaluate.
model = 'all_subjects'
epoch = 71
# Alternative runs (uncomment one pair to evaluate it instead):
# model = 'subject_2_baseline2'
# epoch = 80
# model = 'subject_2_both_layer_norm'
# epoch = 25

# Project root; the trailing slash is kept because paths are built directly on top of it.
home_dir = '/home/hpcgies1/Masters-Thesis/AttemptFour/'
# Output-captions file of the selected run, derived from the project root
# so the root is only written down once.
model_path = f'{home_dir}Log/{model}/eval_out/output_captions_{epoch}.npy'

## Auxiliary functions

In [3]:
# Accessor object for the NSD data stored under the given directory.
nsd_loader = NSDAccess('/home/hpcgies1/rds/hpc-work/NIC/NSD/')
# Read the stimuli description CSV once and attach it to the loader
# (presumably so later COCO-info lookups can reuse it — confirm against nsd_access).
nsd_loader.stim_descriptions = pd.read_csv(nsd_loader.stimuli_description_file, index_col=0)

In [4]:
def load_data(fname):
    """Load a NumPy array from `fname` and drop its trailing singleton axis.

    Args:
        fname: path to a file readable by `np.load`; the stored array must
            have a last axis of length 1.

    Returns:
        The loaded array with its last axis removed.

    Raises:
        ValueError: if the last axis does not have length 1 (from `np.squeeze`).
    """
    # Use a context manager so the file handle is closed as soon as the
    # array has been read, instead of being left to the garbage collector.
    with open(fname, 'rb') as f:
        data = np.load(f)
    return np.squeeze(data, axis=-1)

def load_tokenizer(fname):
    """Rebuild a tokenizer from the JSON text stored in `fname`.

    The file is read in text mode and its full contents are handed to
    `tokenizer_from_json`; the resulting tokenizer object is returned.
    """
    with open(fname, 'r') as handle:
        return tokenizer_from_json(handle.read())

def remove_pad_end(cap: str):
    """Strip every `<pad>` and `<end>` token from a space-separated caption.

    The caption is split on single spaces, the two special tokens are
    dropped wherever they occur, and the remaining pieces are re-joined
    with single spaces.
    """
    special_tokens = ('<pad>', '<end>')
    kept = filter(lambda piece: piece not in special_tokens, cap.split(" "))
    return " ".join(kept)

def get_target_caption(key):
    """Return the first target caption for a key in [1, 73000].

    The key is shifted by one before being passed to the NSD loader, and
    the loader's console output is silenced while it runs.
    """
    image_index = int(key) - 1
    with HiddenPrints():
        annotations = nsd_loader.read_image_coco_info([image_index])  # list(dict)
    first_annotation = annotations[0]
    return first_annotation['caption']

def get_target_captions(keys: list, n_captions: int = 5):
    """Return the first `n_captions` target captions for each key.

    Args:
        keys: image keys in [1, 73000]; each is shifted by one before being
            passed to the NSD loader.
        n_captions: number of captions to keep per image. Defaults to 5.

    Returns:
        A list with one inner list per key, each holding `n_captions`
        caption strings in the order the loader returned them.

    Raises:
        IndexError: if an image has fewer than `n_captions` annotations.
    """
    indices = [int(k) - 1 for k in keys]
    with HiddenPrints():  # silence whatever the loader prints
        per_image = nsd_loader.read_image_coco_info(indices)  # returns list(list(dict))
    return [
        [annotations[j]['caption'] for j in range(n_captions)]
        for annotations in per_image
    ]

def clean_targets(targets: list):
    """Normalise reference captions for scoring.

    For every caption: periods and commas are replaced by spaces, the text
    is split on whitespace (so repeated spaces, tabs and newlines never
    produce empty or compound tokens), each word is lower-cased, and the
    words are re-joined with single spaces.

    Args:
        targets: list of lists of caption strings (one inner list per image).
            Inner lists may hold any number of captions; all are cleaned.

    Returns:
        A new list of lists with the same structure holding the cleaned strings.
    """
    cleaned = []
    for captions in targets:
        cleaned.append([
            " ".join(
                word.lower()
                for word in caption.replace(".", " ").replace(",", " ").split()
            )
            for caption in captions
        ])
    return cleaned

class HiddenPrints:
    """Context manager that temporarily suppresses print output.

    Usage: `with HiddenPrints(): ...`. Inside the block `sys.stdout` points
    at `os.devnull`; on exit the stream that was active on entry is
    restored, including when the block raises (exceptions propagate).
    """
    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own reference to the sink so __exit__ closes exactly this
        # file, even if code inside the block reassigns sys.stdout.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first so stdout is usable again even if closing fails.
        sys.stdout = self._original_stdout
        self._devnull.close()

## Load data

In [5]:
# Tokenizer stored in the eval_out directory of the selected model.
tokenizer = load_tokenizer(os.path.join(home_dir, 'Log', model, 'eval_out', 'tokenizer.json'))
# Conditions table from the subj02 CSV; os.path.join avoids a doubled slash
# when home_dir already ends with '/'.
conditions = pd.read_csv(os.path.join(home_dir, 'TrainData', 'subj02_conditions2.csv'))
# NSD keys of the rows flagged is_test == 1, as a NumPy array.
test_keys = conditions.loc[conditions['is_test'] == 1, 'nsd_key'].values
output = load_data(model_path)
output.shape

(4120, 15)

In [6]:
# Turn the loaded token-id sequences back into caption strings.
captions = tokenizer.sequences_to_texts(output)
print(len(captions))
# Fetch the reference captions for the test keys and clean them in one step.
targets = clean_targets(get_target_captions(test_keys))
print(len(targets))

4120
515


In [7]:
def compute_bleu(captions: list, targets: list):
    """Print cumulative corpus BLEU-1..BLEU-4 for the first block of captions.

    Only the first `len(targets)` captions are scored; caption `i` is
    compared against the references in `targets[i]`. `<pad>` and `<end>`
    tokens are removed from the captions before scoring, and both captions
    and references are tokenised by splitting on single spaces.

    Args:
        captions: caption strings to evaluate.
        targets: one list of reference caption strings per image.

    Returns:
        The last score computed, i.e. the cumulative BLEU-4 score. Each of
        the four cumulative scores is also printed, in order.
    """
    # Cumulative n-gram weights for BLEU-1, BLEU-2, BLEU-3 and BLEU-4.
    cumulative_weights = [
        (1./1., 0, 0, 0),
        (1./2., 1./2., 0, 0),
        (1./3., 1./3., 1./3., 0),
        (1./4., 1./4., 1./4., 1./4.),
    ]

    # One block holds one caption per target image; derive its size from the
    # targets instead of hard-coding it.
    block = captions[:len(targets)]
    hypothesis = [remove_pad_end(cap).split(" ") for cap in block]
    references = [
        [ref.split(" ") for ref in refs]
        for refs in targets[:len(block)]
    ]

    chencherry = SmoothingFunction()
    for w in cumulative_weights:
        # method0 applies no smoothing.
        b_score = corpus_bleu(references, hypothesis, weights=w, smoothing_function=chencherry.method0)
        print(b_score)
    return b_score

# compute_bleu prints each cumulative score itself and returns the last one,
# so the value printed here repeats the final line it already printed.
bleu = compute_bleu(captions, targets)
print(bleu)

0.5601214574898785
0.374840023975205
0.2513694386607573
0.17357007227610402
0.17357007227610402


In [31]:
def compute_bleu_single(captions: list, targets: list):
    """Return the cumulative corpus BLEU-4 score of `captions` against `targets`.

    Caption `i` is scored against the references in `targets[i]`. `<pad>`
    and `<end>` tokens are stripped from the captions first; captions and
    references are tokenised by splitting on single spaces. No smoothing
    is applied (method0).
    """
    bleu4_weights = (1./4., 1./4., 1./4., 1./4.)
    smoother = SmoothingFunction()

    cleaned = [remove_pad_end(text) for text in captions]
    hypothesis = [text.split(" ") for text in cleaned]
    # Index into targets (rather than zip) so a caption without a matching
    # target entry still raises IndexError.
    references = [
        [ref.split(" ") for ref in targets[idx]]
        for idx in range(len(cleaned))
    ]

    return corpus_bleu(
        references,
        hypothesis,
        weights=bleu4_weights,
        smoothing_function=smoother.method0,
    )

# Captions are assumed to be laid out as consecutive per-subject blocks with
# one caption per target image, so both sizes follow from the data.
n_per_subject = len(targets)
n_subjects = len(captions) // n_per_subject
for subject in range(n_subjects):
    start = subject * n_per_subject
    score = compute_bleu_single(captions[start:start + n_per_subject], targets)
    print(f"Sub: {subject+1} - {score:.3f}")


Sub: 1 - 0.174
Sub: 2 - 0.172
Sub: 3 - 0.161
Sub: 4 - 0.147
Sub: 5 - 0.181
Sub: 6 - 0.163
Sub: 7 - 0.170
Sub: 8 - 0.121
