# Train Encoder

In [None]:
import os
import numpy as np
import json
import argparse

import config
from GPT import GPT
from StimulusModel import LMFeatures
from utils_stim import get_stim
from utils_resp import get_resp
from utils_ridge.ridge import ridge, bootstrap_ridge
# Seed NumPy's global random generator so any np.random draws in this section are repeatable.
np.random.seed(42)

If you get path errors, change the working directory with `os.chdir('whatever-your-root-is/semantic-decoding/')`.

In [3]:
# Options for the encoding-model section. They are parsed from a fixed argument
# string because the notebook has no real command line to read from.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--subject",
    type = str,
    required = True,
)
parser.add_argument(
    "--gpt",
    type = str,
    default = "perceived",
)
parser.add_argument(
    "--sessions",
    nargs = "+",
    type = int,
    default = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 18, 20],
)
args = parser.parse_args("--subject S1".split())

In [4]:
# training stories
# Look up the stories recorded in each requested session and flatten them into one
# list, keeping the session order given in args.sessions.
story_map_file = os.path.join(config.DATA_TRAIN_DIR, "sess_to_story.json")
with open(story_map_file, "r") as f:
    sess_to_story = json.load(f)

stories = []
for sess in args.sessions:
    # the JSON mapping is indexed by the session number written as a string
    stories += sess_to_story[str(sess)]

In [5]:
 # load gpt
# Read the vocabulary of the selected checkpoint, then wrap the model and build the
# feature extractor used to turn word sequences into stimulus features.
gpt_dir = os.path.join(config.DATA_LM_DIR, args.gpt)
with open(os.path.join(gpt_dir, "vocab.json"), "r") as f:
    gpt_vocab = json.load(f)
gpt = GPT(
    path = os.path.join(gpt_dir, "model"),
    vocab = gpt_vocab,
    device = config.GPT_DEVICE,
)
features = LMFeatures(
    model = gpt,
    layer = config.GPT_LAYER,
    context_words = config.GPT_WORDS,
)

  return self.fget.__get__(instance, owner)()


In [10]:
# estimate encoding model
# Build the stimulus features for all training stories. Besides the feature matrix,
# get_stim returns tr_stats and word_stats; tr_stats is passed back into get_stim
# for the per-story stimuli further down, and both are stored with the saved model.
rstim, tr_stats, word_stats = get_stim(stories, features)

In [32]:
# Load the subject's responses for the training stories as a single stacked array
# (stack = True). The same helper is called with stack = False further down, where
# its result is indexed per story.
rresp = get_resp(args.subject, stories, stack = True)

In [34]:
# Display the dimensions of the stacked response array as a sanity check.
rresp.shape

(27449, 81126)

In [35]:
# Number of chunks handed to bootstrap_ridge: the row count of rresp divided by 5 and
# by the chunk length, rounded up.
# NOTE(review): presumably this holds out roughly a fifth of the samples per bootstrap — confirm in bootstrap_ridge.
nchunks = int(np.ceil(rresp.shape[0] / 5 / config.CHUNKLEN))

In [40]:
# Fit the encoding model with bootstrapped ridge regression from stimulus features to
# responses. alphas is later indexed by voxel, and bscorrs is reduced to a per-voxel
# score in the voxel-selection cell.
weights, alphas, bscorrs = bootstrap_ridge(
    rstim,
    rresp,
    use_corr = False,
    alphas = config.ALPHAS,
    nboots = config.NBOOTS,
    chunklen = config.CHUNKLEN,
    nchunks = nchunks,
)

In [49]:
# Display the dimensions of the fitted weight array as a sanity check.
weights.shape

(3072, 81126)

In [47]:
# Keep a standalone copy of the fitted weights. The file is named after the subject
# being processed rather than a fixed "S1" (the path is unchanged for subject S1), and
# the target directory is created first so the save does not fail when it is missing.
# NOTE(review): '/workspace/weights' is an absolute, machine-specific location — consider moving it under a config path.
weights_dir = '/workspace/weights'
os.makedirs(weights_dir, exist_ok = True)
np.save(os.path.join(weights_dir, '%s.npy' % args.subject), weights)

In [50]:
# Collapse the bootstrap scores to one value per voxel: average over axis 2, then take
# the maximum over axis 0. The reduced scores get their own name so that bscorrs keeps
# the raw bootstrap_ridge output and this cell can be re-run without failing on an
# already-reduced array.
# NOTE(review): axis 2 is presumably bootstraps and axis 0 presumably alphas — confirm in bootstrap_ridge.
voxel_scores = bscorrs.mean(2).max(0)

# Indices of the config.VOXELS highest-scoring voxels, sorted into ascending index order.
vox = np.sort(np.argsort(voxel_scores)[-config.VOXELS:])

In [51]:
# Inputs for the noise-model estimate: per-story stimulus features (built with the
# training tr_stats passed in), per-story responses restricted to the selected voxels,
# and a zeroed voxel-by-voxel accumulator.
stim_dict = dict(
    (story, get_stim([story], features, tr_stats = tr_stats))
    for story in stories
)
resp_dict = get_resp(args.subject, stories, stack = False, vox = vox)
n_vox = len(vox)
noise_model = np.zeros((n_vox, n_vox))

In [52]:
## 138min 16.9s

# Leave-one-story-out estimate of the residual covariance: fit ridge weights on all
# other stories, take the residuals on the held-out story, and accumulate their
# (scale-normalised) outer product averaged over stories.
for hstory in stories:
    train_stories = [tstory for tstory in stories if tstory != hstory]

    # training data: every story except the held-out one, stacked along the first axis
    tstim = np.vstack([stim_dict[tstory] for tstory in train_stories])
    hstim = stim_dict[hstory]
    tresp = np.vstack([resp_dict[tstory] for tstory in train_stories])
    hresp = resp_dict[hstory]

    # per-voxel regularisation taken from the bootstrap fit, restricted to selected voxels
    bs_weights = ridge(tstim, tresp, alphas[vox])
    resids = hresp - hstim.dot(bs_weights)
    bs_noise_model = resids.T.dot(resids)

    # divide by the mean diagonal so each story contributes on the same scale
    noise_model += bs_noise_model / np.diag(bs_noise_model).mean() / len(stories)

In [53]:
# save
# Write every fitted quantity into one .npz archive inside the subject's model directory.
save_location = os.path.join(config.MODEL_DIR, args.subject)
os.makedirs(save_location, exist_ok = True)
model_file = os.path.join(save_location, "encoding_model_%s" % args.gpt)
np.savez(
    model_file,
    weights = weights,
    noise_model = noise_model,
    alphas = alphas,
    voxels = vox,
    stories = stories,
    tr_stats = np.array(tr_stats),
    word_stats = np.array(word_stats),
)

# Train Wordrate

In [21]:
import os
import numpy as np
import json
import argparse

import config
from utils_stim import get_story_wordseqs
from utils_resp import get_resp
from utils_ridge.DataSequence import DataSequence
from utils_ridge.util import make_delayed
from utils_ridge.ridge import bootstrap_ridge
# Seed NumPy's global random generator so any np.random draws in this section are repeatable.
np.random.seed(42)

In [23]:
# Options for the word-rate section, parsed from a fixed argument string because the
# notebook has no real command line to read from.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--subject",
    type = str,
    required = True,
)
parser.add_argument(
    "--sessions",
    nargs = "+",
    type = int,
    default = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 18, 20],
)
args = parser.parse_args("--subject S1".split())

In [25]:
# training stories
# Look up the stories recorded in each requested session and flatten them into one
# list, keeping the session order given in args.sessions.
story_map_file = os.path.join(config.DATA_TRAIN_DIR, "sess_to_story.json")
with open(story_map_file, "r") as f:
    sess_to_story = json.load(f)

stories = []
for sess in args.sessions:
    # the JSON mapping is indexed by the session number written as a string
    stories += sess_to_story[str(sess)]

In [26]:
# ROI voxels
# The subject's ROI file is a JSON mapping; it is indexed by ROI name ("speech",
# "auditory") in the fitting loop of this section.
roi_file = os.path.join(config.DATA_TRAIN_DIR, "ROIs", "%s.json" % args.subject)
with open(roi_file, "r") as f:
    vox = json.load(f)

# estimate word rate model
# Make sure the subject's model directory exists before anything is written to it.
save_location = os.path.join(config.MODEL_DIR, args.subject)
os.makedirs(save_location, exist_ok = True)

In [27]:
# Regression target for the word-rate model: one value per retained time point,
# obtained by resampling a vector of ones (one per word time) with "lanczos" chunk sums.
wordseqs = get_story_wordseqs(stories)
rates = {}
for story in stories:
    ds = wordseqs[story]
    unit_words = np.ones(len(ds.data_times))
    words = DataSequence(unit_words, ds.split_inds, ds.data_times, ds.tr_times)
    rates[story] = words.chunksums("lanczos", window = 3)

# Drop 5 + TRIM samples from the start and TRIM from the end of every story, then
# stack the stories into a single column vector.
# NOTE(review): presumably this mirrors the trimming applied to the responses in get_resp — confirm.
trimmed_rates = [rates[story][5 + config.TRIM : -config.TRIM] for story in stories]
nz_rate = np.nan_to_num(np.concatenate(trimmed_rates, axis = 0)).reshape([-1, 1])

# Centre the target; the mean is saved with the model alongside the weights.
mean_rate = np.mean(nz_rate)
rate = nz_rate - mean_rate

In [28]:
## 35m 25.9s

# Fit one word-rate model per ROI: delayed copies of the ROI responses are regressed
# onto the centred word rate, and the weights are saved with the mean rate and the
# voxel list of that ROI.
for roi in ["speech", "auditory"]:
    roi_voxels = vox[roi]
    resp = get_resp(args.subject, stories, stack = True, vox = roi_voxels)
    delresp = make_delayed(resp, config.RESP_DELAYS)
    nchunks = int(np.ceil(delresp.shape[0] / 5 / config.CHUNKLEN))

    # only the weights are kept; the other two return values are discarded
    weights, _, _ = bootstrap_ridge(
        delresp,
        rate,
        use_corr = False,
        alphas = config.ALPHAS,
        nboots = config.NBOOTS,
        chunklen = config.CHUNKLEN,
        nchunks = nchunks,
    )

    np.savez(
        os.path.join(save_location, "word_rate_model_%s" % roi),
        weights = weights,
        mean_rate = mean_rate,
        voxels = roi_voxels,
    )

# Run Decoder

In [1]:
import os
import numpy as np
import json
import argparse
import h5py
from pathlib import Path

import config
from GPT import GPT
from Decoder import Decoder, Hypothesis
from LanguageModel import LanguageModel
from EncodingModel import EncodingModel
from StimulusModel import StimulusModel, get_lanczos_mat, affected_trs, LMFeatures
from utils_stim import predict_word_rate, predict_word_times

  _C._set_default_tensor_type(t)


In [31]:
# Options for the decoding section, parsed from a fixed argument string because the
# notebook has no real command line to read from.
parser = argparse.ArgumentParser()
for required_flag in ("--subject", "--experiment", "--task"):
    parser.add_argument(required_flag, type = str, required = True)
args = parser.parse_args("--subject S1 --experiment perceived_speech --task wheretheressmoke".split())

In [32]:
# determine GPT checkpoint based on experiment
if args.experiment in ["imagined_speech"]:
    gpt_checkpoint = "imagined"
else:
    gpt_checkpoint = "perceived"

# determine word rate model voxels based on experiment
# The movie experiment is spelled "perceived_movie" where the decoded word times are
# shifted at the end of this section, so that spelling has to be recognised here as
# well; "perceived_movies" is still accepted so existing invocations keep working.
if args.experiment in ["imagined_speech", "perceived_movie", "perceived_movies"]:
    word_rate_voxels = "speech"
else:
    word_rate_voxels = "auditory"

In [33]:
# Read the recorded test responses for this subject / experiment / task. The context
# manager closes the HDF5 file even if reading the dataset raises.
resp_file = os.path.join(config.DATA_TEST_DIR, "test_response", args.subject, args.experiment, args.task + ".hf5")
with h5py.File(resp_file, "r") as hf:
    # copy the whole "data" dataset into memory and replace NaNs with zeros
    resp = np.nan_to_num(hf["data"][:])

In [34]:
# Display the dimensions of the test response array as a sanity check.
resp.shape

(291, 81126)

In [35]:
# load gpt
# Two vocabularies are read: the checkpoint's own vocabulary for the GPT wrapper and a
# separate decoder vocabulary that is handed to the language model.
gpt_dir = os.path.join(config.DATA_LM_DIR, gpt_checkpoint)
with open(os.path.join(gpt_dir, "vocab.json"), "r") as f:
    gpt_vocab = json.load(f)
with open(os.path.join(config.DATA_LM_DIR, "decoder_vocab.json"), "r") as f:
    decoder_vocab = json.load(f)

gpt = GPT(
    path = os.path.join(gpt_dir, "model"),
    vocab = gpt_vocab,
    device = config.GPT_DEVICE,
)
features = LMFeatures(
    model = gpt,
    layer = config.GPT_LAYER,
    context_words = config.GPT_WORDS,
)
lm = LanguageModel(
    gpt,
    decoder_vocab,
    nuc_mass = config.LM_MASS,
    nuc_ratio = config.LM_RATIO,
)

In [36]:
# load models
# Both archives are the ones written by the training sections of this notebook.
load_location = os.path.join(config.MODEL_DIR, args.subject)
word_rate_file = os.path.join(load_location, "word_rate_model_%s.npz" % word_rate_voxels)
encoding_file = os.path.join(load_location, "encoding_model_%s.npz" % gpt_checkpoint)

word_rate_model = np.load(word_rate_file, allow_pickle = True)
encoding_model = np.load(encoding_file)

weights, noise_model = encoding_model["weights"], encoding_model["noise_model"]
tr_stats, word_stats = encoding_model["tr_stats"], encoding_model["word_stats"]

em = EncodingModel(
    resp,
    weights,
    encoding_model["voxels"],
    noise_model,
    device = config.EM_DEVICE,
)
em.set_shrinkage(config.NM_ALPHA)

# refuse to decode a story the encoding model was trained on
assert args.task not in encoding_model["stories"]

In [37]:
# predict word times
word_rate = predict_word_rate(
    resp,
    word_rate_model["weights"],
    word_rate_model["voxels"],
    word_rate_model["mean_rate"],
)

# perceived speech starts its timeline at -10; every other experiment starts at 0
starttime = -10 if args.experiment == "perceived_speech" else 0
word_times, tr_times = predict_word_times(word_rate, resp, starttime = starttime)
lanczos_mat = get_lanczos_mat(word_times, tr_times)

  val = window * np.sin(np.pi*t) * np.sin(np.pi*t/window) / (np.pi**2 * t**2)


In [38]:
# 169m 45.9s

# decode responses
# One iteration per predicted word time: the language model proposes continuations for
# the hypotheses on the beam, the encoding model scores each continuation against the
# recorded responses, and the decoder is extended.
# NOTE(review): what the Decoder / StimulusModel / EncodingModel methods do internally is
# inferred from their names and arguments — confirm against their definitions.
decoder = Decoder(word_times, config.WIDTH)
sm = StimulusModel(lanczos_mat, tr_stats, word_stats[0], device = config.SM_DEVICE)
for sample_index in range(len(word_times)):
    # presumably the TRs whose predicted response depends on the word at this index — confirm
    trs = affected_trs(decoder.first_difference(), sample_index, lanczos_mat)
    # amount of preceding context given to the language model (at least 5 via floor)
    ncontext = decoder.time_window(sample_index, config.LM_TIME, floor = 5)
    # one (candidate words, log-probabilities) pair per hypothesis, in beam order
    beam_nucs = lm.beam_propose(decoder.beam, ncontext)
    for c, (hyp, nextensions) in enumerate(decoder.get_hypotheses()):
        nuc, logprobs = beam_nucs[c]
        # nothing proposed for this hypothesis, so nothing to score
        if len(nuc) < 1: continue
        # candidate word sequences: the hypothesis so far plus each proposed next word
        extend_words = [hyp.words + [x] for x in nuc]
        extend_embs = list(features.extend(extend_words))
        # stimulus variants for the affected TRs, one per candidate, scored by the encoding model
        stim = sm.make_variants(sample_index, hyp.embs, extend_embs, trs)
        likelihoods = em.prs(stim, trs)
        local_extensions = [Hypothesis(parent = hyp, extension = x) for x in zip(nuc, logprobs, extend_embs)]
        decoder.add_extensions(local_extensions, likelihoods, nextensions)
    # commit the extensions gathered for this word time before moving on
    decoder.extend(verbose = False)

In [41]:
# Shift the decoded word times by 10 for the movie and multi-speaker experiments, then
# store the decoder output under the results directory for this subject / experiment.
if args.experiment in ["perceived_movie", "perceived_multispeaker"]:
    decoder.word_times += 10

save_location = os.path.join(config.RESULT_DIR, args.subject, args.experiment)
os.makedirs(save_location, exist_ok = True)
result_file = os.path.join(save_location, args.task)
decoder.save(result_file)

# Evaluate outputs

In [48]:
import os
import numpy as np
import json
import argparse

import config
from utils_eval import generate_null, load_transcript, windows, segment_data, WER, BLEU, METEOR, BERTSCORE

In [49]:
# Options for the evaluation section, parsed from a fixed argument string because the
# notebook has no real command line to read from.
parser = argparse.ArgumentParser()
for required_flag in ("--subject", "--experiment", "--task"):
    parser.add_argument(required_flag, type = str, required = True)
parser.add_argument(
    "--metrics",
    nargs = "+",
    type = str,
    default = ["WER", "BLEU", "METEOR", "BERT"],
)
parser.add_argument(
    "--references",
    nargs = "+",
    type = str,
    default = [],
)
parser.add_argument(
    "--null",
    type = int,
    default = 10,
)
args = parser.parse_args("--subject S1 --experiment perceived_speech --task wheretheressmoke".split())

In [51]:
# Default to scoring against the task's own transcript. A fresh list is assigned
# instead of appending: when --references is not given, args.references is the very
# list object registered as the parser default, and appending to it would change that
# default for any later parse with the same parser.
if len(args.references) == 0:
    args.references = [args.task]

# Evaluation segment boundaries; indexed by task name when the scoring windows are built.
with open(os.path.join(config.DATA_TEST_DIR, "eval_segments.json"), "r") as f:
    eval_segments = json.load(f)

In [52]:
# 2m 52.1s

# load language similarity metrics
# Only the metrics named in args.metrics are constructed; insertion order here is the
# order in which they are evaluated later.
metrics = {}
if "WER" in args.metrics:
    metrics["WER"] = WER(use_score = True)
if "BLEU" in args.metrics:
    metrics["BLEU"] = BLEU(n = 1)
if "METEOR" in args.metrics:
    metrics["METEOR"] = METEOR()
if "BERT" in args.metrics:
    # sentences used for the idf weighting are shipped with the test data
    idf_sents = np.load(os.path.join(config.DATA_TEST_DIR, "idf_segments.npy"))
    metrics["BERT"] = BERTSCORE(idf_sents = idf_sents, rescale = False, score = "recall")

  self.metric = load_metric("bleu", keep_in_memory=True)
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/2.48k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [53]:
# load prediction transcript
# The decoder output archive holds the decoded words and their times.
result_dir = os.path.join(config.RESULT_DIR, args.subject, args.experiment)
pred_path = os.path.join(result_dir, args.task + ".npz")
pred_data = np.load(pred_path)
pred_words = pred_data["words"]
pred_times = pred_data["times"]

In [54]:
# 8m 8.9sec

# generate null sequences
# The null word sequences are generated with the same checkpoint family as the decoder
# run and share the prediction's word times; args.null controls how many are drawn.
gpt_checkpoint = "imagined" if args.experiment in ["imagined_speech"] else "perceived"
null_word_list = generate_null(pred_times, gpt_checkpoint, args.null)

In [55]:
# Score the decoded transcript against each reference with every selected metric, both
# per window and for the whole story, and express each score as a z-score relative to
# the null sequences.
# NOTE(review): the recorded run of this cell ended with a ValueError about mismatched
# reference / hypothesis sentence counts (49 vs 56); presumably it is raised inside one
# of the metric implementations in utils_eval — confirm and fix there.
window_scores, window_zscores = {}, {}
story_scores, story_zscores = {}, {}

# The window boundaries and the segmented prediction / null sequences depend only on the
# task and the decoder output, not on the reference, so they are built once up front.
window_cutoffs = windows(*eval_segments[args.task], config.WINDOW)
pred_windows = segment_data(pred_words, pred_times, window_cutoffs)
null_window_list = [segment_data(null_words, pred_times, window_cutoffs) for null_words in null_word_list]

for reference in args.references:

    # load reference transcript
    ref_data = load_transcript(args.experiment, reference)
    ref_words, ref_times = ref_data["words"], ref_data["times"]

    # segment reference words into the shared windows
    ref_windows = segment_data(ref_words, ref_times, window_cutoffs)

    for mname, metric in metrics.items():
        key = (reference, mname)

        # get null score for each window and the entire story
        window_null_scores = np.array([metric.score(ref = ref_windows, pred = null_windows)
                                        for null_windows in null_window_list])
        story_null_scores = window_null_scores.mean(1)

        # Score the actual prediction once; the per-window and whole-story entries are
        # derived from the same values, so the metric is not evaluated a second time.
        pred_scores = metric.score(ref = ref_windows, pred = pred_windows)

        # get raw score and normalized score for each window
        window_scores[key] = pred_scores
        window_zscores[key] = (pred_scores - window_null_scores.mean(0)) / window_null_scores.std(0)

        # get raw score and normalized score for the entire story
        # (a copy keeps the two result dictionaries independent of each other)
        story_scores[key] = pred_scores.copy()
        story_zscores[key] = (pred_scores.mean() - story_null_scores.mean()) / story_null_scores.std()

# The four dictionaries are stored as pickled object arrays, so reading the archive back
# requires np.load(..., allow_pickle = True).
save_location = os.path.join(config.REPO_DIR, "scores", args.subject, args.experiment)
os.makedirs(save_location, exist_ok = True)
np.savez(os.path.join(save_location, args.task),
            window_scores = window_scores, window_zscores = window_zscores,
            story_scores = story_scores, story_zscores = story_zscores)

ValueError: After applying the transforms on the reference and hypothesis sentences, their lengths must match. Instead got 49 reference and 56 hypothesis sentences.