In [9]:
from neural_nlp.benchmarks import benchmark_pool
pereira = benchmark_pool["Pereira2018-encoding"]
data = pereira._load_assembly()

data

In [10]:
# post-processing copied from PereiraBenchmark in neural.py (lines 393/394)

data = data.dropna('neuroid')

print(data.shape)

(627, 49760)


In [14]:
import numpy as np

# Extract stimuli (sentences) from xarray
stimuli_texts = {}
for _, row in data.attrs['stimulus_set'].iterrows():
    stimuli_texts[row['stimulus_id']] = row['sentence']


# Extract fmri data
stimuli = []
brain_reps = []
for idx, stimulus_id in enumerate(data.coords['stimulus_id']):
    stimuli.append(stimuli_texts[stimulus_id.item()])
    brain_reps.append(data.values[idx, :])

brain_reps = np.array(brain_reps)

assert len(stimuli) == len(brain_reps)

print(brain_reps.shape)

(627, 49760)


In [13]:
# import pickle
# from pathlib import Path
#
# corpora = Path(__file__) / "../corpora/"
# corpora.mkdir(exist_ok=True)
#
# pereira_datafile = corpora / "pereira.pkl"
#
# with open(pereira_datafile, 'wb') as f:
#     pickle.dump((stimuli, brain_reps), f)

In [None]:
# start running the notebook here if you 
# have the pereira.pkl file already!

# import pickle
# import numpy as np
#
#
#
# with open('../corpora/pereira.pkl', 'rb') as f:
#     stimuli, brain_reps = pickle.load(f)
#
# print(brain_reps.shape)

In [15]:
import torch
from transformers import GPT2Tokenizer, GPT2Model

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2Model.from_pretrained('gpt2', output_hidden_states=True)
model = model.eval()  # Turn off dropout

model

Downloading:   0%|          | 0.00/0.99M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/523M [00:00<?, ?B/s]

GPT2Model(
  (wte): Embedding(50257, 768)
  (wpe): Embedding(1024, 768)
  (drop): Dropout(p=0.1, inplace=False)
  (h): ModuleList(
    (0): GPT2Block(
      (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): GPT2Attention(
        (c_attn): Conv1D()
        (c_proj): Conv1D()
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
      (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): GPT2MLP(
        (c_fc): Conv1D()
        (c_proj): Conv1D()
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (1): GPT2Block(
      (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): GPT2Attention(
        (c_attn): Conv1D()
        (c_proj): Conv1D()
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
      (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): GPT2MLP

In [16]:
import numpy as np
from tqdm import tqdm

hidden_states = []
with torch.no_grad():
    for stimulus in tqdm(stimuli):
        output = model(
            **tokenizer([stimulus], add_special_tokens=True, return_tensors='pt')
        )
        
        layer_reps = []
        for layer_hidden_states in output.hidden_states:
            # in models/implementations.py, Transformer uses the rep
            # of the last word (line 595)
            layer_reps.append(layer_hidden_states.squeeze()[-1, :].numpy())
        hidden_states.append(layer_reps)
hidden_states = np.array(hidden_states)
print(hidden_states.shape)

100%|██████████| 627/627 [00:19<00:00, 32.22it/s]

(627, 13, 768)





In [19]:
from scipy.stats import pearsonr
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression

k_folds = KFold(n_splits=5, shuffle=True)

pearsonrs = []
for fold, (train_indices, test_indices) in enumerate(k_folds.split(hidden_states)):
    pearsonrs.append([])
    train_brain_reps, test_brain_reps = brain_reps[train_indices], brain_reps[test_indices]
    for layer_num in tqdm(range(len(hidden_states[0])), desc='fold%s' % fold):
        train_hidden_states, test_hidden_states = \
            hidden_states[train_indices, layer_num, :], \
            hidden_states[test_indices, layer_num, :]


        # TODO: Are they doing any kind of hyperparameter tuning
        # (regularization, etc) here?  We're using SKLearn's defaults

        model = LinearRegression().fit(train_hidden_states, train_brain_reps)
        pred_brain_reps = model.predict(test_hidden_states)
        
        # We aggregated voxel/electrode/ROI predictivity scores by taking the
        # median of scores for each participant’s voxels/electrodes/ROIs and
        # then computing the median across participants. Finally, this score was
        # divided by the estimated ceiling value (see below) to yield a final score in
        # the range [0, 1].
        
        # https://github.com/brain-score/brain-score/blob/master/brainscore/metrics/xarray_utils.py#L78
        # https://github.com/brain-score/brain-score/blob/master/brainscore/metrics/regression.py#L33
        # https://github.com/brain-score/brain-score/blob/master/brainscore/metrics/transformations.py#L42
        
        # not totally sure this is right...
        
        layer_pearson_rs = []
        for pred_brain_rep, test_brain_rep in zip(pred_brain_reps, test_brain_reps):
            layer_pearson_rs.append(pearsonr(pred_brain_rep, test_brain_rep))
        
        pearsonrs[-1].append(np.median(layer_pearson_rs))


fold0: 100%|██████████| 13/13 [03:21<00:00, 15.51s/it]
fold1: 100%|██████████| 13/13 [03:08<00:00, 14.50s/it]
fold2: 100%|██████████| 13/13 [02:49<00:00, 13.06s/it]
fold3: 100%|██████████| 13/13 [03:30<00:00, 16.21s/it]
fold4: 100%|██████████| 13/13 [03:56<00:00, 18.18s/it]


In [20]:
pearsonrs = np.array(pearsonrs)
for layer_num in range(pearsonrs.shape[1]):
    print((layer_num, np.mean(pearsonrs[:, layer_num]), np.median(pearsonrs[:, layer_num])))


(0, 0.00408277367472118, 8.939323940392131e-14)
(1, 1.422616914515031e-15, 1.8549810867016208e-22)
(2, 6.320003546318065e-08, 6.83100467308954e-22)
(3, 9.14089075647834e-14, 5.9366842767137336e-21)
(4, 8.801125140245791e-07, 3.5496849966057153e-15)
(5, 4.2953452545340317e-19, 1.8873712591467982e-27)
(6, 2.3607197244805418e-12, 3.500090920938019e-24)
(7, 2.917129875655254e-08, 2.6814374774741727e-11)
(8, 3.231244327004638e-11, 1.435822032253782e-14)
(9, 1.2707904948769912e-06, 6.587829655035209e-15)
(10, 8.891380065260758e-18, 1.3435720055964806e-22)
(11, 1.8299568950772305e-17, 1.6172832278772864e-20)
(12, 7.724724866125182e-11, 1.0161555789294238e-19)


In [22]:
# Compare with the ceiling
pereira.ceiling