In [67]:
import logging
from pathlib import Path
import os
import sys
import mne
import torch
import numpy as np
import bm
from bm import play
from bm.train import main
from bm.events import Word
from matplotlib import pyplot as plt
from IPython import display as disp

# Reduce MNE's log output (False presumably maps to WARNING level — TODO confirm against MNE docs).
mne.set_log_level(False)
# Send INFO-level (and above) log records to stdout so they show up in the cell outputs.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# Change the working directory to the parent of the dora directory attached to the
# training entry point; later cells presumably rely on paths relative to it — TODO confirm.
os.chdir(main.dora.dir.parent)
# Export NO_DOWNLOAD=1 to the environment (presumably tells bm not to download data — verify in bm).
os.environ['NO_DOWNLOAD'] = '1'
from tqdm import tqdm

In [68]:
# Signature of the experiment (XP) whose solver is loaded and inspected in this notebook.
sig='c97c100b'

In [69]:
# Load the solver of the experiment identified by `sig`; the log line below lists the
# overrides that were used for that XP.
solver = play.get_solver_from_sig(sig) 

INFO:bm.play:Loading solver from XP c97c100b. Overrides used: ['model=clip_cnntransformer', 'dset.selections=["gwilliams2022"]', 'dset.bandpass=true', 'dset.bandpass_high=0.1', 'dset.bandpass_lower=40.0', 'seed=2037', 'is_sound=true']
{'wandb': {'use_wandb': True, 'project': 'brainmagick', 'group': 'brainmagick-group'}, 'num_prints': 5, 'device': 'cuda:5', 'verbose': 0, 'show': 0, 'download_only': False, 'is_sound': True, 'slurm': {'mem_per_gpu': 100, 'time': 4320}, 'continue_sig': None, 'continue_best': True, 'seed': 2037, 'dummy': None, 'cache': '/home/zubrikhina/brainmagick_MICCAI/cache', 'features_models': './features_models', 'early_stop_patience': 10, 'eval_every': 1, 'eval_train_set': False, 'optim': {'name': 'adam', 'shuffle': False, 'lr': 0.0003, 'beta2': 0.999, 'eps': 1e-08, 'epochs': 200, 'batch_size': 256, 'loss': 'clip', 'weight_decay': 0.0, 'use_weighting': False, 'max_batches': 1200, 'svd': 0.0, 'negatives': None, 'negative_pool_size': None, 'scheduler': {'name': 'None'}



In [70]:
# Override the solver's configured number of workers in place
# (presumably picked up by solver.make_loader — TODO confirm).
solver.args.num_workers= 5

In [71]:
solver.args.num_workers

5

In [72]:
solver.datasets

Datasets(train=<torch.utils.data.dataset.ConcatDataset object at 0x7f4757d31df0>, valid=<torch.utils.data.dataset.ConcatDataset object at 0x7f4757c4d610>, test=<torch.utils.data.dataset.ConcatDataset object at 0x7f4757c7f9a0>)

In [73]:
def _get_one_segments_and_vocabs(solver):
    """Process a few shuffled training batches and return the last one with its predictions.

    A shuffled loader is built on ``solver.datasets.train``; each batch is passed through
    ``solver._process_batch`` under ``torch.no_grad()`` and iteration stops after the
    third batch (index 2). Everything returned refers to the last processed batch.

    Args:
        solver: solver object exposing ``datasets``, ``make_loader``, ``_process_batch``
            and ``args.dset.sample_rate``.

    Returns:
        Tuple ``(batch, estimate, output, word_segs, vocab)``. ``estimate`` and ``output``
        are the first two values returned by ``solver._process_batch``; ``word_segs`` and
        ``vocab`` are the third and second values returned by ``_get_extra_info`` for the
        same batch. (``vocab`` used to be an always-empty list because the words were
        computed but never stored.)

    Raises:
        ValueError: if the loader does not yield any batch.
    """
    from scripts.run_eval_probs import _get_extra_info

    loader = solver.make_loader(solver.datasets.train, shuffle=True)
    last = None
    for idx, batch in tqdm(enumerate(loader)):
        with torch.no_grad():
            # The two masks returned by _process_batch are not needed here.
            estimate, output, _, _ = solver._process_batch(batch)
            _, words, word_segs = _get_extra_info(batch, solver.args.dset.sample_rate)
        last = (batch, estimate, output, word_segs, words)
        # Stop once the batch with index 2 has been processed (same cut-off as before).
        if idx > 1:
            break
    if last is None:
        raise ValueError("the training loader did not yield any batch")
    return last

In [74]:
#batch, estimate, output, word_segs,vocab = _get_one_segments_and_vocabs(solver)

In [75]:
#word_segs

In [76]:
def _get_segments_and_vocabs(solver, splits=('test',)):
    """Run the model over whole dataset splits and collect predictions with their text.

    For every requested split an unshuffled loader is built and each batch is passed
    through ``solver._process_batch`` under ``torch.no_grad()``. For the ``'test'`` split
    the batch features are first re-extracted with the feature object of the first test
    dataset, restricted to ``solver.used_features``.

    Args:
        solver: solver object exposing ``datasets``, ``make_loader``, ``_process_batch``,
            ``used_features`` and ``args``.
        splits: name (or iterable of names) of the attributes of ``solver.datasets`` to
            process, e.g. ``'train'``, ``'valid'``, ``'test'``. Defaults to ``('test',)``,
            which reproduces the previously hard-coded behaviour.

    Returns:
        Dict mapping each split name to ``(segments, vocab, estimates, outputs)``.
        ``segments`` and ``vocab`` are lists with one entry per batch (third and second
        value returned by ``_get_extra_info``); ``estimates`` and ``outputs`` are CPU
        tensors concatenated over all batches along dim 0.

    Raises:
        ValueError: if a split yields no batch (nothing to concatenate).
    """
    from scripts.run_eval_probs import _get_extra_info

    # Accept a single split name as a convenience instead of iterating its characters.
    if isinstance(splits, str):
        splits = (splits,)

    print(solver.args.num_workers)
    # Feature object of the first test dataset; only used for the 'test' split.
    test_features = solver.datasets.test.datasets[0].features

    per_split = {}
    for split in splits:
        segments, vocab, estimates, outputs = [], [], [], []
        loader = solver.make_loader(getattr(solver.datasets, split), shuffle=False)
        # Passing the loader itself (not an enumerate wrapper) lets tqdm show a total
        # whenever the loader defines a length.
        for batch in tqdm(loader, desc=split):
            with torch.no_grad():
                if split == "test":
                    features = test_features.extract_features(
                        batch.features, solver.used_features.keys())
                    model_input = batch.replace(features=features)
                else:
                    model_input = batch
                # The two masks returned by _process_batch are not needed here.
                estimate, output, _, _ = solver._process_batch(model_input)
                _, words, word_segs = _get_extra_info(batch, solver.args.dset.sample_rate)
            segments.append(word_segs)
            vocab.append(words)
            estimates.append(estimate.detach().cpu())
            outputs.append(output.detach().cpu())
        if not estimates:
            raise ValueError(f"split {split!r} did not yield any batch")
        per_split[split] = (
            segments,
            vocab,
            torch.cat(estimates, dim=0),
            torch.cat(outputs, dim=0),
        )
    return per_split

In [77]:
# Run the full pass over the dataset (about 10 minutes according to the progress bar
# below); the result is a dict keyed by split name.
per_split =_get_segments_and_vocabs(solver)

5


274it [10:39,  2.33s/it]


In [78]:
# Unpack the test split: per-batch word-sequence arrays, per-batch words, and the
# concatenated estimate / output tensors.
segments, vocab, estimates,outputs =per_split['test']

In [79]:
segments[0].shape

(256,)

In [80]:
len(segments)

274

In [81]:
segments[0]

array(['Results Harmon she suppressed the surge of annoyance',
       'Harmon she suppressed the surge of annoyance that ran',
       'she suppressed the surge of annoyance that ran through',
       'suppressed the surge of annoyance that ran through her as she',
       'suppressed the surge of annoyance that ran through her as she contemplated',
       'suppressed the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ran through her as she contemplated',
       'annoyance that ran through her as she contemplated the',
       'annoyance that ran through her as she contemplated the',
       'that ran through her as she contemplated the gift',
       'ran through her as she contemplated the gift',
       'ran through her as she contemplated the gift of',
       'through her as she contemplated the gift of getting',
       'her as she contemplated the gift of getting',
       'contemplated the gift of getting all the hot news first',
       

In [82]:
# Flatten the per-batch arrays of word sequences into one flat list (one entry per item).
new_segments_test = [text for batch_segments in segments for text in batch_segments]

In [83]:
# segments_train, vocab_train, _,_ =per_split['valid']

In [84]:
# new_segments_train=[]
# for segment in segments_train:
#     for elem in segment:
#         new_segments_train.append(elem)

In [85]:
# set(new_segments_train) & set(new_segments_test)

In [86]:
# Flatten the per-batch word entries, keeping only the first element of each entry.
new_vocab = [entry[0] for batch_words in vocab for entry in batch_words]

In [87]:
len(new_segments_test)

69972

In [88]:
len(new_vocab)

69972

In [89]:
len(estimates)

69972

In [90]:
class RepresentationSimilarity():
    """Nearest-neighbour lookups over per-item ``estimates`` and ``outputs`` tensors.

    ``estimates`` and ``outputs`` are 3-D tensors whose first dimension indexes items
    (the einsum pattern ``bct,oct->bo`` used for scoring fixes the rank). Judging by the
    method names, ``estimates`` are presumably the brain-side predictions and ``outputs``
    the audio-side targets — TODO confirm against ``solver._process_batch``.

    Args:
        solver: object whose ``loss`` attribute provides ``get_probabilities``.
        segments: per-item text segments (converted to a numpy array).
        vocab: per-item words (converted to a numpy array).
        estimates: tensor of per-item estimates.
        outputs: tensor of per-item outputs.
    """

    def __init__(self, solver, segments, vocab, estimates, outputs):
        self.solver = solver
        self.segments = np.array(segments)
        self.vocab = np.array(vocab)
        self.estimates = estimates
        self.outputs = outputs
        self.clip = solver.loss

    def get_item(self, ind):
        """Return ``(segment, vocab, estimate, output)`` stored at position ``ind``."""
        return self.segments[ind], self.vocab[ind], self.estimates[ind], self.outputs[ind]

    @staticmethod
    def _dot_scores(query, candidates):
        """Score one item against all candidates with a dot product over the last two dims."""
        return torch.einsum("bct,oct->bo", query[None], candidates)[0]

    @staticmethod
    def _top_indices(scores, topk):
        """Indices of the ``topk`` best scores; ``topk`` is clamped to the number of scores."""
        _, bests = scores.topk(min(topk, scores.shape[0]))
        return bests

    def get_brain_simmilatity(self, ind, topk, without_target=False, verbose=True):
        """Find the items whose estimates are closest to the estimate of item ``ind``.

        Args:
            ind: integer index of the query item.
            topk: number of neighbours to return (clamped to the number of candidates).
            without_target: if True, the query item is removed from the candidates and
                scores come from ``self.clip.get_probabilities``; otherwise a raw dot
                product against all estimates is used.
            verbose: if True (default, as before), print the scores and best indices.

        Returns:
            Tuple ``(segment, neighbour_segments, neighbour_indices)``. The indices always
            refer to positions in ``self.segments`` (previously, with
            ``without_target=True``, they referred to the reduced candidate array and
            were off by one for every neighbour located at or after ``ind``).
        """
        segment, _, estimate, _ = self.get_item(ind)
        if without_target:
            n_items = self.estimates.shape[0]
            target = int(ind) % n_items  # normalise negative indices
            keep = torch.ones(n_items, dtype=torch.bool)
            keep[target] = False
            # NOTE(review): this branch scores with get_probabilities while the other
            # one uses a raw dot product — confirm the asymmetry is intended.
            probas = self.clip.get_probabilities(estimate[None], self.estimates[keep])[0]
            bests = self._top_indices(probas, topk).numpy()
            # Shift indices at or after the removed row back to original positions.
            bests = bests + (bests >= target)
        else:
            probas = self._dot_scores(estimate, self.estimates)
            bests = self._top_indices(probas, topk).numpy()
        if verbose:
            print(probas.numpy())
            print(bests)
        return segment, self.segments[bests], bests

    def get_audio_simmilatity(self, ind, topk):
        """Find the items whose outputs are closest (dot product) to the output of ``ind``.

        Returns:
            Tuple ``(segment, neighbour_segments, neighbour_indices)``.
        """
        segment, _, _, output = self.get_item(ind)
        bests = self._top_indices(self._dot_scores(output, self.outputs), topk).numpy()
        return segment, self.segments[bests], bests

    def decode_brain_activity(self, ind, topk, verbose=True):
        """Rank all outputs by dot product with the estimate of item ``ind``.

        Args:
            ind: integer index of the query item.
            topk: number of candidates to return (clamped to the number of items).
            verbose: if True (default, as before), print the best indices.

        Returns:
            Tuple ``(segment, best_segments, best_indices)``.
        """
        segment, _, estimate, _ = self.get_item(ind)
        bests = self._top_indices(self._dot_scores(estimate, self.outputs), topk)
        if verbose:
            print(bests)
        bests = bests.numpy()
        return segment, self.segments[bests], bests

    # Correctly spelled aliases; the original (misspelled) names stay available.
    get_brain_similarity = get_brain_simmilatity
    get_audio_similarity = get_audio_simmilatity

In [91]:
# Build the similarity helper from the flattened test segments / words and the
# concatenated estimate / output tensors.
repsim =RepresentationSimilarity(solver,new_segments_test, new_vocab, estimates,outputs)

In [92]:
repsim.get_brain_simmilatity(0, 10, without_target=False)

[431046.16     -568.4878  -4866.793  ...   6539.334   23209.074
  37223.543 ]
[    0 25746 66131 32593 48666 66520 64451 47567 39574 52955]


('Results Harmon she suppressed the surge of annoyance',
 array(['Results Harmon she suppressed the surge of annoyance',
        'blonde slowly broke by', 'I',
        'stories had an originality to them',
        'his shoes when Chad was five He thought', 'this a wind house',
        'leaned in low and his breath was sour it blew in face', 'I',
        'before continuing in another tone entirely',
        'He thought about gathering himself'], dtype='<U84'),
 array([    0, 25746, 66131, 32593, 48666, 66520, 64451, 47567, 39574,
        52955]))

In [93]:
repsim.get_brain_simmilatity(500, 10, without_target=False)

[ -6591.1187  29941.965  -33487.26   ...   2421.761    9170.454
  26660.504 ]
[  500 19035  6278 32791 65712 64959 31324 41912 50398 30061]


('passes with his hands paused and nodded then did a sort of sign language',
 array(['passes with his hands paused and nodded then did a sort of sign language',
        'going to do it in two parts last part first',
        'way I describe and feet I could',
        'stoic tree and let forth a moan a cry of purest agony that',
        'face slowly animated joy sweeping',
        'call realized kiss where well lights open case creation',
        'stirring up the behind in a great swirling',
        'passes with his hands paused and nodded then did a sort of sign language',
        'the ground with his finger stone',
        'unaware of the rest of the bridge shaking blonde curls as'],
       dtype='<U84'),
 array([  500, 19035,  6278, 32791, 65712, 64959, 31324, 41912, 50398,
        30061]))

In [94]:
repsim.get_brain_simmilatity(800, 10)

[17807.17   14201.541  24700.906  ... 54031.312  32775.68   -4867.8896]
[  800 66029 34061  9253 30987  1646 48291 14106 48453 16223]


('Life Abroad to find his childhood friend a bride to be thus',
 array(['Life Abroad to find his childhood friend a bride to be thus',
        'patience was long gone and I was back in the car to',
        'loaded it onto the roof',
        'What if it work violating pulled mouth different',
        'unfolding and lowering his legs to the roadside',
        'sprinted for the corner of the building around which Roy',
        'honesty and a certain propensity for insight',
        'thing instead all that filled his mind was',
        'slowly beneath the branches a gust wind stirring',
        'ritual or ceremonial gestures a long'], dtype='<U84'),
 array([  800, 66029, 34061,  9253, 30987,  1646, 48291, 14106, 48453,
        16223]))

In [95]:
import random

In [96]:
# Number of random items inspected by the sampling loop in the next cell.
K=30

In [97]:
# For K randomly drawn items, print the nearest neighbours of the item's estimate among
# all estimates, of its output among all outputs, and the outputs ranked against its estimate.
for i in range(K):
    # Sample over every item held by `repsim`; the previous hard-coded upper bound
    # (60172) did not match the 69972 items and silently excluded the tail.
    indx = random.randrange(len(repsim.segments))
    segment, similar_segments, seg_ids = repsim.get_brain_simmilatity(indx, 50, without_target=False)
    print(segment)
    print("More similar brain parts:")
    print(similar_segments)
    print("More similar audio fragments")
    segment_audio, similar_segments_audio, seg_au_ids = repsim.get_audio_simmilatity(indx, 50)
    print(similar_segments_audio)
    print("Predicted values using brain activity: ")
    # `secoded_ids_best` keeps its original spelling because a later cell reads it.
    segment_brain, decoded_segments, secoded_ids_best = repsim.decode_brain_activity(indx, 50)
    print(decoded_segments)
    print("_____________")

[37746.676 45670.723  6402.95  ... 19199.242 70117.84  38502.31 ]
[31639 31400 44894 48265 18281 66361 65670 42038 48440 34148 22227 33905
 18931 65676 31311 66200 62249 41779 65470 65361 64294 66972 21280 31416
 55580 64622 53557 61639 62988 48871 33004 18978 66273 66041 64771 53733
  5883 51971 44091 13760 23893 53555 42622 48028 65548 68693 19662 32141
 56666 22507]
corner of the building around which Roy had disappeared
More similar brain parts:
['corner of the building around which Roy had disappeared'
 'the fallen leaves and needles to join the water of the stream flowing'
 'things fell together for me looking at the big statue the Hawaiian'
 'Nathan aghast' 'This is one of the strongest works read in ages'
 'How much can we sell before someone becomes suspicious'
 'seeping though the fallen leaves and needles to join the water of the stream'
 'things fell together for me looking at the big statue the Hawaiian'
 'woods in which it was his habit walk in a'
 'also motivation whethe

In [164]:
segment_brain

'the surge of annoyance that ran through her as she contemplated'

In [165]:
secoded_ids_best

array([52842, 31422, 24282,  8574, 18570, 45702, 49986, 44274, 22854,
       21426, 15714, 34278, 32850, 11430, 25710, 10002, 37134, 39990,
       14286, 12858])

In [166]:
decoded_segments

array(['the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ra

In [108]:
# NOTE(review): this raises KeyError (see the output below) because
# `_get_segments_and_vocabs` only fills the 'test' key of `per_split`. The `train_seq`
# consumed by the following cells presumably comes from an earlier kernel state.
train_seq =per_split['train'][0]

KeyError: 'train'

In [16]:
# Per-batch word-sequence arrays of the test split (first element of the split tuple).
test_seq =per_split['test'][0]

In [13]:
# Unique word sequences seen across all training batches.
train_set = {text for batch_segments in train_seq for text in batch_segments}

In [17]:
# Unique word sequences seen across all test batches.
test_set = {text for batch_segments in test_seq for text in batch_segments}

In [15]:
len(train_set)

7852

In [18]:
len(test_set)

1197

In [20]:
# Number of word sequences that occur in both the training and the test sets.
len(train_set & test_set)

3

In [23]:
# Show every test word sequence that mentions 'Harmon '.
for text in (candidate for candidate in test_set if 'Harmon ' in candidate):
    print(text)

Harmon she suppressed the surge of annoyance that ran through
Harmon she suppressed the surge of annoyance that ran
Results Harmon she suppressed the surge of annoyance


In [24]:
# Show every training word sequence that mentions 'Harmon '.
for text in (candidate for candidate in train_set if 'Harmon ' in candidate):
    print(text)

Harmon that good nothing son of beta get his
Harmon that good nothing son of beta get


## Посмотрим на распределение аудио и мозговой активности

In [41]:
from torch.utils.data import DataLoader
from bm.dataset import SegmentBatch
from scripts.run_eval_probs import _get_extra_info

In [42]:
# Keyword arguments for a deterministic (unshuffled) DataLoader over SegmentBatch items.
defaults = dict(
    batch_size=256,
    num_workers=4,
    collate_fn=SegmentBatch.collate_fn,
    shuffle=False,
)

In [43]:
# Plain torch DataLoader over the test dataset, configured by `defaults`.
dataloader =DataLoader(solver.datasets.test, **defaults)

In [44]:
# Fetch only the first batch produced by the loader.
batch =next(iter(dataloader))

In [45]:
 data1,  words1, word_segs1= _get_extra_info(batch, solver.args.dset.sample_rate)

In [46]:
word_segs1

array(['Results Harmon she suppressed the surge of annoyance',
       'Harmon she suppressed the surge of annoyance that ran',
       'she suppressed the surge of annoyance that ran through',
       'suppressed the surge of annoyance that ran through her as she',
       'suppressed the surge of annoyance that ran through her as she contemplated',
       'suppressed the surge of annoyance that ran through her as she contemplated',
       'the surge of annoyance that ran through her as she contemplated',
       'annoyance that ran through her as she contemplated the',
       'annoyance that ran through her as she contemplated the',
       'that ran through her as she contemplated the gift',
       'ran through her as she contemplated the gift',
       'ran through her as she contemplated the gift of',
       'through her as she contemplated the gift of getting',
       'her as she contemplated the gift of getting',
       'contemplated the gift of getting all the hot news first',
       

In [47]:
# Same loader settings as before, but with shuffling enabled.
defaults = dict(
    batch_size=256,
    num_workers=4,
    collate_fn=SegmentBatch.collate_fn,
    shuffle=True,
)

In [48]:
# Rebuild the test DataLoader with the current `defaults` (shuffling enabled in the cell above).
dataloader =DataLoader(solver.datasets.test, **defaults)

In [49]:
# Fetch one batch from the shuffled loader; no seed is set here, so the batch
# presumably differs between runs.
batch =next(iter(dataloader))

In [50]:
batch

SegmentBatch(meg=tensor([[[-1.3454e-14, -4.0237e-14, -6.1659e-14,  ..., -1.6657e-13,
          -2.3771e-13, -1.6154e-13],
         [-4.0287e-14, -2.4706e-14, -8.6957e-15,  ...,  3.2960e-13,
           3.2682e-13,  3.9180e-13],
         [-7.7937e-14, -6.2094e-14, -2.1099e-13,  ...,  3.9203e-14,
           6.1918e-14,  2.6997e-14],
         ...,
         [ 2.0480e-13,  1.9321e-13,  1.4768e-13,  ..., -4.9384e-13,
          -4.0298e-13, -3.7827e-13],
         [-8.7258e-14, -1.1450e-13, -6.2663e-14,  ...,  1.8316e-13,
           2.0836e-13,  8.9438e-14],
         [-5.9959e-14,  3.1446e-14,  4.4422e-14,  ...,  2.5197e-13,
           3.0399e-13,  2.0258e-13]],

        [[ 7.5983e-14,  6.9774e-14,  1.0413e-13,  ...,  5.3934e-13,
           2.4375e-13,  4.9310e-15],
         [ 3.2659e-14, -5.9550e-14,  1.2657e-13,  ...,  7.9708e-13,
           6.4038e-13,  3.0066e-13],
         [ 9.6183e-14, -6.6715e-15,  5.1828e-14,  ..., -1.6661e-13,
           1.2017e-13,  4.2300e-13],
         ...,
        

In [51]:
batch._event_lists

[[DataSlice(start=742.425, duration=3.0083333333333258, modality=None, language=None, sample_rate=120.0),
  Sound(start=737.817, duration=9.240000000000009, modality='audio', language='english', filepath='/home/zubrikhina/brainmagick_MICCAI/data/gwilliams2022/download/stimuli/audio/easy_money_4.wav', offset=92.58000000000004),
  Word(start=742.237, duration=0.3799999999999954, modality='audio', language='english', word='stepped', word_index=14, word_sequence='For far I could see by a weak gray light stone stepped up and down making little hills and and platforms'),
  Word(start=742.617, duration=0.1800000000000068, modality='audio', language='english', word='up', word_index=15, word_sequence='For far I could see by a weak gray light stone stepped up and down making little hills and and platforms'),
  Word(start=742.797, duration=0.1299999999999954, modality='audio', language='english', word='and', word_index=16, word_sequence='For far I could see by a weak gray light stone stepped up a

In [52]:
# Extract the three values returned by _get_extra_info for the shuffled batch; the
# third one holds the per-item word sequences (displayed a few cells below).
data2,  words2, word_segs2= _get_extra_info(batch, solver.args.dset.sample_rate)

In [56]:
len(word_segs2.tolist())

256

In [57]:
len(set(word_segs2))

228

In [58]:
word_segs2

array(['stepped up and down making little hills and and platforms',
       'let forth a moan a cry of purest agony that escaped',
       'ritual or ceremonial gestures a',
       'ancient stories of mankind the tales basic to',
       'feet I could see same design from little journal and',
       'was settling toward the time that Rayburn',
       'the best story ever written',
       'worried for just a moment when I started waving my hands',
       'smelled like first grade He wished he were there now',
       'see by a weak gray light stone stepped up',
       'in which it was his habit walk in a mood',
       'hours on the watch full lips with frustration',
       'pen cast a forbidding line of shadow across the page',
       'friend a bride to be thus upsetting the apple cart',
       'to the ground and seeping though the fallen leaves and needles to join the water',
       'like sleight of hand', 'jerked meat on the bone was wearing',
       'mouth different new deduction magicia