In [2]:
from tqdm.notebook import tqdm
import pickle, gzip
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import torch

import nibabel as nib
from nilearn import maskers
from nilearn.plotting import view_img
from nilearn.image import resample_img

from neurovlm.data import get_data_dir
from neurovlm.models import Specter

from neurovlm.brain_input import search_papers_from_brain, search_wiki_from_brain

In [3]:
get_data_dir()

PosixPath('/Users/borng/code/lab_work/neurovlm/src/neurovlm/neurovlm_data')

In [3]:
# Load network atlases
with gzip.open(get_data_dir() / f"networks_arrays.pkl.gz", "rb") as f:
    networks = pickle.load(f)

In [4]:
# Load models
proj_head_mse_adhoc = torch.load(get_data_dir() / f"proj_head_image_infonce.pt", weights_only=False).cpu()
proj_head_img = torch.load(get_data_dir() / f"proj_head_image_infonce.pt", weights_only=False).cpu()
proj_head_text = torch.load(get_data_dir() / f"proj_head_text_infonce.pt", weights_only=False).cpu()
specter = Specter("allenai/specter2_aug2023refresh", adapter="adhoc_query")
autoencoder = torch.load(get_data_dir() / "autoencoder_sparse.pt", weights_only=False).cpu()
# decoder = autoencoder.decoder.to("cpu")
mask_arrays = np.load(f"{get_data_dir()}/mask.npz", allow_pickle=True)
mask_img = nib.Nifti1Image(mask_arrays["mask"].astype(float),  mask_arrays["affine"])
masker = maskers.NiftiMasker(mask_img=mask_img, dtype=np.float32).fit()

There are adapters available but none are activated for the forward pass.


In [5]:
networks = {k: v for _k in networks.keys() for k, v in networks[_k].items()}

In [6]:
mask_arrays = np.load(f"{get_data_dir()}/mask.npz", allow_pickle=True)
mask_img = nib.Nifti1Image(mask_arrays["mask"].astype(float),  mask_arrays["affine"])
masker = maskers.NiftiMasker(mask_img=mask_img, dtype=np.float32).fit()
networks_resampled = {}

for k in tqdm(networks.keys(), total=len(networks)):
    img = nib.Nifti1Image(networks[k]["array"], affine=networks[k]["affine"])

    if len(np.unique(networks[k]["array"])) == 2:
        # binary data
        img_resampled = resample_img(img, mask_arrays["affine"], interpolation="nearest")
    else:
        img_resampled = resample_img(img, mask_arrays["affine"])
        img_resampled_arr = img_resampled.get_fdata()
        img_resampled_arr[img_resampled_arr < 0] = 0.
        thresh = np.percentile(img_resampled_arr.flatten(), 95)
        img_resampled_arr[img_resampled_arr < thresh] = 0.
        img_resampled_arr[img_resampled_arr >= thresh] = 1.
        img_resampled = nib.Nifti1Image(img_resampled_arr, affine=mask_arrays["affine"])

    networks_resampled[k] = img_resampled

  0%|          | 0/152 [00:00<?, ?it/s]

In [7]:
networks_embed = {}

for k, v in tqdm(networks_resampled.items(), total=len(networks_resampled)):
    networks_embed[k] = autoencoder.encoder(torch.from_numpy(masker.transform(v)))

  0%|          | 0/152 [00:00<?, ?it/s]

## Brain to text (Network vs Paper)

In [8]:
df_similar_paper_titles = pd.DataFrame(columns=[
    "atlas_label",
    "similar_title1",
    "similar_title2",
    "similar_title3",
    "similar_title4",
    "similar_title5"
])

df_similar_paper_titles.head()

def add2df(df, atlas_label, similar_titles):
    new_row = {
        "atlas_label": atlas_label,
        "similar_title1": similar_titles[0] if len(similar_titles) > 0 else np.nan,
        "similar_title2": similar_titles[1] if len(similar_titles) > 1 else np.nan,
        "similar_title3": similar_titles[2] if len(similar_titles) > 2 else np.nan,
        "similar_title4": similar_titles[3] if len(similar_titles) > 3 else np.nan,
        "similar_title5": similar_titles[4] if len(similar_titles) > 4 else np.nan,
    }
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    return df

In [10]:
abstract, titles = search_papers_from_brain(networks_embed["AUD"], show_titles=True)

Top matches:
1. Neural correlates of sound externalization.
2. Audio-visual synchrony modulates the ventriloquist illusion and its neural/spatial representation in the auditory cortex.
3. Processing of spectral and amplitude envelope of animal vocalizations in the human auditory cortex.
4. Music listening engages specific cortical regions within the temporal lobes: Differences between musicians and non-musicians.
5. Processing of natural sounds in human auditory cortex: tonotopy, spectral tuning, and relation to voice sensitivity.


In [11]:
for key, item in tqdm(networks_embed.items(), total=len(networks_embed)):
    abstract, titles = search_papers_from_brain(item)
    df_similar_paper_titles = add2df(df_similar_paper_titles, key, titles)

  0%|          | 0/152 [00:00<?, ?it/s]

In [12]:
df_similar_paper_titles.head()

Unnamed: 0,atlas_label,similar_title1,similar_title2,similar_title3,similar_title4,similar_title5
0,VIS-P,Aberrant neurovascular coupling in Leber’s her...,Changes in brain morphology in albinism reflec...,Glucose hypometabolism in the visual cortex pr...,Topography of cortical activation differs for ...,Altered Temporal Dynamic Intrinsic Brain Activ...
1,CG-OP,Does losing money truly hurt? The shared neura...,Cognitive modulation of pain-related brain res...,The interactive effect of social pain and exec...,Social context and perceived agency affects em...,Your pain or mine? Common and distinct neural ...
2,DN-B,Default and executive networks’ roles in diver...,Fairy Tales versus Facts: Genre Matters to the...,Neural correlates of theory-of-mind are associ...,Similar Brain Activation during False Belief T...,Thinking about the future versus the past in p...
3,SMOT-B,Neural correlates of somatosensory paired-puls...,The primary somatosensory cortex largely contr...,No somatotopy of sensorimotor alpha-oscillatio...,Paired pulse depression in the somatosensory c...,Simultaneous electroencephalography and functi...
4,AUD,Neural correlates of sound externalization.,Audio-visual synchrony modulates the ventriloq...,Processing of spectral and amplitude envelope ...,Music listening engages specific cortical regi...,Processing of natural sounds in human auditory...


In [13]:
df_similar_paper_titles.to_csv("~/Desktop/similar_paper_titles.csv", index=False)

In [19]:
df_similar_wiki_titles = pd.DataFrame(columns=[
    "atlas_label",
    "similar_title1",
    "similar_title2",
    "similar_title3",
    "similar_title4",
    "similar_title5"
])

In [15]:
related_wiki = search_wiki_from_brain(networks_embed["AUD"], top_k= 5 ,show_titles=True)

Top matches:
1. Neurocomputational speech processing
2. Speech segmentation
3. Aphonia
4. 2,N,N-TMT
5. Whispering


In [16]:
for key, item in tqdm(networks_embed.items(), total=len(networks_embed)):
    summary, titles = search_wiki_from_brain(item)
    df_similar_wiki_titles = add2df(df_similar_wiki_titles, key, titles)

  0%|          | 0/152 [00:00<?, ?it/s]

In [17]:
df_similar_wiki_titles.to_csv("~/Desktop/similar_wiki_titles.csv", index=False)

In [18]:
df_similar_wiki_titles.head()

Unnamed: 0,atlas_label,similar_title1,similar_title2,similar_title3,similar_title4,similar_title5
0,VIS-P,Visual neuroscience,Pinguecula,,,
1,CG-OP,Propyphenazone/paracetamol/caffeine,Hypoalgesia,,,
2,DN-B,Content analysis,List of social network researchers,,,
3,SMOT-B,Magnetomyography,Whispered pectoriloquy,,,
4,AUD,Neurocomputational speech processing,Speech segmentation,,,
