In [1]:
import sys
import os
import re
import json
import pickle
import gzip

import numpy as np
import pandas as pd
import torch

import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from pydantic import BaseModel

import ollama
from ollama import chat

import nibabel as nib
from nilearn import maskers
from nilearn.plotting import view_img
from nilearn.image import resample_img

from neurovlm.data import get_data_dir
from neurovlm.models import Specter
from neurovlm.text_input import search_papers_from_text, search_wiki_from_text, generate_llm_response_from_text
from neurovlm.brain_input import search_papers_from_brain, search_wiki_from_brain, generate_llm_response_from_brain

## Ollama Setup

**1. Install Ollama**

- **macOS:**  
    ```sh
    curl -fsSL https://ollama.com/install.sh | sh
    ```
- **Linux:**  
    ```sh
    curl -fsSL https://ollama.com/install.sh | sh
    ```

**2. Start Ollama Service**

```sh
ollama serve
```

**3. Pull a Model (Our default model is qwen2.5:3b-instruct)**

```sh
ollama pull qwen2.5:3b-instruct
```


## Load data and models

In [2]:
# Load the pretrained autoencoder, the wiki table, and the brain mask
data_dir = get_data_dir()

# weights_only=False unpickles the full model object, so the checkpoint must be trusted
autoencoder_path = data_dir / "autoencoder_sparse.pt"
autoencoder = torch.load(autoencoder_path, weights_only=False).cpu()

wiki_df = pd.read_parquet(data_dir / "neurowiki_with_ids.parquet")

# Rebuild the mask image from its stored array + affine and fit a masker on it
mask_arrays = np.load(data_dir / "mask.npz", allow_pickle=True)
mask_img = nib.Nifti1Image(
    mask_arrays["mask"].astype(float),
    mask_arrays["affine"],
)
masker = maskers.NiftiMasker(mask_img=mask_img, dtype=np.float32).fit()

## Example query for text

The titles and abstracts most related to the query will be passed to the LLM.

The functions expect a plain string as input; there is no need to encode the text before passing it in.

In [3]:
# Free-text query reused by the paper search and the LLM response cells below
query = "default mode network"

In [4]:
# Echo the query so its value is visible in the notebook output
query

'default mode network'

In [5]:
# Retrieve the papers most similar to the text query and print their titles
abstract, titles = search_papers_from_text(query, show_titles=True)

There are adapters available but none are activated for the forward pass.


Top matches:
1. Spatio-temporal brain dynamics of self-identity: an EEG source analysis of the current and past self
2. The attribution of animacy and agency in frontotemporal dementia versus Alzheimer's disease.
3. Tracking the dynamic functional connectivity structure of the human brain across the adult lifespan.
4. Finding the vanished self: Perspective modulates neural substrates of self-reflection in Buddhists.
5. Intrinsic Connectivity Identifies the Hippocampus as a Main Crossroad between Alzheimer’s and Semantic Dementia-Targeted Networks


In [6]:
# Search the wiki entries with a second example query ("dementia") and list ten matches.
# Note: this rebinds `titles` from the paper search above.
context, titles = search_wiki_from_text("dementia", top_k=10, show_titles=True)

Top matches:
1. Hallmarks of aging
2. Beta wave
3. Biological organisation
4. Low-density lipoprotein receptor-related protein 8
5. Leon Cooper
6. Outline of death
7. Gerontology
8. Biological system
9. Theta model
10. Postmortem caloricity


In [None]:
# Ask the LLM for a response to the text query; requires a running Ollama service (see setup above)
output = generate_llm_response_from_text(query)

## Example query for Brain Input

The titles and abstracts most related to the brain map will be passed to the LLM.

The functions expect an already-encoded brain input, i.e. an embedding produced by the autoencoder's encoder (computed below).

In [11]:
# Load network atlases from the gzip-compressed pickle in the data directory.
# NOTE: pickle.load can execute arbitrary code while deserializing; only open
# archives that come from a trusted source.
with gzip.open(get_data_dir() / "networks_arrays.pkl.gz", "rb") as f:
    networks = pickle.load(f)

In [12]:
# Collapse the two-level mapping into one dict keyed by the inner names.
# This rebinds `networks`, so run this cell exactly once after loading the atlases.
networks = {
    name: entry
    for group in networks.values()
    for name, entry in group.items()
}

In [13]:
# Resample every network map onto the mask's affine and turn it into a 0/1 map.
# - Maps holding exactly two distinct values are treated as already binary and
#   resampled with nearest-neighbour interpolation so no new values are created.
# - All other maps are resampled with resample_img's default interpolation,
#   negatives are clipped to zero, and only voxels at or above the
#   KEEP_PERCENTILE-th percentile of the whole volume are kept.
KEEP_PERCENTILE = 95

networks_resampled = {}

for k in tqdm(networks.keys(), total=len(networks)):
    arr = networks[k]["array"]
    img = nib.Nifti1Image(arr, affine=networks[k]["affine"])

    if len(np.unique(arr)) == 2:
        # binary data
        img_resampled = resample_img(img, mask_arrays["affine"], interpolation="nearest")
    else:
        img_resampled = resample_img(img, mask_arrays["affine"])
        img_resampled_arr = img_resampled.get_fdata()
        img_resampled_arr[img_resampled_arr < 0] = 0.
        # np.percentile works on the flattened array by default
        thresh = np.percentile(img_resampled_arr, KEEP_PERCENTILE)
        # Also require a strictly positive value: for sparse maps the percentile
        # is 0, and ">= thresh" alone would mark the entire volume as 1.
        keep = (img_resampled_arr >= thresh) & (img_resampled_arr > 0)
        img_resampled = nib.Nifti1Image(
            keep.astype(img_resampled_arr.dtype), affine=mask_arrays["affine"]
        )

    networks_resampled[k] = img_resampled

  0%|          | 0/152 [00:00<?, ?it/s]

In [14]:
# Encode each resampled network map with the autoencoder's encoder.
# This is inference only, so run it under no_grad: otherwise every stored
# embedding keeps an autograd graph alive.
networks_embed = {}

with torch.no_grad():
    for k, v in tqdm(networks_resampled.items(), total=len(networks_resampled)):
        # masker.transform turns the image into the array the encoder consumes
        networks_embed[k] = autoencoder.encoder(torch.from_numpy(masker.transform(v)))

  0%|          | 0/152 [00:00<?, ?it/s]

In [15]:
# Look for abstracts and titles related to the auditory ("AUD") network embedding
abstract, titles = search_papers_from_brain(networks_embed["AUD"], show_titles=True)

Top matches:
1. Neural correlates of sound externalization.
2. Audio-visual synchrony modulates the ventriloquist illusion and its neural/spatial representation in the auditory cortex.
3. Processing of spectral and amplitude envelope of animal vocalizations in the human auditory cortex.
4. Music listening engages specific cortical regions within the temporal lobes: Differences between musicians and non-musicians.
5. Processing of natural sounds in human auditory cortex: tonotopy, spectral tuning, and relation to voice sensitivity.


In [16]:
# Look for wiki entries related to the same "AUD" embedding, listing ten matches
summary, titles = search_wiki_from_brain(networks_embed["AUD"], top_k=10, show_titles=True)

Top matches:
1. Neurocomputational speech processing
2. Speech segmentation
3. Aphonia
4. 2,N,N-TMT
5. Whispering
6. Misophonia
7. Aphonogelia
8. Speech error
9. Universal neonatal hearing screening
10. Hypoglossal nerve stimulation


In [None]:
# LLM summary of the abstracts related to the "AUD" network embedding;
# requires a running Ollama service (see setup above)
generate_llm_response_from_brain(networks_embed["AUD"])