In [None]:
import sys
import os
import re
import json
import pickle
import gzip

import numpy as np
import pandas as pd
import torch

import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from pydantic import BaseModel

import ollama
from ollama import chat

import nibabel as nib
from nilearn import maskers
from nilearn.plotting import view_img
from nilearn.image import resample_img

from neurovlm.data import get_data_dir
from neurovlm.models import Specter
from neurovlm.text_input import search_papers_from_text, search_wiki_from_text, generate_llm_response_from_text
from neurovlm.brain_input import search_papers_from_brain, search_wiki_from_brain, generate_llm_response_from_brain

## Ollama Setup

**1. Install Ollama**

- **macOS:**  
    ```sh
    curl -fsSL https://ollama.com/install.sh | sh
    ```
- **Linux:**  
    ```sh
    curl -fsSL https://ollama.com/install.sh | sh
    ```

**2. Start the Ollama service**

```sh
ollama serve
```
    
**3. Pull a model (our default model is `qwen2.5:3b-instruct`)**

```sh
ollama pull qwen2.5:3b-instruct
```


## Load data and models

In [2]:
# Load the saved autoencoder (moved to CPU) and build the brain masker.
# NOTE: weights_only=False unpickles arbitrary Python objects, so only load
# checkpoint files that come from a trusted source.
data_dir = get_data_dir()
autoencoder_path = data_dir / "autoencoder_sparse.pt"
autoencoder = torch.load(autoencoder_path, weights_only=False).cpu()

# Rebuild the mask image from its stored array and affine, then fit a masker
# on it; the masker emits float32 data.
mask_arrays = np.load(data_dir / "mask.npz", allow_pickle=True)
mask_img = nib.Nifti1Image(
    mask_arrays["mask"].astype(float),
    mask_arrays["affine"],
)
masker = maskers.NiftiMasker(mask_img=mask_img, dtype=np.float32).fit()

## Example query for text

The titles and abstracts most related to the query will be passed to the LLM.

The functions expect a string input; there is no need to encode the text before passing it in.

In [3]:
# Free-text question reused as the input for the text-based examples below
query = "what is the role of the hippocampus in memory formation"

In [4]:
# Retrieve the papers most similar to the text query; show_titles=True also
# lists the matched titles in the cell output.
# NOTE(review): the recorded run warns that adapters are available but not
# activated, and the listed matches look unrelated to the query. Confirm the
# text encoder is being loaded as intended.
abstract, titles = search_papers_from_text(query, show_titles=True)

There are adapters available but none are activated for the forward pass.


Top matches:
1. Functional connectivity of amygdala subregions predicts vulnerability to depression following the COVID-19 pandemic
2. Effective Connectivity of Right Amygdala Subregions Predicts Symptom Improvement Following 12-Week Pharmacological Therapy in Major Depressive Disorder
3. Seasonal variations in diet composition, diet breadth and dietary overlap between three commercially important fish species within a flood-pulse system: The Tonle Sap Lake (Cambodia)
4. Relationship between ambient light and glucose metabolism in healthy subjects
5. Genetic variation in apolipoprotein E alters regional gray matter volumes in remitted late-onset depression.


In [5]:
# Same query against the wiki entries: top_k=10 requests ten matches and
# show_titles=True lists their titles in the cell output.
summary, titles = search_wiki_from_text(query, top_k=10, show_titles=True)

Top matches:
1. Leber congenital amaurosis
2. Neil Harbisson
3. CYK algorithm
4. Food psychology
5. Andrew Radford (linguist)
6. Face space
7. Myrmecochory
8. Î”-8-Tetrahydrocannabinol
9. Finno-Ugrian suicide hypothesis
10. Michael S. Brown


In [None]:
# Generate an LLM response for the text query.
# NOTE(review): presumably needs the Ollama service from the setup section to
# be running with the default model pulled. Confirm against the library.
output = generate_llm_response_from_text(query)

## Example query for brain input

The titles and abstracts most related to the brain input will be passed to the LLM.

The functions expect an already-encoded brain input.

In [6]:
# Load network atlases from the gzip-compressed pickle in the data directory.
# NOTE: pickle.load can execute arbitrary code while unpickling, so only open
# archives obtained from a trusted source.
with gzip.open(get_data_dir() / "networks_arrays.pkl.gz", "rb") as f:
    networks = pickle.load(f)

In [7]:
# Collapse the two-level mapping (outer key -> {network name -> entry}) into a
# single flat mapping keyed by network name. If the same network name occurs
# under several outer keys, the entry seen last silently wins.
# NOTE: this rebinds `networks`, so the cell is not idempotent. Re-run the
# loading cell before running this one a second time.
networks = {
    name: entry
    for atlas in networks.values()
    for name, entry in atlas.items()
}

In [8]:
# Percentile, taken over every voxel of the resampled volume, at or above
# which a continuous map is counted as part of the network.
BINARIZE_PERCENTILE = 95


def _binarize_top_percentile(values, percentile=BINARIZE_PERCENTILE):
    """Return a 0/1 array marking the positive voxels at or above a percentile.

    Negative values are clipped to zero before the percentile is computed over
    the whole array. A voxel is set to 1 only if it reaches the threshold and
    is strictly positive. Without the positivity check, a threshold of 0
    (most voxels being non-positive) would mark the entire volume as 1.

    Parameters
    ----------
    values : numpy.ndarray
        Continuous map values.
    percentile : float
        Percentile in [0, 100] used as the cut-off.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``values`` containing only 0 and 1.
    """
    clipped = np.clip(values, 0.0, None)
    thresh = np.percentile(clipped, percentile)
    keep = (clipped >= thresh) & (clipped > 0)
    return keep.astype(clipped.dtype)


networks_resampled = {}

for name, network in tqdm(networks.items(), total=len(networks)):
    img = nib.Nifti1Image(network["array"], affine=network["affine"])

    if len(np.unique(network["array"])) == 2:
        # Two distinct values: treat as a binary map and resample with
        # nearest-neighbour interpolation so no intermediate values appear.
        img_resampled = resample_img(img, mask_arrays["affine"], interpolation="nearest")
    else:
        # Continuous map: resample with the default interpolation, then
        # binarize by keeping only the top-percentile positive voxels.
        img_continuous = resample_img(img, mask_arrays["affine"])
        binary_arr = _binarize_top_percentile(img_continuous.get_fdata())
        img_resampled = nib.Nifti1Image(binary_arr, affine=mask_arrays["affine"])

    networks_resampled[name] = img_resampled

  0%|          | 0/152 [00:00<?, ?it/s]

In [9]:
networks_embed = {}

# Inference only: run the encoder with autograd disabled so the stored latents
# do not record (and keep alive) a computation graph.
with torch.no_grad():
    for k, v in tqdm(networks_resampled.items(), total=len(networks_resampled)):
        # masker.transform turns the image into a NumPy array, which the
        # encoder takes as a torch tensor.
        networks_embed[k] = autoencoder.encoder(torch.from_numpy(masker.transform(v)))

  0%|          | 0/152 [00:00<?, ?it/s]

In [None]:
# Look for abstracts and titles related to the auditory ("AUD") network embedding;
# show_titles=True lists the matched titles in the cell output.
abstract, titles = search_papers_from_brain(networks_embed["AUD"], show_titles=True)

Top matches:
1. Remembering with gains and losses: effects of monetary reward and punishment on successful encoding activation of source memories.
2. Neuroimaging of valence decisions in children and adults
3. Bacterial and Archaea Community Present in the Pine Barrens Forest of Long Island, NY: Unusually High Percentage of Ammonia Oxidizing Bacteria


In [12]:
# Same "AUD" embedding against the wiki entries: top_k=10 requests ten matches
# and show_titles=True lists their titles in the cell output.
summary, titles = search_wiki_from_brain(networks_embed["AUD"], top_k=10, show_titles=True)

Top matches:
1. GNA12
2. PSMD12
3. Suicide attempt
4. GNB1
5. BCL10
6. Zaspopathy
7. GRB10
8. Pituitary disease
9. TDP-43
10. Clobenpropit


In [None]:
# LLM summary of the publications related to the "AUD" network embedding.
# The call is the last expression in the cell, so its return value is shown as
# the cell output.
# NOTE(review): presumably needs the Ollama service from the setup section to
# be running. Confirm against the library.
generate_llm_response_from_brain(networks_embed["AUD"])

Encoded text norm shape: torch.Size([384]) and latent: torch.Size([27501, 384])
Top 10 publications for brain-derived input
[1] Processing the acoustic effect of size in speech sounds
[2] Cortical dynamics of acoustic and phonological processing in speech perception.
[3] Preference for Audiovisual Speech Congruency in Superior Temporal Cortex.
[4] Hierarchical organization of human auditory cortex: evidence from acoustic invariance in the response to intelligible speech.
[5] Hemispheric specialization for processing auditory nonspeech stimuli.
[6] Neural basis of auditory expectation within temporal cortex.
[7] Cortical Plasticity after Cochlear Implantation
[8] Whispering - The hidden side of auditory communication.
[9] Altered cortical and subcortical connectivity due to infrasound administered near the hearing threshold - Evidence from fMRI.
[10] Bilateral reorganization of posterior temporal cortices in post-lingual deafness  and its relation to cochlear implant outcome.
LLM writin

"### Overview\n\nThe publications provide insights into various aspects of brain function and its interactions with acoustic signals, revealing distinct areas of specialization within the auditory cortex while also highlighting regions involved in higher-level processes such as phonological representation and emotional processing. They collectively emphasize hierarchical organization within the auditory system, lateralization effects, and neural plasticity following cochlear implantation.\n\n### Publication 1: Sensory Encoding of Acoustic Effect of Vocal Tract Length\n\nThis study investigates how humans perceive the acoustic effect of vocal tract length (VTL), which reflects size information based on sound production. A mechanism involving a time-stabilized spectral scaling is proposed as the neural correlate for processing this sensory cue, with evidence pointing to the MGB and non-primary auditory cortex in planum temporale and anterior superior temporal gyrus being involved.\n\n###