In [1]:
# Download the Ollama install script and pipe it straight into `sh`.
# NOTE(review): this executes whatever the URL serves at run time (no version
# pin, no checksum) — acceptable for a throwaway Colab VM, confirm elsewhere.
!curl -fsSL https://ollama.com/install.sh | sh


>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [2]:
# Launch the Ollama server as a detached background process; stdout and stderr
# are discarded. The generation requests later in this notebook talk to it at
# http://localhost:11434.
# NOTE(review): nothing here waits for the server to accept connections —
# presumably the next cell is run after it has come up; confirm.
!nohup ollama serve > /dev/null 2>&1 &

In [3]:
# Fetch the `llama3` model; the generation requests later in this notebook ask
# for it by that name in the "model" field of their payload.
!ollama pull llama3

[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?2

In [4]:
# `%pip` installs into the environment of the running kernel (unlike `!pip`,
# which uses whichever pip is first on PATH). The version is pinned to the one
# this notebook was last executed with so a re-run resolves the same package.
%pip install -q textgrid==1.6.1

Collecting textgrid
  Downloading TextGrid-1.6.1.tar.gz (9.4 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: textgrid
  Building wheel for textgrid (setup.py) ... [?25l[?25hdone
  Created wheel for textgrid: filename=TextGrid-1.6.1-py3-none-any.whl size=10146 sha256=cf5951aac4ef3c24af6f8b705ab43684b0977f8e3e762dfadf8e2e0c5ab958d1
  Stored in directory: /root/.cache/pip/wheels/ce/86/7b/5766bd19fa4b4554667dd186e614b5a438ab14eec9c5a3642a
Successfully built textgrid
Installing collected packages: textgrid
Successfully installed textgrid-1.6.1


In [5]:
import torch
import h5py
import numpy as np
import pandas as pd
from textgrid import TextGrid
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.decomposition import PCA


from transformers import CLIPTokenizer, CLIPTextModel

# Device for model computation: CUDA when torch reports a GPU, otherwise CPU.
# NOTE(review): not referenced by any cell visible in this notebook.
compute_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The CLIP text encoder's tokenized inputs are moved to this device; it is
# fixed to the CPU regardless of GPU availability.
clip_device = torch.device("cpu")


class FMRIDataset():
    """Pairs fMRI samples with CLIP text embeddings of the words heard during them.

    Construction runs the whole pipeline eagerly:

    1. read the ``"data"`` dataset of the HDF5 file (first axis is indexed per TR),
    2. read the word tier of the TextGrid transcript,
    3. for every TR, join the words whose interval overlaps that TR's time window,
    4. embed each joined string with the CLIP text encoder (L2-normalised),
    5. split chronologically into train/test and build ``DataLoader`` objects.

    Attributes set during construction:
        fmri: array loaded from the HDF5 file.
        trans_df: DataFrame with columns ``word``, ``start``, ``end``.
        pairs: list of ``(brain_vec, sentence)`` tuples, one per TR with words.
        sentences: the sentence of every pair, in TR order.
        X, y: float32 arrays of brain vectors and normalised text embeddings.
        X_train, X_test, y_train, y_test, train_sentences, test_sentences:
            the chronological split produced by ``get_dataloaders``.
        train_loader, test_loader: loaders over the split.
    """

    # Fixed column layout of the transcript frame, so that a transcript with no
    # usable words still exposes the columns the alignment step filters on.
    _TRANSCRIPT_COLUMNS = ["word", "start", "end"]
    # Interval marks (compared lower-cased and stripped) that are not words.
    _NON_SPEECH_MARKS = ["", "sp", "sil", "<unk>"]

    def __init__(self, fmri_path, textgrid_path, TR=1.5):
        """Load, align, embed and split one recording.

        Args:
            fmri_path: path to an HDF5 file containing a ``"data"`` dataset.
            textgrid_path: path to a TextGrid file with a tier whose name
                contains "word".
            TR: duration of one fMRI sample, in the same time unit as the
                TextGrid interval times (presumably seconds — confirm).

        Raises:
            ValueError: if the TextGrid has no word tier, or if no TR overlaps
                any transcript word.
        """
        self.clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
        # Model and tokenized inputs must live on the same device, so both use
        # the module-level `clip_device` (CPU).
        self.clip_model = CLIPTextModel.from_pretrained(
            "openai/clip-vit-base-patch32", use_safetensors=True
        ).to(clip_device)
        self.TR = TR
        self.fmri = self._load_fmri(fmri_path)
        self.trans_df = self._load_transcript(textgrid_path)
        self.pairs = self._align_fmri_with_text()
        self.X, self.y = self._embed_pairs()
        self.train_loader, self.test_loader = self.get_dataloaders()

    def _load_fmri(self, path):
        """Return the full ``"data"`` dataset of the HDF5 file at ``path`` as an array."""
        with h5py.File(path, "r") as f:
            data = f["data"][:]
        return data

    def _load_transcript(self, path):
        """Read the word tier of a TextGrid into a ``word``/``start``/``end`` DataFrame.

        The first tier whose name contains "word" (case-insensitive) is used.
        Intervals with an empty mark or one of the non-speech marks are dropped.

        Raises:
            ValueError: if no tier name contains "word".
        """
        tg = TextGrid.fromFile(path)
        word_tier = next((tier for tier in tg.tiers if "word" in tier.name.lower()), None)

        if word_tier is None:
            raise ValueError("No 'word' tier found in TextGrid.")

        transcript = [
            {
                "word": interval.mark.strip(),
                "start": interval.minTime,
                "end": interval.maxTime
            }
            for interval in word_tier.intervals
            if interval.mark and interval.mark.strip().lower() not in self._NON_SPEECH_MARKS
        ]

        # Passing `columns` keeps the schema even when `transcript` is empty;
        # without it the frame would have no columns and the alignment step
        # would fail with KeyError instead of simply finding no words.
        return pd.DataFrame(transcript, columns=self._TRANSCRIPT_COLUMNS)

    def _align_fmri_with_text(self):
        """Pair each TR with the words overlapping its time window.

        TR ``t`` covers ``[t * TR, (t + 1) * TR)``. A word belongs to it when
        the word ends after the window starts and starts before the window
        ends, so a word spanning a boundary appears in both neighbouring TRs.
        TRs without any word are skipped.

        Returns:
            list of ``(brain_vec, sentence)`` tuples in TR order.
        """
        pairs = []
        n_TRs = self.fmri.shape[0]

        for t_idx in range(n_TRs):
            t_start = t_idx * self.TR
            t_end = t_start + self.TR

            words_in_tr = self.trans_df[
                (self.trans_df['end'] > t_start) &
                (self.trans_df['start'] < t_end)
            ]

            if not words_in_tr.empty:
                sentence = " ".join(words_in_tr['word'].tolist())
                brain_vec = self.fmri[t_idx]
                pairs.append((brain_vec, sentence))

        return pairs

    def _embed_pairs(self):
        """Turn ``self.pairs`` into arrays of brain vectors and text embeddings.

        Also stores the sentences on ``self.sentences``.

        Returns:
            ``(X, y)``: float32 arrays; ``y`` holds the CLIP ``pooler_output``
            of each sentence, scaled to unit L2 norm.

        Raises:
            ValueError: if there are no pairs to embed.
        """
        if not self.pairs:
            raise ValueError(
                "No fMRI sample overlaps any transcript word; nothing to embed."
            )

        X = [brain_vec for brain_vec, _ in self.pairs]
        sentences = [sentence for _, sentence in self.pairs]

        self.sentences = sentences
        X = np.array(X, dtype=np.float32)

        inputs = self.clip_tokenizer(sentences, return_tensors="pt", padding=True, truncation=True).to(clip_device)
        with torch.no_grad():
            outputs = self.clip_model(**inputs)
            y = outputs.pooler_output.cpu().numpy().astype(np.float32)

        # Clamp the norm so an all-zero embedding yields zeros rather than NaNs.
        norms = np.linalg.norm(y, axis=1, keepdims=True)
        y = y / np.clip(norms, 1e-12, None)

        return X, y


    def get_dataloaders(self, batch_size=32, test_size=0.2, shuffle=True):
        """Split chronologically and wrap both parts in ``DataLoader`` objects.

        The split itself is never shuffled (the last ``test_size`` fraction of
        samples becomes the test set); ``shuffle`` only controls whether the
        training loader shuffles its batches.

        Returns:
            ``(train_loader, test_loader)``. The split arrays and sentences are
            also stored on the instance.
        """
        self.X_train, self.X_test, self.y_train, self.y_test, self.train_sentences, self.test_sentences = train_test_split(
            self.X, self.y, self.sentences, test_size=test_size, shuffle=False
        )

        train_dataset = TensorDataset(torch.from_numpy(self.X_train), torch.from_numpy(self.y_train))
        test_dataset = TensorDataset(torch.from_numpy(self.X_test), torch.from_numpy(self.y_test))

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle)
        test_loader = DataLoader(test_dataset, batch_size=batch_size)
        return train_loader, test_loader

    def __len__(self):
        """Number of aligned (brain vector, embedding) samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a ``(brain_tensor, embedding_tensor)`` pair."""
        return torch.from_numpy(self.X[idx]), torch.from_numpy(self.y[idx])


In [6]:
# Mount Google Drive inside the Colab VM. The dataset files and the model
# checkpoint used by later cells are read from under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
# Directory and recording stem are stated once so that switching to another
# recording in the same folder is a one-line change.
DATA_DIR = '/content/drive/MyDrive/dataset'
STORY_NAME = 'quietfire'

fmri_path = f'{DATA_DIR}/{STORY_NAME}.hf5'
textgrid_path = f'{DATA_DIR}/{STORY_NAME}.TextGrid'

In [8]:
# Build the dataset. The constructor does all the work up front: it loads the
# CLIP tokenizer and text model, reads the fMRI array and the transcript,
# aligns words to TRs, embeds the text and creates the train/test loaders.
dataset = FMRIDataset(fmri_path=fmri_path, textgrid_path=textgrid_path)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

In [9]:
import torch
import torch.nn as nn
import numpy as np
import os
import torch.optim as optim
import torch.nn.functional as F
import h5py

class SpatialFMRI2Embedding(nn.Module):
    """MLP that maps one fMRI sample vector to a vector of text-embedding size.

    Besides the network, the instance also carries its own Adam optimizer, a
    cosine-annealing LR scheduler and a list of usable dataset file pairs.

    The layer order inside ``self.model`` determines the parameter names in the
    state dict (``model.0.*``, ``model.2.*``, ...), so it must not be changed
    if existing checkpoints are to keep loading.
    """

    def __init__(self, input_dim, output_dim, dataset_path="/content/drive/MyDrive/dataset/"):
        """Create the network, optimizer, scheduler and dataset file list.

        Args:
            input_dim: size of one input vector.
            output_dim: size of the produced embedding.
            dataset_path: directory scanned for ``<name>.hf5`` /
                ``<name>.TextGrid`` pairs. Pass ``None`` to skip the scan
                (``dataset_paths`` is then an empty list), e.g. when the model
                is only built to load a checkpoint for inference.
        """
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 2048),
            nn.ReLU(),
            nn.BatchNorm1d(2048),
            nn.Dropout(0.2),

            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.BatchNorm1d(1024),

            nn.Linear(1024, output_dim),
            nn.LayerNorm(output_dim)
        )
        self.optimizer = optim.Adam(self.parameters(), lr=1e-3)
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=10)
        self.dataset_path = dataset_path

        if dataset_path is None:
            self.dataset_paths = []
        else:
            self.dataset_paths = self.prepare_file_list(self.dataset_path)

    def forward(self, x):
        """Run ``x`` through the MLP and return the predicted embedding."""
        return self.model(x)

    def clip_contrastive_loss(self, pred, target, temperature=0.07):
        """Symmetric cross-entropy over the pred/target similarity matrix.

        Row ``i`` of ``pred`` is treated as matching row ``i`` of ``target``;
        every other row in the batch acts as a negative. The loss is the mean
        of the pred->target and target->pred cross-entropies.

        NOTE(review): ``pred`` is used as-is (not L2-normalised) — confirm this
        is intended given that the targets are unit-norm embeddings.
        """
        logits = torch.matmul(pred, target.T) / temperature
        labels = torch.arange(len(pred), device=pred.device)

        loss_i = F.cross_entropy(logits, labels)
        loss_t = F.cross_entropy(logits.T, labels)
        return (loss_i + loss_t) / 2

    def prepare_file_list(self, dataset_dir):
        """List the ``[hf5_path, textgrid_path]`` pairs usable in ``dataset_dir``.

        A base name qualifies when both ``<name>.hf5`` and ``<name>.TextGrid``
        exist and the ``"data"`` dataset of the HDF5 file loads and contains
        only finite values. Skipped or unreadable files are reported on stdout,
        and every accepted pair is printed.

        Returns:
            list of two-element lists, sorted by base name so the order is the
            same on every run.
        """
        # Strip only the final extension so stems containing dots survive, and
        # sort because set iteration order is not stable across processes.
        base_names = sorted({os.path.splitext(f)[0] for f in os.listdir(dataset_dir)})

        file_pairs = []
        for name in base_names:
            h5f_path = os.path.join(dataset_dir, f"{name}.hf5")
            tg_path = os.path.join(dataset_dir, f"{name}.TextGrid")

            if not (os.path.exists(h5f_path) and os.path.exists(tg_path)):
                continue

            try:
                with h5py.File(h5f_path, 'r') as f:
                    data = f['data'][:]
            except Exception as e:
                print(f"Error loading {h5f_path}: {e}")
                continue

            if not np.isfinite(data).all():
                print(f"Skipping {name} due to NaNs/Infs in {h5f_path}")
                continue

            file_pairs.append([h5f_path, tg_path])

        for h5f, tg in file_pairs:
            print(f"H5F: {h5f}  |  TextGrid: {tg}")

        return file_pairs

In [10]:
# Size the network from the data: input width is the length of one brain
# vector, output width is the length of one text embedding. Constructing the
# model also scans the dataset directory and prints the file pairs it finds.
model = SpatialFMRI2Embedding(dataset.X.shape[1], dataset.y.shape[1])

H5F: /content/drive/MyDrive/dataset/quietfire.hf5  |  TextGrid: /content/drive/MyDrive/dataset/quietfire.TextGrid


In [11]:
# Restore trained weights onto the CPU. `weights_only=True` restricts
# unpickling to tensors and plain containers, so a tampered checkpoint file
# cannot execute arbitrary code while being loaded.
model.load_state_dict(
    torch.load(
        '/content/drive/MyDrive/spatial_mlp_model.pth',
        map_location=torch.device('cpu'),
        weights_only=True,
    )
)

<All keys matched successfully>

In [12]:
from sklearn.metrics.pairwise import cosine_similarity

def get_top_k_similar(pred_vec, all_gt_vecs, all_gt_sentences, k=3):
    """Return the ``k`` candidate sentences most cosine-similar to ``pred_vec``.

    Args:
        pred_vec: array holding a single predicted embedding; it is reshaped
            to one row before comparison.
        all_gt_vecs: 2-D array with one candidate embedding per row.
        all_gt_sentences: sentences indexed like the rows of ``all_gt_vecs``.
        k: number of results; values <= 0 yield an empty list, values larger
            than the number of candidates yield all candidates.

    Returns:
        list of ``(sentence, similarity)`` tuples, best match first.
    """
    # Without this guard k == 0 would slice `[-0:]`, i.e. the whole array.
    if k <= 0:
        return []
    sims = cosine_similarity(pred_vec.reshape(1, -1), all_gt_vecs)[0]
    top_k = sims.argsort()[-k:][::-1]
    return [(all_gt_sentences[i], sims[i]) for i in top_k]

# Qualitative check: for a window of held-out samples, predict an embedding
# and retrieve the closest sentence among all test-set sentences.
model.eval()  # inference mode for the Dropout / BatchNorm layers
sample_vecs = []
pred_sentences = []
ground_truths = []

# Clamp the window to the size of the test split so a short recording ends the
# loop early instead of raising IndexError.
eval_start = 20
eval_stop = min(30, len(dataset.X_test))

for i in range(eval_start, eval_stop):
    sample_vec = torch.from_numpy(dataset.X_test[i]).unsqueeze(0)
    sample_vecs.append(sample_vec)

    with torch.no_grad():
        pred_embed = model(sample_vec).cpu().numpy()

    # `sims` has a single row, so the flat argmax is the candidate index.
    sims = cosine_similarity(pred_embed, dataset.y_test)
    best_idx = np.argmax(sims)

    print("\nPredicted sentence:")
    print(dataset.test_sentences[best_idx])
    pred_sentences.append(dataset.test_sentences[best_idx])

    print("\nGround truth:")
    print(dataset.test_sentences[i])
    ground_truths.append(dataset.test_sentences[i])

    top_preds = get_top_k_similar(pred_embed, dataset.y_test, dataset.test_sentences)
    for sent, score in top_preds:
        print(f"{score:.3f} | {sent}")



Predicted sentence:
{LG}

Ground truth:
HE CAME TOWARDS ME
0.167 | {LG}
0.103 | HAND IT'S
0.096 | {NS} I SPRAYED

Predicted sentence:
{LG} AND THIS ONE

Ground truth:
AND AS HE GOT CLOSER
0.154 | {LG} AND THIS ONE
0.148 | {LG} AND THEN HE BEGAN
0.146 | AND THIS HULKING GOLIATH

Predicted sentence:
AND THIS HULKING GOLIATH

Ground truth:
CLOSER HIS EYES NARROWED
0.140 | AND THIS HULKING GOLIATH
0.090 | {NS} I SPRAYED
0.088 | {LG} AND THIS ONE

Predicted sentence:
STAGGERING TOWARDS THE STREET

Ground truth:
NARROWED AND I COULDN'T
0.144 | STAGGERING TOWARDS THE STREET
0.122 | LUMBERED TOWARDS IT
0.119 | SIZE OF THE OTHER ONE AND

Predicted sentence:
HUMAN ARENA AND

Ground truth:
COULDN'T TELL WHETHER IT WAS BECAUSE
0.132 | HUMAN ARENA AND
0.104 | SIZE OF THE OTHER ONE AND
0.103 | MIME'S LUNCHTIME MENU

Predicted sentence:
AND THIS HULKING GOLIATH

Ground truth:
BECAUSE OF HIS RECOGNIZING
0.141 | AND THIS HULKING GOLIATH
0.127 | HIS {NS} EYES {NS} GOT WILD
0.120 | HAND IT'S

Predicted 

In [13]:
# Show the retrieved sentences and their ground-truth counterparts, one list
# per line.
print(pred_sentences, ground_truths, sep="\n")

['{LG}', '{LG} AND THIS ONE', 'AND THIS HULKING GOLIATH', 'STAGGERING TOWARDS THE STREET', 'HUMAN ARENA AND', 'AND THIS HULKING GOLIATH', 'OR WHETHER HE WAS STRATEGIZING', 'GLOWERING AT ME LIKE HE', 'SPRAY AND I SPRAYED HIM IN HIS', 'SPRAY AND I SPRAYED HIM IN HIS']
['HE CAME TOWARDS ME', 'AND AS HE GOT CLOSER', 'CLOSER HIS EYES NARROWED', "NARROWED AND I COULDN'T", "COULDN'T TELL WHETHER IT WAS BECAUSE", 'BECAUSE OF HIS RECOGNIZING', 'RECOGNIZING ME FROM BEFORE', 'BEFORE FROM WHAT HE HAD', 'HAD DONE TO ME OR', 'OR WHETHER HE WAS STRATEGIZING']


In [14]:
import requests, json

# Concatenate the per-sample sentences into one passage each.
predicted_story = ' '.join(pred_sentences)
ground_truth = ' '.join(ground_truths)

# Generation endpoint of the locally running Ollama server.
url = "http://localhost:11434/api/generate"
payload = {
    "model": "llama3",
    "prompt": f"Below is a predicted story from fMRI signals by a model. They tend to have noise, so sentences might not make complete sense independently. Do your best to summarize the given below text into sentences that makes sense and flow naturally. The total length of the text should be approximately similar to the input. This is the Story: {predicted_story}"
}

output_text = ""
# The response is consumed as a stream of JSON lines, each carrying a fragment
# of the answer. The `with` block releases the connection even if parsing fails.
with requests.post(url, json=payload, stream=True) as resp:
    if not resp.ok:
        # e.g. the model has not been pulled; surface the server's message
        # instead of failing later with KeyError: 'response'.
        raise RuntimeError(f"Ollama request failed ({resp.status_code}): {resp.text}")
    for line in resp.iter_lines():
        if not line:
            continue
        chunk = json.loads(line)
        if "error" in chunk:
            raise RuntimeError(f"Ollama returned an error: {chunk['error']}")
        output_text += chunk.get("response", "")

print("Predicted sentence:", output_text)

Predicted sentence: Based on the given text, here's a summary that makes sense and flows naturally:

As I stood at the edge of the street arena, I noticed a hulking goliath staggering towards me. His massive size seemed to fill the entire space, and he appeared to be strategizing his next move. Our eyes locked in a fierce stare-down, as if we were about to engage in some sort of primal battle. Suddenly, he charged at me, and I sprang into action, ready to defend myself. In a flash, I sprayed him with... something, unsure what it was or where it came from, but he seemed to be affected by it, stumbling backwards as if reeling from the impact.


In [15]:
# Same request as for the predicted story, this time asking the model to
# smooth the ground-truth passage so both texts can be compared side by side.
payload = {
    "model": "llama3",
    "prompt": f"Below is a ground truth story. Do your best to convert the given below text into sentences that makes sense and flow naturally. The total length of the text should be approximately similar to the input. This is the Story: {ground_truth}"
}

output_text = ""
# The response is consumed as a stream of JSON lines, each carrying a fragment
# of the answer. The `with` block releases the connection even if parsing fails.
with requests.post(url, json=payload, stream=True) as resp:
    if not resp.ok:
        # Surface the server's message instead of failing later with
        # KeyError: 'response'.
        raise RuntimeError(f"Ollama request failed ({resp.status_code}): {resp.text}")
    for line in resp.iter_lines():
        if not line:
            continue
        chunk = json.loads(line)
        if "error" in chunk:
            raise RuntimeError(f"Ollama returned an error: {chunk['error']}")
        output_text += chunk.get("response", "")

print("Ground truth:", output_text)

Ground truth: Here is the rewritten text in a natural flow:

As he approached me, his eyes narrowed and I couldn't help but wonder if it was because he had finally recognized me from our past encounter. Was it possible that he had been replaying what had happened between us, or was he simply strategizing for whatever reason he had come to see me?
