<a href="https://colab.research.google.com/github/navidh86/perturbseq-10701/blob/master/nt_main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ONLY FOR COLAB
# Bootstraps a fresh Colab runtime: fetch the project repository, make it the
# working directory (later cells open data files via relative paths), and
# install the extra Python packages requested below.
!git clone https://github.com/navidh86/perturbseq-10701.git
%cd ./perturbseq-10701
# NOTE(review): versions are not pinned, so a re-run may pull different releases.
!pip install fastparquet tqdm


Cloning into 'perturbseq-10701'...
remote: Enumerating objects: 108, done.[K
remote: Counting objects: 100% (17/17), done.[K
remote: Compressing objects: 100% (13/13), done.[K
remote: Total 108 (delta 9), reused 7 (delta 4), pack-reused 91 (from 1)[K
Receiving objects: 100% (108/108), 115.03 MiB | 14.93 MiB/s, done.
Resolving deltas: 100% (45/45), done.
Updating files: 100% (31/31), done.
/content/perturbseq-10701
Collecting fastparquet
  Downloading fastparquet-2024.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Downloading fastparquet-2024.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m84.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fastparquet
Successfully installed fastparquet-2024.11.0


In [2]:
# Installs transformers straight from the GitHub default branch (unpinned).
# The recorded run resolved to commit 2a61590a479d3b1f77059f75caee7cc22760019d
# (transformers 5.0.0.dev0).
# NOTE(review): pin a commit or release so the notebook stays reproducible.
!pip install --upgrade git+https://github.com/huggingface/transformers.git

Collecting git+https://github.com/huggingface/transformers.git
  Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-1htnbrp0
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-req-build-1htnbrp0
  Resolved https://github.com/huggingface/transformers.git to commit 2a61590a479d3b1f77059f75caee7cc22760019d
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting huggingface-hub<2.0,>=1.0.0 (from transformers==5.0.0.dev0)
  Downloading huggingface_hub-1.1.6-py3-none-any.whl.metadata (13 kB)
Downloading huggingface_hub-1.1.6-py3-none-any.whl (516 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m516.1/516.1 kB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: transformers
  Building wheel for transformers (pyproject.toml) ... 

In [3]:
import pandas as pd
import numpy as np
import pickle
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForMaskedLM

import os
import pickle
from tqdm import tqdm

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
# This string is passed to the encoders / models defined further down.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
# Get dataloader
from reference_data_alternate import (
    PairPerturbSeqDataset,
    perturbseq_collate,
    get_dataloader
)

# Loaders built by the project's get_dataloader helper with a batch size of 4.
# NOTE: `train_loader` / `test_loader` are rebound later in this notebook to the
# cached-embedding loaders returned by get_cached_loader(...).
train_loader = get_dataloader(type="train", batch_size=4)   # small batch for NT
test_loader  = get_dataloader(type="test", batch_size=4)

# len() of a loader is reported here as its "size"
# (presumably the number of batches, not samples; confirm against get_dataloader).
print("Train size:", len(train_loader))
print("Test size: ", len(test_loader))


Train size: 236692
Test size:  59174


In [5]:
# Calc summary stats
import numpy as np
from torch.utils.data import DataLoader

# Collect every training target once; `mu` and `sigma` computed here are the
# global statistics later used to z-score targets in the loss and evaluation.
train_dataset = PairPerturbSeqDataset(type="train")
loader = DataLoader(train_dataset, batch_size=512, collate_fn=perturbseq_collate)

# Keep each batch's targets as a NumPy array, then flatten element by element.
batches = []
for _, y in loader:
    batches.append(y.numpy())

all_y = np.array([value for batch in batches for value in batch])

mu = all_y.mean()
sigma = all_y.std()

print("mu =", mu)
print("sigma =", sigma)


mu = -0.022736955
sigma = 0.15207928


In [6]:
# define loss function
def weighted_mse_loss(pred, target, mu, sigma, alpha=3.0, threshold=1.0):
    """Weighted mean-squared error that up-weights large-effect targets.

    A target whose z-score ``(target - mu) / sigma`` has absolute value strictly
    greater than ``threshold`` gets weight ``alpha``; every other target gets
    weight 1. The result is the weighted sum of squared errors divided by the
    sum of the weights.

    Args:
        pred: Predicted values (tensor).
        target: Ground-truth values (tensor); weights are created on its device.
        mu: Mean used to z-score ``target``.
        sigma: Standard deviation used to z-score ``target``.
        alpha: Weight applied to large-effect targets.
        threshold: Absolute z-score above which a target counts as large-effect.

    Returns:
        A scalar tensor holding the weighted MSE.
    """
    dev = target.device
    heavy = torch.tensor(alpha, device=dev)
    light = torch.tensor(1.0, device=dev)

    is_large_effect = torch.abs((target - mu) / sigma) > threshold
    weights = torch.where(is_large_effect, heavy, light)

    squared_error = (pred - target) ** 2
    return torch.sum(weights * squared_error) / torch.sum(weights)


In [7]:
# !rm -rf /root/.cache/huggingface/transformers
# !rm -rf /root/.cache/huggingface/hub

# !pip install --upgrade git+https://github.com/huggingface/transformers.git


In [8]:

# from transformers import AutoTokenizer
# tok = AutoTokenizer.from_pretrained("InstaDeepAI/nucleotide-transformer-500m-human-ref")
# print(type(tok))
# print(tok.__class__.__name__)
# print("batch_encode_plus" in dir(tok))


## OLD


In [None]:
# NT ENCODER
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForMaskedLM

class NTEncoder(nn.Module):
    """Inference-only sequence encoder built on a pretrained masked-LM checkpoint.

    A sequence is upper-cased, ``U`` is replaced by ``T``, and the result is cut
    into consecutive pieces of ``tokenizer.model_max_length`` characters. Each
    piece is tokenized (padded/truncated to ``model_max_length`` tokens), run
    through the model, and its last hidden state is averaged over the positions
    where the attention mask is 1. The per-piece vectors are then averaged with
    equal weight to give one embedding per sequence.

    NOTE(review): pieces are cut by character count while ``max_length`` is a
    token count; if the tokenizer emits fewer tokens than characters, every
    piece is mostly padding. Confirm this is intended.

    Args:
        model_name: Hugging Face checkpoint to load.
        device: Device the model and the tokenized inputs are moved to.
    """

    def __init__(self, model_name="InstaDeepAI/nucleotide-transformer-500m-human-ref", device="cuda"):
        super().__init__()
        self.device = device

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForMaskedLM.from_pretrained(model_name).to(device)
        self.model.eval()

        self.max_len = self.tokenizer.model_max_length

    def train(self, mode: bool = True):
        """Set the training mode but keep the frozen backbone in eval mode.

        ``self.model`` is a registered sub-module, so a parent calling
        ``.train()`` would otherwise undo the ``eval()`` from ``__init__``.
        """
        super().train(mode)
        self.model.eval()
        return self

    @torch.no_grad()
    def forward(self, seq: str):
        """Return the mean-pooled embedding of ``seq`` as a 1-D tensor.

        Raises:
            ValueError: If ``seq`` is empty (there is nothing to embed).
        """
        seq = seq.upper().replace("U", "T")
        if not seq:
            raise ValueError("NTEncoder.forward received an empty sequence")

        chunk_embs = []
        for start in range(0, len(seq), self.max_len):
            chunk = seq[start:start + self.max_len]

            tokens = self.tokenizer(
                [chunk],
                return_tensors="pt",
                padding="max_length",
                max_length=self.max_len,
                truncation=True
            ).to(self.device)

            attention_mask = tokens["attention_mask"]

            # No `encoder_attention_mask`: it is a cross-attention argument and the
            # sibling encoders in this notebook call the model without it.
            out = self.model(
                tokens["input_ids"],
                attention_mask=attention_mask,
                output_hidden_states=True
            )

            hidden = out.hidden_states[-1].squeeze(0)        # (L, D)
            attn = attention_mask.squeeze(0).unsqueeze(-1)   # (L, 1)

            # Mean over the positions the attention mask marks as real tokens.
            chunk_embs.append((hidden * attn).sum(0) / attn.sum())

        # Every chunk contributes equally, regardless of its length.
        return torch.stack(chunk_embs).mean(0)


In [None]:
# Biencoder model
class NTBiEncoder(nn.Module):
    """Bi-encoder regressor: embed a TF and a gene sequence, then score the pair.

    Both sequences go through the same ``encoder``; the two embeddings are
    concatenated and fed to a three-layer MLP that outputs one scalar per pair.

    Args:
        encoder: Callable mapping one sequence string to a 1-D embedding tensor.
        emb_dim: Size of a single embedding; the MLP input is ``2 * emb_dim``.
    """

    def __init__(self, encoder, emb_dim=1280):
        super().__init__()
        self.encoder = encoder
        self.mlp = nn.Sequential(
            nn.Linear(emb_dim * 2, 512),   # concatenated TF + gene embedding
            nn.ReLU(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )

    @torch.no_grad()
    def encode(self, seq):
        """Embed a single sequence without tracking gradients."""
        return self.encoder(seq)

    def forward(self, tf_seqs, gene_seqs):
        """Predict one value per (TF sequence, gene sequence) pair.

        Args:
            tf_seqs: Iterable of TF sequence strings.
            gene_seqs: Iterable of gene sequence strings, aligned with ``tf_seqs``.

        Returns:
            Tensor of shape ``(batch,)`` with one prediction per pair.

        Raises:
            ValueError: If the two inputs have different lengths (previously the
                longer one was silently truncated by ``zip``).
        """
        tf_seqs = list(tf_seqs)
        gene_seqs = list(gene_seqs)
        if len(tf_seqs) != len(gene_seqs):
            raise ValueError(
                f"tf_seqs and gene_seqs must have the same length, "
                f"got {len(tf_seqs)} and {len(gene_seqs)}"
            )

        # Each encoder call is expensive and sequences repeat within a batch,
        # so embed every distinct sequence only once per forward pass.
        # This assumes the encoder is deterministic (frozen, eval mode).
        seen = {}

        def embed(seq):
            if seq not in seen:
                seen[seq] = self.encoder(seq)
            return seen[seq]

        tf_embs = torch.stack([embed(seq) for seq in tf_seqs])
        gene_embs = torch.stack([embed(seq) for seq in gene_seqs])
        h = torch.cat([tf_embs, gene_embs], dim=-1)
        return self.mlp(h).squeeze(-1)



## NEW

In [11]:
# import torch
# import torch.nn as nn
# from transformers import AutoTokenizer, AutoModelForMaskedLM

class NTEncoderCLS(nn.Module):
    """Inference-only sequence encoder that pools with the first-token vector.

    A sequence is upper-cased, ``U`` is replaced by ``T``, and the result is cut
    into consecutive pieces of ``tokenizer.model_max_length`` characters. For
    each piece the last hidden state at position 0 (treated in this notebook as
    the CLS token) is taken; the per-piece vectors are averaged with equal weight.

    Args:
        model_name: Hugging Face checkpoint to load.
        device: Device the model and the tokenized inputs are moved to.
    """

    def __init__(self, model_name="InstaDeepAI/nucleotide-transformer-500m-human-ref", device="cuda"):
        super().__init__()
        self.device = device

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForMaskedLM.from_pretrained(model_name).to(device)
        self.model.eval()

        self.max_len = self.tokenizer.model_max_length

    def train(self, mode: bool = True):
        """Set the training mode but keep the frozen backbone in eval mode.

        ``self.model`` is a registered sub-module, so a parent calling
        ``.train()`` would otherwise undo the ``eval()`` from ``__init__``.
        """
        super().train(mode)
        self.model.eval()
        return self

    @torch.no_grad()
    def forward(self, seq: str):
        """Return the averaged first-token embedding of ``seq`` as a 1-D tensor.

        Raises:
            ValueError: If ``seq`` is empty (there is nothing to embed).
        """
        seq = seq.upper().replace("U", "T")
        if not seq:
            raise ValueError("NTEncoderCLS.forward received an empty sequence")

        embeds = []
        for start in range(0, len(seq), self.max_len):
            chunk = seq[start:start + self.max_len]

            tokens = self.tokenizer(
                [chunk],
                return_tensors="pt",
                padding="max_length",
                max_length=self.max_len,
                truncation=True
            ).to(self.device)

            outputs = self.model(
                tokens["input_ids"],
                attention_mask=tokens["attention_mask"],
                output_hidden_states=True
            )

            # Inputs and model already live on self.device, so the output does too;
            # no extra device transfers are needed.
            hidden = outputs.hidden_states[-1]          # (1, L, D)
            embeds.append(hidden[:, 0, :].squeeze(0))   # vector at position 0

        # Every chunk contributes equally, regardless of its length.
        return torch.stack(embeds).mean(0)

class AttentionPool(nn.Module):
    """Learned attention pooling over the token axis.

    A single linear layer scores each token; masked positions are excluded
    before a softmax over tokens, and the output is the weighted sum of the
    token embeddings.

    Args:
        dim: Size of each token embedding.
    """

    def __init__(self, dim):
        super().__init__()
        self.att = nn.Linear(dim, 1)

    def forward(self, token_embs, mask):
        """Pool token embeddings into one vector per sequence.

        Args:
            token_embs: Tensor of shape ``(L, D)`` or batched ``(..., L, D)``.
            mask: Tensor of shape ``(L,)`` / ``(..., L)``; positions equal to 0
                are ignored (padding).

        Returns:
            Tensor of shape ``(D,)`` / ``(..., D)``.
        """
        scores = self.att(token_embs).squeeze(-1)                  # (..., L)
        # Use the dtype's own minimum so the fill value is representable in
        # half precision too (a literal -1e9 is out of range for float16).
        fill_value = torch.finfo(scores.dtype).min
        scores = scores.masked_fill(mask == 0, fill_value)         # ignore PAD tokens
        # Normalise over the token axis (the last axis of `scores`), which also
        # makes batched inputs work; for (L, D) input this equals dim=0.
        weights = torch.softmax(scores, dim=-1).unsqueeze(-1)      # (..., L, 1)
        return (weights * token_embs).sum(dim=-2)


In [12]:
class NTEncoderAttention(nn.Module):
    """Inference-only sequence encoder that pools tokens with ``AttentionPool``.

    A sequence is upper-cased, ``U`` is replaced by ``T``, and the result is cut
    into consecutive pieces of ``tokenizer.model_max_length`` characters. For
    each piece the last hidden state is pooled by ``self.pool`` using the
    attention mask; the per-piece vectors are averaged with equal weight.

    NOTE(review): ``forward`` runs entirely under ``torch.no_grad()``, so the
    pooling layer's weights receive no gradients here and keep their random
    initial values unless they are loaded or trained elsewhere. Confirm that
    this is intended.

    Args:
        model_name: Hugging Face checkpoint to load.
        device: Device the model, pooling layer and inputs are moved to.
    """

    def __init__(self, model_name="InstaDeepAI/nucleotide-transformer-500m-human-ref", device="cuda"):
        super().__init__()
        self.device = device

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForMaskedLM.from_pretrained(model_name).to(device)
        self.model.eval()

        # Size the pooling layer from the loaded checkpoint; fall back to the
        # previously hard-coded width when the config does not expose it.
        hidden_size = getattr(self.model.config, "hidden_size", 1280)
        self.pool = AttentionPool(hidden_size).to(device)
        self.max_len = self.tokenizer.model_max_length

    def train(self, mode: bool = True):
        """Set the training mode but keep the frozen backbone in eval mode.

        ``self.model`` is a registered sub-module, so a parent calling
        ``.train()`` would otherwise undo the ``eval()`` from ``__init__``.
        """
        super().train(mode)
        self.model.eval()
        return self

    @torch.no_grad()
    def forward(self, seq: str):
        """Return the attention-pooled embedding of ``seq`` as a 1-D tensor.

        Raises:
            ValueError: If ``seq`` is empty (there is nothing to embed).
        """
        seq = seq.upper().replace("U", "T")
        if not seq:
            raise ValueError("NTEncoderAttention.forward received an empty sequence")

        embeds = []
        for start in range(0, len(seq), self.max_len):
            chunk = seq[start:start + self.max_len]

            tokens = self.tokenizer(
                [chunk],
                return_tensors="pt",
                padding="max_length",
                max_length=self.max_len,
                truncation=True
            ).to(self.device)

            outputs = self.model(
                tokens["input_ids"],
                attention_mask=tokens["attention_mask"],
                output_hidden_states=True
            )

            hidden = outputs.hidden_states[-1].squeeze(0)  # (L, D)
            mask = tokens["attention_mask"].squeeze(0)     # (L,)

            # Pool and inputs share self.device, so no extra transfer is needed.
            embeds.append(self.pool(hidden, mask))

        # Every chunk contributes equally, regardless of its length.
        return torch.stack(embeds).mean(0)


In [13]:
# encoder_mean = NTEncoderMean(device=device)
# encoder_cls  = NTEncoderCLS(device=device)
# encoder_att  = NTEncoderAttention(device=device)


In [14]:
#Function to Save embeddings
# import pickle
# from tqdm import tqdm

# def cache_tf_embeddings(encoder, tf_seq_dict, save_path="tf_embed_cache.pkl"):
#     cache = {}
#     print("Caching TF embeddings...")
#     for tf_name, seq in tqdm(tf_seq_dict.items()):
#         cache[tf_name] = encoder(seq).cpu()
#     pickle.dump(cache, open(save_path, "wb"))
#     print("Saved TF embedding cache to", save_path)


# def cache_gene_embeddings(encoder, gene_seq_dict, save_path="gene_embed_cache.pkl"):
#     cache = {}
#     print("Caching Gene embeddings...")
#     for gene_name, seq in tqdm(gene_seq_dict.items()):
#         cache[gene_name] = encoder(seq).cpu()
#     pickle.dump(cache, open(save_path, "wb"))
#     print("Saved Gene embedding cache to", save_path)

#Function to Save embeddings


def ensure_dir(path):
    """Create directory ``path`` (with parents) unless it is empty or already exists."""
    if path != "" and not os.path.exists(path):
        os.makedirs(path, exist_ok=True)


def _cache_embeddings(encoder, seq_dict, save_path, start_message, label):
    """Embed every sequence in ``seq_dict`` and pickle ``{name: embedding}``.

    Shared implementation behind ``cache_tf_embeddings`` and
    ``cache_gene_embeddings``. Embeddings that expose ``.cpu()`` are moved to
    the CPU before being stored. The pickle is written to a temporary file and
    then renamed, so an interrupted write never leaves a truncated cache at
    ``save_path``.
    """
    ensure_dir(os.path.dirname(save_path))

    cache = {}
    print(start_message)

    for name, seq in tqdm(seq_dict.items()):
        emb = encoder(seq)
        if hasattr(emb, "cpu"):
            emb = emb.cpu()
        cache[name] = emb

    tmp_path = f"{os.fspath(save_path)}.tmp"
    try:
        with open(tmp_path, "wb") as f:
            pickle.dump(cache, f)
        os.replace(tmp_path, save_path)
    except Exception:
        # Do not leave a half-written temporary file behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

    print(f"Saved {label} embedding cache to: {os.path.abspath(save_path)}")


def cache_tf_embeddings(encoder, tf_seq_dict, save_path="./embeds/tf_embed_cache_nt_cls.pkl"):
    """Embed every TF sequence with ``encoder`` and pickle the results.

    Args:
        encoder: Callable mapping one sequence string to an embedding.
        tf_seq_dict: Mapping of TF name to sequence string.
        save_path: Destination pickle file; its directory is created if missing.
    """
    _cache_embeddings(encoder, tf_seq_dict, save_path, "Caching TF embeddings...", "TF")


def cache_gene_embeddings(encoder, gene_seq_dict, save_path="./embeds/gene_embed_cache_nt_cls.pkl"):
    """Embed every gene sequence with ``encoder`` and pickle the results.

    Args:
        encoder: Callable mapping one sequence string to an embedding.
        gene_seq_dict: Mapping of gene name to sequence string.
        save_path: Destination pickle file; its directory is created if missing.
    """
    _cache_embeddings(encoder, gene_seq_dict, save_path, "Caching gene embeddings...", "Gene")


In [None]:
# Generate and Save embeddings
# import torch
# device = "cuda" if torch.cuda.is_available() else "cpu"

# Load your sequence dictionaries (the same ones the dataloader uses)
# Read the name -> sequence dictionaries. Context managers close the files
# right after loading instead of leaking the handles.
# NOTE: pickle.load executes arbitrary code from the file; only load trusted files.
with open("tf_sequences.pkl", "rb") as f:
    tf_seq_dict = pickle.load(f)
with open("gene_sequences_4000bp.pkl", "rb") as f:
    gene_seq_dict = pickle.load(f)

# Initialize encoder (only the CLS-pooled variant is active in this run)
# encoder_mean = NTEncoderMean(device=device)
encoder_cls  = NTEncoderCLS(device=device)
# encoder_att  = NTEncoderAttention(device=device)

# Alternative caches kept for reference:
# cache_tf_embeddings(encoder, tf_seq_dict)
# cache_gene_embeddings(encoder, gene_seq_dict)
# cache_tf_embeddings(encoder_att, tf_seq_dict, save_path="./embeds/tf_attn.pkl")
# cache_gene_embeddings(encoder_att, gene_seq_dict, save_path="./embeds/gn_attn.pkl")

# Slow step: the recorded run took about 6.5 minutes for 223 TFs and about
# 1 h 45 min for 5307 genes.
cache_tf_embeddings(encoder_cls, tf_seq_dict, save_path="./embeds/tf_cls.pkl")
cache_gene_embeddings(encoder_cls, gene_seq_dict, save_path="./embeds/gn_cls.pkl")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/101 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/706 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.94G [00:00<?, ?B/s]

Loading weights:   0%|          | 0/396 [00:00<?, ?it/s]

EsmForMaskedLM LOAD REPORT from: InstaDeepAI/nucleotide-transformer-500m-human-ref
Key                         | Status     |  | 
----------------------------+------------+--+-
esm.embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Caching TF embeddings...


100%|██████████| 223/223 [06:38<00:00,  1.79s/it]


Saved TF embedding cache to: /content/perturbseq-10701/embeds/tf_cls.pkl
Caching gene embeddings...


100%|██████████| 5307/5307 [1:45:24<00:00,  1.19s/it]


Saved Gene embedding cache to: /content/perturbseq-10701/embeds/gn_cls.pkl


In [16]:
#  Download embeddings
import shutil
from google.colab import files

# Path to your embedding directory
embed_dir = "./embeds"

# Output zip file name
zip_name = "nt_embedding_caches.zip"

# Create the zip. The archive base name is derived from `zip_name` so the two
# cannot drift apart, and the path returned by make_archive is the one downloaded.
archive_path = shutil.make_archive(os.path.splitext(zip_name)[0], "zip", embed_dir)

# Download zip (Colab only)
files.download(archive_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [17]:
# New dataset/dataloader with embeddings
class CachedEmbeddingDataset(torch.utils.data.Dataset):
    """Dataset of (TF embedding, gene embedding, expression) triples.

    Rows come from a parquet table with ``tf_name``, ``gene_name`` and
    ``expression`` columns; embeddings come from two pickled
    ``{name: embedding}`` dictionaries. Rows whose TF or gene is missing from
    the caches are dropped, the remainder is shuffled with ``seed`` and split
    into a leading train part and a trailing test part.

    Args:
        parquet_path: Path of the expression table.
        tf_cache_path: Pickle file with TF embeddings.
        gene_cache_path: Pickle file with gene embeddings.
        type: ``"train"`` selects the first ``train_fraction`` of the shuffled
            rows; any other value selects the rest.
        train_fraction: Fraction of rows assigned to the train split.
        seed: Random state used for the shuffle.
    """

    def __init__(self, parquet_path, tf_cache_path, gene_cache_path, type="train", train_fraction=0.8, seed=10701):
        df = pd.read_parquet(parquet_path)

        # Load caches; `with` closes the files instead of leaking the handles.
        # NOTE: pickle.load executes arbitrary code; only load trusted files.
        with open(tf_cache_path, "rb") as f:
            self.tf_cache = pickle.load(f)
        with open(gene_cache_path, "rb") as f:
            self.gene_cache = pickle.load(f)

        # remove entries missing from cache
        df = df[df["tf_name"].isin(self.tf_cache.keys())]
        df = df[df["gene_name"].isin(self.gene_cache.keys())]

        # shuffle + split
        df = df.sample(frac=1.0, random_state=seed)
        n = int(train_fraction * len(df))
        if type == "train":
            self.df = df.iloc[:n].reset_index(drop=True)
        else:
            self.df = df.iloc[n:].reset_index(drop=True)

        # Pull the three columns out once so __getitem__ is plain indexing
        # rather than building a pandas row object for every sample.
        self._tf_names = self.df["tf_name"].tolist()
        self._gene_names = self.df["gene_name"].tolist()
        self._targets = torch.tensor(self.df["expression"].to_numpy(), dtype=torch.float32)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(tf_embedding, gene_embedding, expression)`` for row ``idx``."""
        tf_emb = self.tf_cache[self._tf_names[idx]]
        gene_emb = self.gene_cache[self._gene_names[idx]]
        return tf_emb, gene_emb, self._targets[idx]


In [None]:
def get_cached_loader(
    type="train",
    batch_size=32,
    parquet_path="tf_gene_expression.parquet",
    tf_cache_path="./embeds/tf_cls.pkl",
    gene_cache_path="./embeds/gn_cls.pkl",
):
    """Build a DataLoader over precomputed TF / gene embeddings.

    The path arguments default to the CLS-pooled caches this notebook writes.
    Pass other paths (e.g. ``./embeds/tf_attn.pkl`` / ``./embeds/gn_attn.pkl``)
    to switch embedding variants without editing the function.

    Args:
        type: ``"train"`` or ``"test"``; forwarded to ``CachedEmbeddingDataset``.
            Only the train loader is shuffled.
        batch_size: Number of samples per batch.
        parquet_path: Expression table read by the dataset.
        tf_cache_path: Pickle file with TF embeddings.
        gene_cache_path: Pickle file with gene embeddings.

    Returns:
        A ``torch.utils.data.DataLoader`` over the selected split.
    """
    ds = CachedEmbeddingDataset(
        parquet_path=parquet_path,
        tf_cache_path=tf_cache_path,
        gene_cache_path=gene_cache_path,
        type=type,
    )
    # Referenced through `torch.utils.data` so this cell does not depend on the
    # `DataLoader` import made in an unrelated earlier cell.
    return torch.utils.data.DataLoader(ds, batch_size=batch_size, shuffle=(type == "train"))

# def get_cached_loader(type="train", batch_size=32):
#     ds = CachedEmbeddingDataset(
#         parquet_path="tf_gene_expression.parquet",
#         tf_cache_path="./embeds/tf_embed_cache.pkl",
#         gene_cache_path="./embeds/gene_embed_cache.pkl",
#         type=type
#     )
#     return DataLoader(ds, batch_size=batch_size, shuffle=(type=="train"))



In [19]:
# # MLP OLD
# class NTBiEncoderFast(nn.Module):
#     def __init__(self, emb_dim=1280):
#         super().__init__()
#         self.mlp = nn.Sequential(
#             nn.Linear(emb_dim * 2, 512),
#             nn.ReLU(),
#             nn.Linear(512, 128),
#             nn.ReLU(),
#             nn.Linear(128, 1)
#         )

#     def forward(self, tf_embs, gene_embs):
#         h = torch.cat([tf_embs, gene_embs], dim=-1)
#         return self.mlp(h).squeeze(-1)


In [20]:
# MLP new
class InteractionMLP(nn.Module):
    """MLP regressor over TF and gene embeddings plus their element-wise product.

    The input is the concatenation ``[tf_emb, gene_emb, tf_emb * gene_emb]``
    (``3 * emb_dim`` features). It passes through linear layers of width
    2048 -> 1024 -> 512 -> 128 -> 1 with ReLU activations and dropout (p=0.2)
    after the first two hidden layers.

    Args:
        emb_dim: Size of a single TF or gene embedding.
    """

    def __init__(self, emb_dim=1280):
        super().__init__()

        # Layer order (and therefore the state_dict keys net.0, net.3, ...) is fixed.
        layers = [
            nn.Linear(3 * emb_dim, 2048), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(2048, 1024), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(1024, 512), nn.ReLU(),
            nn.Linear(512, 128), nn.ReLU(),
            nn.Linear(128, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, tf_emb, gene_emb):
        """Return one prediction per row; the trailing singleton axis is squeezed."""
        features = torch.cat((tf_emb, gene_emb, tf_emb * gene_emb), dim=-1)
        return self.net(features).squeeze(-1)


In [21]:
# Updated Training Loop
from tqdm import tqdm

def train_one_epoch_cached_nt(model, loader, optimizer, mu, sigma, device="cuda"):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    Each batch of cached embeddings is moved to ``device``, scored by ``model``
    and optimised with ``weighted_mse_loss``. The returned value is the
    per-sample average of the batch losses (each batch loss weighted by its
    batch size).

    Args:
        model: Module called as ``model(tf_emb, gene_emb)``.
        loader: Iterable yielding ``(tf_emb, gene_emb, y)`` batches.
        optimizer: Optimizer stepping ``model``'s parameters.
        mu: Target mean passed to the loss.
        sigma: Target standard deviation passed to the loss.
        device: Device the batches are moved to.
    """
    model.train()
    running_loss = 0.0
    n_seen = 0
    progress = tqdm(loader)

    for tf_batch, gene_batch, target in progress:
        tf_batch = tf_batch.to(device)
        gene_batch = gene_batch.to(device)
        target = target.to(device)

        loss = weighted_mse_loss(model(tf_batch, gene_batch), target, mu, sigma)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        batch_size = len(target)
        running_loss += batch_loss * batch_size
        n_seen += batch_size
        progress.set_postfix({"loss": batch_loss})

    return running_loss / n_seen




In [22]:
# Seed torch so weight initialisation and shuffling are repeatable across runs
# (same value as the dataset's default split seed).
torch.manual_seed(10701)

train_loader = get_cached_loader(type="train", batch_size=64)
test_loader = get_cached_loader(type="test", batch_size=64)

# model_nt = NTBiEncoderFast(emb_dim=1280).to(device)
model_nt = InteractionMLP(emb_dim=1280).to(device)
optimizer = torch.optim.Adam(model_nt.parameters(), lr=1e-4)

for epoch in range(10):
    # Pass `device` explicitly: the function defaults to "cuda", which would not
    # match a model placed on the CPU when no GPU is available.
    loss = train_one_epoch_cached_nt(model_nt, train_loader, optimizer, mu, sigma, device=device)
    print("Epoch", epoch, "Loss", loss)


100%|██████████| 14794/14794 [02:42<00:00, 91.32it/s, loss=0.0372]


Epoch 0 Loss 0.044705884399247685


100%|██████████| 14794/14794 [02:41<00:00, 91.61it/s, loss=0.00958]


Epoch 1 Loss 0.04428124993092301


100%|██████████| 14794/14794 [02:41<00:00, 91.57it/s, loss=0.019]


Epoch 2 Loss 0.044556990857820715


100%|██████████| 14794/14794 [02:40<00:00, 91.95it/s, loss=0.0272]


Epoch 3 Loss 0.04431746634662649


100%|██████████| 14794/14794 [02:40<00:00, 92.33it/s, loss=0.0292]


Epoch 4 Loss 0.04430735037370399


100%|██████████| 14794/14794 [02:40<00:00, 92.26it/s, loss=0.0266]


Epoch 5 Loss 0.04430714858448102


100%|██████████| 14794/14794 [02:40<00:00, 92.13it/s, loss=0.0107]


Epoch 6 Loss 0.04452977164426884


100%|██████████| 14794/14794 [02:40<00:00, 92.27it/s, loss=0.0137]


Epoch 7 Loss 0.044522506047112685


100%|██████████| 14794/14794 [02:40<00:00, 92.16it/s, loss=0.07]


Epoch 8 Loss 0.04455458368194642


100%|██████████| 14794/14794 [02:40<00:00, 92.23it/s, loss=0.0362]

Epoch 9 Loss 0.0445319930269634





In [23]:
def evaluate_nt_cached(model, loader, mu, sigma, device="cuda", threshold=1.0):
    """Evaluate ``model`` on cached embeddings.

    Args:
        model: Module called as ``model(tf_emb, gene_emb)``.
        loader: Iterable yielding ``(tf_emb, gene_emb, y)`` batches.
        mu: Mean used to z-score the targets.
        sigma: Standard deviation used to z-score the targets.
        device: Device the embeddings are moved to for the forward pass.
        threshold: Absolute z-score above which a target counts as
            "large effect" (default matches the loss function's default).

    Returns:
        ``(mse, corr, mse_big, corr_big)``: mean-squared error and Pearson
        correlation over all samples, then the same two metrics restricted to
        large-effect targets. The last two are ``None`` when no target exceeds
        the threshold. Correlations may be NaN when they are undefined (e.g.
        constant predictions or a single sample).
    """
    model.eval()
    preds_all, y_all = [], []

    with torch.no_grad():
        for tf_emb, gene_emb, y in loader:
            preds = model(tf_emb.to(device), gene_emb.to(device))

            preds_all.append(preds.cpu())
            # Targets are only compared on the CPU, so they never need to
            # visit `device`.
            y_all.append(y.cpu())

    preds_all = torch.cat(preds_all)
    y_all = torch.cat(y_all)

    mse = ((preds_all - y_all) ** 2).mean().item()
    corr = torch.corrcoef(torch.stack([preds_all, y_all]))[0, 1].item()

    # Large-effect subset
    z = (y_all - mu) / sigma
    mask = torch.abs(z) > threshold

    if mask.any():
        big_preds, big_y = preds_all[mask], y_all[mask]
        mse_big = ((big_preds - big_y) ** 2).mean().item()
        corr_big = torch.corrcoef(torch.stack([big_preds, big_y]))[0, 1].item()
    else:
        mse_big, corr_big = None, None

    return mse, corr, mse_big, corr_big


In [24]:
mse, corr, mse_big, corr_big = evaluate_nt_cached(model_nt, test_loader, mu, sigma, device=device)


def _fmt(value, spec):
    """Format a metric, tolerating the None returned when the subset is empty."""
    return "n/a" if value is None else format(value, spec)


# NOTE(review): the recorded run prints a test correlation of 0.0000 and the
# training loss stays near 0.044 for all ten epochs, i.e. the predictions carry
# essentially no signal about the targets. Investigate before using this model.
print("=== Evaluation Results ===")
print(f"Test MSE:          {mse:.6f}")
print(f"Test Corr:         {corr:.4f}")
print(f"Big-Effect MSE:    {_fmt(mse_big, '.6f')}")
print(f"Big-Effect Corr:   {_fmt(corr_big, '.4f')}")


=== Evaluation Results ===
Test MSE:          0.020839
Test Corr:         0.0000
Big-Effect MSE:    0.094908
Big-Effect Corr:   0.0001


In [25]:
# Persist only the weights (state_dict); loading them back requires building an
# InteractionMLP with the same layer layout first.
torch.save(model_nt.state_dict(), "nt_cls_int_model.pt")
print("Model saved as nt_cls_int_model.pt")


Model saved as nt_cls_int_model.pt


In [26]:
from google.colab import files
# Colab only: download the checkpoint written by the previous cell.
files.download("nt_cls_int_model.pt")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>