# 🧬 NeuroGenAI: DNABERT ➝ Spike ➝ Brian2 SNN Pipeline

## 📎 Source: NCBI FASTA, Hugging Face DNABERT, Brian2 Spiking Model

## 1. Setup & Imports

In [None]:
!pip install brian2
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

from src.nlp.dna_embedding_model import DNAEmbedder
from src.snn.spike_encoder import SpikeEncoder
from src.snn.brian_model import run_brian2_simulation
from src.eval.snn_metrics import evaluate_spikes

## 2. Load & Prepare Sequences

In [5]:
# Read the cleaned FASTA table, keep only rows whose Length is at least 30,
# and cap the working set at the first 100 such rows.
fasta_path = "data/processed/human_fasta_clean.csv"
df = (
    pd.read_csv(fasta_path)
    .loc[lambda frame: frame["Length"] >= 30]
    .head(100)
)
sequences = df["Sequence"].tolist()

## 3. DNABERT Embedding

In [None]:
# Embed every loaded sequence with the project's DNAEmbedder wrapper and
# persist the result; the spike-generation cell reloads this same file.
# NOTE(review): k=6 presumably selects 6-mer tokenisation to match the
# "DNA_bert_6" checkpoint — confirm against DNAEmbedder.
embedder = DNAEmbedder(k=6, model_id="armheb/DNA_bert_6")
embeddings = embedder.embed_batch(sequences)
np.save(file="data/processed/fasta_dnabert_embeddings.npy", arr=embeddings)

## 4. Normalize + Firing Rates

In [None]:
# Normalise the embeddings, convert them to firing rates with the project's
# SpikeEncoder, and save the rates to disk.
# NOTE(review): rate_max_hz=120 is presumably the upper bound of the rate
# range in Hz — confirm against SpikeEncoder.
encoder = SpikeEncoder(rate_max_hz=120)
norm_rates = encoder.normalize_embeddings(embeddings)
firing_rates = encoder.to_firing_rates(norm_rates)
np.save(file="data/processed/firing_rates.npy", arr=firing_rates)

## 5. Generate Poisson Spike Trains

In [None]:
# Reload the embeddings saved by the DNABERT cell so this step can run from
# the file on disk.
embedding_path = "data/processed/fasta_dnabert_embeddings.npy"
embeddings = np.load(embedding_path)

# Rebuild the encoder with the full spike-generation settings.
# NOTE(review): seed=42 presumably makes the Poisson sampling reproducible,
# and duration_ms / dt_ms presumably set the train length and time step —
# confirm against SpikeEncoder.
encoder = SpikeEncoder(
    seed=42,
    stdp_ready=False,
    dt_ms=1.0,
    duration_ms=500,
    rate_max_hz=120,
)

firing_rates = encoder.encode_and_save(
    prefix="dnabert",
    output_dir="data/processed",
    embeddings=embeddings,
)

# save_path is the file the Brian2 cell loads afterwards.
spike_train = encoder.generate_poisson_spike_train(
    save_path="data/processed/spike_train.npy",
    firing_rates=firing_rates,
)

## 6. Simulate SNN in Brian2

In [None]:
# Load the spike trains from the path given as save_path in the
# spike-generation cell and run the Brian2 simulation with STDP enabled.
# NOTE(review): duration_ms=100 here, while the spike train was generated
# with duration_ms=500 — confirm the shorter simulation window is intended.
spike_matrix = np.load("data/processed/spike_train.npy")
M, spikes, neurons = run_brian2_simulation(
    stdp=True,  # enable learning
    syn_weight=0.3,
    duration_ms=100,
    spike_matrix=spike_matrix,
)

## 7. Evaluate SNN Activity

In [None]:
# Run the project's spike evaluation on the stored simulation results.
# NOTE(review): presumably reads `path` and writes the metrics JSON to
# `save_path`, and the .npz is presumably produced by the simulation cell —
# confirm against evaluate_spikes / run_brian2_simulation.
evaluate_spikes(save_path="outputs/snn_metrics.json", path="outputs/snn_sim_results.npz")

## 8. Export Logs

In [None]:
# Gather the run's key parameters into one record for provenance.
meta = {
    "model_id": embedder.model_id,
    "vector_dim": embeddings.shape[1],
    "n_sequences": len(sequences),
    "fasta_source": fasta_path,
    "rate_max_hz": encoder.rate_max_hz,
    # Same value as the duration_ms passed to run_brian2_simulation above.
    "duration_ms": 100,
    "huggingface_model": f"https://huggingface.co/{embedder.model_id}"
}

# Create the target directory first: no earlier cell writes to data/outputs,
# so on a fresh checkout open() would fail with FileNotFoundError.
meta_path = Path("data/outputs/embedding_info.json")
meta_path.parent.mkdir(parents=True, exist_ok=True)
with open(meta_path, "w") as f:
    json.dump(meta, f, indent=4)

print("\n✅ Full SNN Pipeline Executed: DNABERT ➝ Spike ➝ SNN ➝ Metrics")