In [3]:
!pip install --upgrade numpy==1.26.4



In [4]:
%pip -q install transformers accelerate datasets peft bitsandbytes huggingface_hub gradio sentencepiece
%pip -q install --upgrade "transformers[audio]"  # ASR/TTS pipelines

In [6]:
%pip -q install "transformers[audio]" reportlab

# --- Install (kept minimal) ---

In [1]:
import os, time, json, math, torch, threading
import numpy as np
import gradio as gr
from typing import List, Dict, Optional
from huggingface_hub import login
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
    BitsAndBytesConfig,
    pipeline
)

In [2]:


from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib.units import cm
from reportlab.lib import enums



# ---------- CONFIG ----------

In [23]:
MODEL_ID = "ruchirnamjoshi/BioMistralFinetuned"   # your fine-tuned model
SYSTEM_PROMPT = (
    "You are an expert wet‑lab protocol planner for pharma/biotech. "
    "When asked to generate a protocol, ALWAYS return a structured document with sections in this order:\n"
    "1) Title\n2) Objective\n3) Materials (bulleted)\n4) Safety & PPE\n5) Step-by-step Procedure (single-level numbered 1., 2., 3., ...)\n"
    "6) Quality Checks/Acceptance Criteria\n7) Troubleshooting Tips\n8) Notes & References\n"
    "For steps, be concrete and executable (volumes, temps, times)."
    "Avoid extra commentary. Keep it within 700-900 words unless user says otherwise."
)

INSTRUCTION = (
    "Given a goal and optional constraints/context, produce the structured protocol as specified. "
    "Avoid extra commentary. Keep it within 700-900 words unless user says otherwise."
)
RESPONSE_HEADER = "### Response:\n"

In [4]:
from google.colab import userdata
HF_USER = "ruchirnamjoshi"
hf_token = userdata.get('HF_TOKEN')
login(hf_token, add_to_git_credential=True)

# ---------- Load LLM (4-bit) ----------

In [5]:
from peft import PeftModel

BASE_MODEL = "BioMistral/BioMistral-7B"
MODEL_ID   = "ruchirnamjoshi/BioMistralFinetuned"  # your adapter repo

device = "cuda" if torch.cuda.is_available() else "cpu"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.float16,
)
base.config.use_cache = True   # enable cache for inference (faster)
base.eval()




tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/567 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/14.5G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): MistralRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): Mist

In [6]:
model = PeftModel.from_pretrained(base, MODEL_ID)
model.eval()

adapter_config.json:   0%|          | 0.00/890 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/51.4M [00:00<?, ?B/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj)

In [10]:
from transformers import StoppingCriteria, StoppingCriteriaList, LogitsProcessorList, RepetitionPenaltyLogitsProcessor, NoRepeatNGramLogitsProcessor


# ---------- Audio pipelines (optional, loaded lazily) ----------

In [24]:


# -------------- Mic → ASR (keep; TTS removed) --------------
_asr = None
def get_asr():
    global _asr
    if _asr is None:
        _asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
    return _asr

def transcribe_audio(mic_audio):
    if mic_audio is None: return ""
    asr = get_asr()
    if isinstance(mic_audio, dict) and "sample_rate" in mic_audio:
        sr, arr = mic_audio["sample_rate"], mic_audio["array"]
        out = asr({"sampling_rate": sr, "raw": arr})
    elif isinstance(mic_audio, tuple):
        sr, arr = mic_audio
        out = asr({"sampling_rate": sr, "raw": arr})
    else:
        out = asr(mic_audio)
    return out.get("text","").strip()

# -------------- Prompt & post-processing --------------
def build_prompt(history: List[Dict], goal: str, constraints: str, extra_context: str, force_sections: bool=True) -> str:
    convo = ""
    for turn in history[-5:]:
        role = turn.get("role","user")
        content = (turn.get("content") or "").strip()
        if not content: continue
        tag = "User" if role=="user" else "Assistant"
        convo += f"\n### {tag}:\n{content}\n"
    goal_block = goal.strip()
    if constraints.strip():
        goal_block += f"\n\nConstraints:\n{constraints.strip()}"
    if extra_context.strip():
        goal_block += f"\n\nAdditional Context:\n{extra_context.strip()}"
    guide = "\n(Return structured sections exactly as specified.)" if force_sections else ""
    return (
        f"### System:\n{SYSTEM_PROMPT}{guide}\n\n"
        f"### Instruction:\n{INSTRUCTION}\n"
        f"{convo}\n"
        f"### Input:\n{goal_block}\n\n"
        f"{RESPONSE_HEADER}"
    )

def ensure_numbering(text: str) -> str:
    lines = [l.strip() for l in text.splitlines()]
    if not lines: return ""
    fixed, in_steps, idx = [], False, 1
    for l in lines:
        lo = l.lower().strip()
        if lo.startswith("step-by-step") or "procedure" in lo:
            in_steps = True
            fixed.append(l); continue
        if in_steps and l and (l[:2].isdigit() or l.startswith(tuple([f"{i}." for i in range(1,10)]))):
            fixed.append(f"{idx}. {l.lstrip('0123456789. ').strip()}"); idx += 1
        else:
            fixed.append(l)
    return "\n".join(fixed).strip()

# -------------- Anti-repetition + explicit stopping --------------
STOP_STRS = [
    "\n###",  # next chat turn
    "\nTitle", "\nObjective", "\nMaterials", "\nSafety", "\nStep-by-step", "\nProcedure",
    "\nQuality", "\nTroubleshooting", "\nNotes", "\nReferences"
]
class MultiStringStopper(StoppingCriteria):
    def __init__(self, stop_strings, tokenizer):
        self.stop_ids = [tokenizer.encode(s, add_special_tokens=False) for s in stop_strings]
    def __call__(self, input_ids, scores, **kwargs):
        seq = input_ids[0].tolist()
        for s in self.stop_ids:
            if len(seq) >= len(s) and seq[-len(s):] == s:
                return True
        return False

def seed_everything(seed: Optional[int]):
    if seed is None or seed < 0: return
    torch.manual_seed(int(seed))
    np.random.seed(int(seed))

# -------------- Streaming generate with processors & stopper --------------
def stream_generate(prompt: str, temperature: float, top_p: float, max_new_tokens: int, seed: Optional[int]):
    seed_everything(seed)
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    processors = LogitsProcessorList([
        RepetitionPenaltyLogitsProcessor(1.15),
        NoRepeatNGramLogitsProcessor(6),
    ])


    gen_kwargs = dict(
        **inputs,
        max_new_tokens=int(max_new_tokens),
        do_sample=(temperature>0),
        temperature=float(temperature),
        top_p=float(top_p),
        eos_token_id=tokenizer.eos_token_id,
        streamer=streamer,
        pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
        logits_processor=processors
    )

    t = threading.Thread(target=model.generate, kwargs=gen_kwargs)
    t.start()

    buf = ""
    for new_text in streamer:
        buf += new_text
        if "\n" in buf:
            chunks = buf.split("\n")
            for c in chunks[:-1]:
                yield c + "\n"
            buf = chunks[-1]
    if buf:
        yield buf

# -------------- PDF export --------------
def save_pdf(history: List[Dict], protocol: str) -> str:
    from reportlab.lib import styles
    ts = time.strftime("%Y%m%d-%H%M%S")
    path = f"/content/protocol_{ts}.pdf"
    doc = SimpleDocTemplate(path, pagesize=A4, rightMargin=2*cm, leftMargin=2*cm, topMargin=1.5*cm, bottomMargin=1.5*cm)
    styles = getSampleStyleSheet()
    styles["BodyText"].alignment = enums.TA_LEFT
    flow = []
    flow.append(Paragraph("<b>Protocol Draft</b>", styles["Title"]))
    flow.append(Spacer(1, 0.3*cm))
    flow.append(Paragraph("<b>Conversation (recent turns)</b>", styles["Heading2"]))
    for turn in history[-10:]:
        role = turn.get("role","user").capitalize()
        content = (turn.get("content","") or "").replace("\n","<br/>")
        flow.append(Paragraph(f"<b>{role}:</b> {content}", styles["BodyText"]))
        flow.append(Spacer(1, 0.2*cm))
    flow.append(PageBreak())
    flow.append(Paragraph("<b>Model Output (Structured Protocol)</b>", styles["Heading2"]))
    for line in (protocol or "").splitlines():
        if not line.strip():
            flow.append(Spacer(1, 0.15*cm)); continue
        if line.lower().startswith(("title","objective","materials","safety","step-by-step","procedure","quality","troubleshooting","notes")):
            flow.append(Paragraph(f"<b>{line}</b>", styles["BodyText"]))
        else:
            flow.append(Paragraph(line, styles["BodyText"]))
    doc.build(flow)
    return path



# ---------------- Gradio UI ----------------

In [29]:

with gr.Blocks(css="#proto { white-space: pre-wrap; font-family: ui-monospace, Menlo, monospace; }") as demo:
    gr.Markdown("## 🧪 Wet‑Lab Protocol Assistant ")

    with gr.Row():
        with gr.Column(scale=2):
            goal = gr.Textbox(label="Goal", lines=3, placeholder="e.g., Test inhibitory effect of Compound X on E. coli growth")
            constraints = gr.Textbox(label="Constraints / Context (optional)", lines=3, placeholder="Equipment, reagents, safety limits, etc.")
            extra = gr.Textbox(label="Extra Context (optional)", lines=3, placeholder="Paste any relevant notes here.")
            force_sections = gr.Checkbox(value=True, label="Force structured sections (Title, Objective, Materials, Safety, Steps, QC, Troubleshooting, Notes)")

            # Audio input (ASR)
            mic = gr.Audio(sources=["microphone"], label="Speak your goal", type="numpy")
            asr_btn = gr.Button("Transcribe Mic → Goal")

            # Decoding controls
            with gr.Row():
                temperature = gr.Slider(0.0, 1.5, value=0.3, step=0.05, label="Temperature")
                top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top‑p")
            with gr.Row():
                max_new = gr.Slider(64, 2048, value=768, step=32, label="Max new tokens")
                seed = gr.Number(value=42, label="Seed (‑1 random)", precision=0)

            # Actions
            with gr.Row():
                run_btn = gr.Button("Generate Protocol", variant="primary")
                compare_btn = gr.Button("Compare Variants")
                clear_btn = gr.Button("Clear Session")
                save_pdf_btn = gr.Button("Save as PDF")

        with gr.Column(scale=3):
            proto = gr.Textbox(label="Protocol", lines=40, elem_id="proto")
            with gr.Accordion("Conversation (this session)", open=False):
                history_box = gr.JSON(label="Turns")


        with gr.Column():
            gr.Markdown("### Compare Variants")
            with gr.Column():
                temperature_b = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature (B)")
                top_p_b = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top‑p (B)")
                seed_b = gr.Number(value=123, label="Seed (B)", precision=0)
            with gr.Column():
                proto_a = gr.Textbox(label="Variant A", lines=14)
                proto_b = gr.Textbox(label="Variant B", lines=14)

    # Session state
    state = gr.State([])

    # Handlers
    def handle_asr(mic_audio, s):
        text = transcribe_audio(mic_audio)
        if text:
            s = s + [{"role":"user", "content": text}]
        return text, s, s
    asr_btn.click(handle_asr, inputs=[mic, state], outputs=[goal, state, history_box])

    def run_model(goal_text, constraints_text, extra_text, s, temperature, top_p, max_new, seed, force_sections):
        prompt = build_prompt(s, goal_text or "", constraints_text or "", extra_text or "", force_sections=force_sections)
        s = s + [{"role":"user", "content": (goal_text or "").strip()}]

        streamed = ""
        for chunk in stream_generate(prompt, float(temperature), float(top_p), int(max_new), int(seed) if seed is not None else -1):
            streamed += chunk
            yield streamed, s  # stream text only

        final_text = ensure_numbering(streamed)
        s = s + [{"role":"assistant", "content": final_text}]
        yield final_text, s

    run_btn.click(
        run_model,
        inputs=[goal, constraints, extra, state, temperature, top_p, max_new, seed, force_sections],
        outputs=[proto, history_box]
    )

    def compare(goal_text, constraints_text, extra_text, s, temp_a, top_a, seed_a, temp_b, top_b, seed_b, force_sections):
        base = build_prompt(s, goal_text or "", constraints_text or "", extra_text or "", force_sections=force_sections)

        # Variant A
        seed_everything(int(seed_a) if seed_a is not None else -1)
        ia = tokenizer([base], return_tensors="pt").to(model.device)
        with torch.inference_mode():
            ga = model.generate(
                **ia, max_new_tokens=768,
                do_sample=(temp_a>0), temperature=float(temp_a), top_p=float(top_a),
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
                logits_processor=LogitsProcessorList([RepetitionPenaltyLogitsProcessor(1.15), NoRepeatNGramLogitsProcessor(6)]),
                stopping_criteria=StoppingCriteriaList([MultiStringStopper(STOP_STRS, tokenizer)]),
            )
        out_a = tokenizer.decode(ga[0], skip_special_tokens=True).split(RESPONSE_HEADER,1)[-1].strip()
        out_a = ensure_numbering(out_a)

        # Variant B
        seed_everything(int(seed_b) if seed_b is not None else -1)
        ib = tokenizer([base], return_tensors="pt").to(model.device)
        with torch.inference_mode():
            gb = model.generate(
                **ib, max_new_tokens=768,
                do_sample=(temp_b>0), temperature=float(temp_b), top_p=float(top_b),
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
                logits_processor=LogitsProcessorList([RepetitionPenaltyLogitsProcessor(1.15), NoRepeatNGramLogitsProcessor(6)]),
                stopping_criteria=StoppingCriteriaList([MultiStringStopper(STOP_STRS, tokenizer)]),
            )
        out_b = tokenizer.decode(gb[0], skip_special_tokens=True).split(RESPONSE_HEADER,1)[-1].strip()
        out_b = ensure_numbering(out_b)

        return out_a, out_b

    compare_btn.click(
        compare,
        inputs=[goal, constraints, extra, state, temperature, top_p, seed, temperature_b, top_p_b, seed_b, force_sections],
        outputs=[proto_a, proto_b]
    )

    def do_clear():
        return "", "", "", [], ""
    clear_btn.click(do_clear, outputs=[goal, constraints, extra, state, proto])

    def do_save_pdf(s, text):
        path = save_pdf(s, text or "")
        return path
    saved_pdf = gr.File(label="Download PDF")
    save_pdf_btn.click(do_save_pdf, inputs=[state, proto], outputs=[saved_pdf])

demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1195158184cd48e30a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




# ---------IGNORE----------------IGNORE----------------IGNORE-----------------IGNORE----------------------


In [30]:
!pip -q install -U huggingface_hub gradio

['\x1b[?25l   \x1b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m0.0/59.6 MB\x1b[0m \x1b[31m?\x1b[0m eta \x1b[36m-:--:--\x1b[0m',
 '\x1b[2K   \x1b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m0.5/59.6 MB\x1b[0m \x1b[31m29.0 MB/s\x1b[0m eta \x1b[36m0:00:03\x1b[0m',
 '\x1b[2K   \x1b[91m━━\x1b[0m\x1b[91m╸\x1b[0m\x1b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m4.3/59.6 MB\x1b[0m \x1b[31m69.1 MB/s\x1b[0m eta \x1b[36m0:00:01\x1b[0m',
 '\x1b[2K   \x1b[91m━━━━━━\x1b[0m\x1b[90m╺\x1b[0m\x1b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m9.3/59.6 MB\x1b[0m \x1b[31m94.3 MB/s\x1b[0m eta \x1b[36m0:00:01\x1b[0m',
 '\x1b[2K   \x1b[91m━━━━━━━━━\x1b[0m\x1b[91m╸\x1b[0m\x1b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m14.3/59.6 MB\x1b[0m \x1b[31m139.7 MB/s\x1b[0m eta \x1b[36m0:00:01\x1b[0m',
 '\x1b[2K   \x1b[91m━━━━━━━━━━━\x1b[0m\x1b[91m╸\x1b[0m\x1b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m17.9/59.6 MB\x1b[0m \x1b[31m122.0 MB/s\x1b[0m eta \x1b[36m0:00:

In [38]:
import os, textwrap, json, time
SPACE_DIR = "/content/space"
os.makedirs(SPACE_DIR, exist_ok=True)

APP_CODE = r'''
import os, time, threading, numpy as np, torch, gradio as gr
from typing import List, Dict, Optional
from transformers import (
    AutoModelForCausalLM, AutoTokenizer,
    BitsAndBytesConfig, TextIteratorStreamer,
    StoppingCriteria, StoppingCriteriaList,
    LogitsProcessorList, RepetitionPenaltyLogitsProcessor,
    NoRepeatNGramLogitsProcessor
)
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib.units import cm
from reportlab.lib import enums


SYSTEM_PROMPT = (
    "You are an expert wet‑lab protocol planner for pharma/biotech. "
    "When asked to generate a protocol, ALWAYS return a structured document with sections in this order:\n"
    "1) Title\n2) Objective\n3) Materials (bulleted)\n4) Safety & PPE\n5) Step-by-step Procedure (single-level numbered 1., 2., 3., ...)\n"
    "6) Quality Checks/Acceptance Criteria\n7) Troubleshooting Tips\n8) Notes & References\n"
    "For steps, be concrete and executable (volumes, temps, times)."
    "Avoid extra commentary. Keep it within 700-900 words unless user says otherwise."
)

INSTRUCTION = (
    "Given a goal and optional constraints/context, produce the structured protocol as specified. "
    "Avoid extra commentary. Keep it within 700-900 words unless user says otherwise."
)
RESPONSE_HEADER = "### Response:\n"

from peft import PeftModel

BASE_MODEL = "BioMistral/BioMistral-7B"
MODEL_ID   = "ruchirnamjoshi/BioMistralFinetuned"  # your adapter repo

device = "cuda" if torch.cuda.is_available() else "cpu"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.float16,
)
base.config.use_cache = True   # enable cache for inference (faster)
base.eval()


model = PeftModel.from_pretrained(base, MODEL_ID)

model.eval()



# -------------- Mic → ASR (keep; TTS removed) --------------
_asr = None
def get_asr():
    global _asr
    if _asr is None:
        _asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
    return _asr

def transcribe_audio(mic_audio):
    if mic_audio is None: return ""
    asr = get_asr()
    if isinstance(mic_audio, dict) and "sample_rate" in mic_audio:
        sr, arr = mic_audio["sample_rate"], mic_audio["array"]
        out = asr({"sampling_rate": sr, "raw": arr})
    elif isinstance(mic_audio, tuple):
        sr, arr = mic_audio
        out = asr({"sampling_rate": sr, "raw": arr})
    else:
        out = asr(mic_audio)
    return out.get("text","").strip()

# -------------- Prompt & post-processing --------------
def build_prompt(history: List[Dict], goal: str, constraints: str, extra_context: str, force_sections: bool=True) -> str:
    convo = ""
    for turn in history[-5:]:
        role = turn.get("role","user")
        content = (turn.get("content") or "").strip()
        if not content: continue
        tag = "User" if role=="user" else "Assistant"
        convo += f"\n### {tag}:\n{content}\n"
    goal_block = goal.strip()
    if constraints.strip():
        goal_block += f"\n\nConstraints:\n{constraints.strip()}"
    if extra_context.strip():
        goal_block += f"\n\nAdditional Context:\n{extra_context.strip()}"
    guide = "\n(Return structured sections exactly as specified.)" if force_sections else ""
    return (
        f"### System:\n{SYSTEM_PROMPT}{guide}\n\n"
        f"### Instruction:\n{INSTRUCTION}\n"
        f"{convo}\n"
        f"### Input:\n{goal_block}\n\n"
        f"{RESPONSE_HEADER}"
    )

def ensure_numbering(text: str) -> str:
    lines = [l.strip() for l in text.splitlines()]
    if not lines: return ""
    fixed, in_steps, idx = [], False, 1
    for l in lines:
        lo = l.lower().strip()
        if lo.startswith("step-by-step") or "procedure" in lo:
            in_steps = True
            fixed.append(l); continue
        if in_steps and l and (l[:2].isdigit() or l.startswith(tuple([f"{i}." for i in range(1,10)]))):
            fixed.append(f"{idx}. {l.lstrip('0123456789. ').strip()}"); idx += 1
        else:
            fixed.append(l)
    return "\n".join(fixed).strip()

# -------------- Anti-repetition + explicit stopping --------------
STOP_STRS = [
    "\n###",  # next chat turn
    "\nTitle", "\nObjective", "\nMaterials", "\nSafety", "\nStep-by-step", "\nProcedure",
    "\nQuality", "\nTroubleshooting", "\nNotes", "\nReferences"
]
class MultiStringStopper(StoppingCriteria):
    def __init__(self, stop_strings, tokenizer):
        self.stop_ids = [tokenizer.encode(s, add_special_tokens=False) for s in stop_strings]
    def __call__(self, input_ids, scores, **kwargs):
        seq = input_ids[0].tolist()
        for s in self.stop_ids:
            if len(seq) >= len(s) and seq[-len(s):] == s:
                return True
        return False

def seed_everything(seed: Optional[int]):
    if seed is None or seed < 0: return
    torch.manual_seed(int(seed))
    np.random.seed(int(seed))

# -------------- Streaming generate with processors & stopper --------------
def stream_generate(prompt: str, temperature: float, top_p: float, max_new_tokens: int, seed: Optional[int]):
    seed_everything(seed)
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    processors = LogitsProcessorList([
        RepetitionPenaltyLogitsProcessor(1.15),
        NoRepeatNGramLogitsProcessor(6),
    ])


    gen_kwargs = dict(
        **inputs,
        max_new_tokens=int(max_new_tokens),
        do_sample=(temperature>0),
        temperature=float(temperature),
        top_p=float(top_p),
        eos_token_id=tokenizer.eos_token_id,
        streamer=streamer,
        pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
        logits_processor=processors
    )

    t = threading.Thread(target=model.generate, kwargs=gen_kwargs)
    t.start()

    buf = ""
    for new_text in streamer:
        buf += new_text
        if "\n" in buf:
            chunks = buf.split("\n")
            for c in chunks[:-1]:
                yield c + "\n"
            buf = chunks[-1]
    if buf:
        yield buf

# -------------- PDF export --------------
def save_pdf(history: List[Dict], protocol: str) -> str:
    from reportlab.lib import styles
    ts = time.strftime("%Y%m%d-%H%M%S")
    path = f"/content/protocol_{ts}.pdf"
    doc = SimpleDocTemplate(path, pagesize=A4, rightMargin=2*cm, leftMargin=2*cm, topMargin=1.5*cm, bottomMargin=1.5*cm)
    styles = getSampleStyleSheet()
    styles["BodyText"].alignment = enums.TA_LEFT
    flow = []
    flow.append(Paragraph("<b>Protocol Draft</b>", styles["Title"]))
    flow.append(Spacer(1, 0.3*cm))
    flow.append(Paragraph("<b>Conversation (recent turns)</b>", styles["Heading2"]))
    for turn in history[-10:]:
        role = turn.get("role","user").capitalize()
        content = (turn.get("content","") or "").replace("\n","<br/>")
        flow.append(Paragraph(f"<b>{role}:</b> {content}", styles["BodyText"]))
        flow.append(Spacer(1, 0.2*cm))
    flow.append(PageBreak())
    flow.append(Paragraph("<b>Model Output (Structured Protocol)</b>", styles["Heading2"]))
    for line in (protocol or "").splitlines():
        if not line.strip():
            flow.append(Spacer(1, 0.15*cm)); continue
        if line.lower().startswith(("title","objective","materials","safety","step-by-step","procedure","quality","troubleshooting","notes")):
            flow.append(Paragraph(f"<b>{line}</b>", styles["BodyText"]))
        else:
            flow.append(Paragraph(line, styles["BodyText"]))
    doc.build(flow)
    return path


# ---------------- Gradio UI (no TTS) ----------------
with gr.Blocks(css="#proto { white-space: pre-wrap; font-family: ui-monospace, Menlo, monospace; }") as demo:
    gr.Markdown("## 🧪 Wet‑Lab Protocol Assistant ")

    with gr.Row():
        with gr.Column(scale=2):
            goal = gr.Textbox(label="Goal", lines=3, placeholder="e.g., Test inhibitory effect of Compound X on E. coli growth")
            constraints = gr.Textbox(label="Constraints / Context (optional)", lines=3, placeholder="Equipment, reagents, safety limits, etc.")
            extra = gr.Textbox(label="Extra Context (optional)", lines=3, placeholder="Paste any relevant notes here.")
            force_sections = gr.Checkbox(value=True, label="Force structured sections (Title, Objective, Materials, Safety, Steps, QC, Troubleshooting, Notes)")

            # Audio input (ASR)
            mic = gr.Audio(sources=["microphone"], label="Speak your goal", type="numpy")
            asr_btn = gr.Button("Transcribe Mic → Goal")

            # Decoding controls
            with gr.Row():
                temperature = gr.Slider(0.0, 1.5, value=0.3, step=0.05, label="Temperature")
                top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top‑p")
            with gr.Row():
                max_new = gr.Slider(64, 2048, value=768, step=32, label="Max new tokens")
                seed = gr.Number(value=42, label="Seed (‑1 random)", precision=0)

            # Actions
            with gr.Row():
                run_btn = gr.Button("Generate Protocol", variant="primary")
                compare_btn = gr.Button("Compare Variants")
                clear_btn = gr.Button("Clear Session")
                save_pdf_btn = gr.Button("Save as PDF")

        with gr.Column(scale=3):
            proto = gr.Textbox(label="Protocol", lines=40, elem_id="proto")
            with gr.Accordion("Conversation (this session)", open=False):
                history_box = gr.JSON(label="Turns")


        with gr.Column():
            gr.Markdown("### Compare Variants")
            with gr.Column():
                temperature_b = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature (B)")
                top_p_b = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top‑p (B)")
                seed_b = gr.Number(value=123, label="Seed (B)", precision=0)
            with gr.Column():
                proto_a = gr.Textbox(label="Variant A", lines=14)
                proto_b = gr.Textbox(label="Variant B", lines=14)

    # Session state
    state = gr.State([])

    # Handlers
    def handle_asr(mic_audio, s):
        text = transcribe_audio(mic_audio)
        if text:
            s = s + [{"role":"user", "content": text}]
        return text, s, s
    asr_btn.click(handle_asr, inputs=[mic, state], outputs=[goal, state, history_box])

    def run_model(goal_text, constraints_text, extra_text, s, temperature, top_p, max_new, seed, force_sections):
        prompt = build_prompt(s, goal_text or "", constraints_text or "", extra_text or "", force_sections=force_sections)
        s = s + [{"role":"user", "content": (goal_text or "").strip()}]

        streamed = ""
        for chunk in stream_generate(prompt, float(temperature), float(top_p), int(max_new), int(seed) if seed is not None else -1):
            streamed += chunk
            yield streamed, s  # stream text only

        final_text = ensure_numbering(streamed)
        s = s + [{"role":"assistant", "content": final_text}]
        yield final_text, s

    run_btn.click(
        run_model,
        inputs=[goal, constraints, extra, state, temperature, top_p, max_new, seed, force_sections],
        outputs=[proto, history_box]
    )

    def compare(goal_text, constraints_text, extra_text, s, temp_a, top_a, seed_a, temp_b, top_b, seed_b, force_sections):
        base = build_prompt(s, goal_text or "", constraints_text or "", extra_text or "", force_sections=force_sections)

        # Variant A
        seed_everything(int(seed_a) if seed_a is not None else -1)
        ia = tokenizer([base], return_tensors="pt").to(model.device)
        with torch.inference_mode():
            ga = model.generate(
                **ia, max_new_tokens=768,
                do_sample=(temp_a>0), temperature=float(temp_a), top_p=float(top_a),
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
                logits_processor=LogitsProcessorList([RepetitionPenaltyLogitsProcessor(1.15), NoRepeatNGramLogitsProcessor(6)]),
                stopping_criteria=StoppingCriteriaList([MultiStringStopper(STOP_STRS, tokenizer)]),
            )
        out_a = tokenizer.decode(ga[0], skip_special_tokens=True).split(RESPONSE_HEADER,1)[-1].strip()
        out_a = ensure_numbering(out_a)

        # Variant B
        seed_everything(int(seed_b) if seed_b is not None else -1)
        ib = tokenizer([base], return_tensors="pt").to(model.device)
        with torch.inference_mode():
            gb = model.generate(
                **ib, max_new_tokens=768,
                do_sample=(temp_b>0), temperature=float(temp_b), top_p=float(top_b),
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
                logits_processor=LogitsProcessorList([RepetitionPenaltyLogitsProcessor(1.15), NoRepeatNGramLogitsProcessor(6)]),
                stopping_criteria=StoppingCriteriaList([MultiStringStopper(STOP_STRS, tokenizer)]),
            )
        out_b = tokenizer.decode(gb[0], skip_special_tokens=True).split(RESPONSE_HEADER,1)[-1].strip()
        out_b = ensure_numbering(out_b)

        return out_a, out_b

    compare_btn.click(
        compare,
        inputs=[goal, constraints, extra, state, temperature, top_p, seed, temperature_b, top_p_b, seed_b, force_sections],
        outputs=[proto_a, proto_b]
    )

    def do_clear():
        return "", "", "", [], ""
    clear_btn.click(do_clear, outputs=[goal, constraints, extra, state, proto])

    def do_save_pdf(s, text):
        path = save_pdf(s, text or "")
        return path
    saved_pdf = gr.File(label="Download PDF")
    save_pdf_btn.click(do_save_pdf, inputs=[state, proto], outputs=[saved_pdf])


if __name__ == "__main__":
    demo.launch()
'''

REQS = """gradio>=4.44.0
transformers>=4.44.0
accelerate>=0.33.0
huggingface_hub>=0.23.0
bitsandbytes>=0.43.1
sentencepiece>=0.2.0
reportlab>=4.0.0
peft>=0.11.0
datasets>=2.20.0
"""

README = """# Wet‑Lab Protocol Assistant (BioMistral‑7B LoRA)
A Gradio app that generates structured wet‑lab protocols (Title, Objective, Materials, Safety, Steps, QC, Troubleshooting, Notes) from a research goal and constraints.
- Streaming outputs
- Compare variants (A/B decoding)
- Export to PDF

> Model: `ruchirnamjoshi/BioMistralFinetuned`
"""

with open(os.path.join(SPACE_DIR, "app.py"), "w", encoding="utf-8") as f:
    f.write(APP_CODE)
with open(os.path.join(SPACE_DIR, "requirements.txt"), "w", encoding="utf-8") as f:
    f.write(REQS)
with open(os.path.join(SPACE_DIR, "README.md"), "w", encoding="utf-8") as f:
    f.write(README)

print("✅ Space files written to", SPACE_DIR)

✅ Space files written to /content/space


In [42]:
from huggingface_hub import HfApi, create_repo, upload_folder

SPACE_ID = "ruchirnamjoshi/protocol-assistant"

api = HfApi()

# Create the Space (sdk=gradio). If it already exists, set exist_ok=True.
api.create_repo(
    repo_id=SPACE_ID,
    repo_type="space",
    space_sdk="gradio",
    private=False,
    exist_ok=True,
)

# Upload everything in /content/space
upload_folder(
    repo_id=SPACE_ID,
    repo_type="space",
    folder_path=SPACE_DIR,
    commit_message="Initial commit: app.py + requirements + README",
)

space_url = f"https://huggingface.co/spaces/{SPACE_ID}"
print("🚀 Deployed Space:", space_url)
print("Next: open the Space page → Settings → Hardware → pick a GPU (e.g., T4) → Restart Space.")

🚀 Deployed Space: https://huggingface.co/spaces/ruchirnamjoshi/protocol-assistant
Next: open the Space page → Settings → Hardware → pick a GPU (e.g., T4) → Restart Space.


In [40]:
from pathlib import Path

SPACE_DIR = "/content/space"   # same as before
Path(SPACE_DIR).mkdir(parents=True, exist_ok=True)

README_WITH_META = """---
title: Wet‑Lab Protocol Assistant
emoji: 🧪
colorFrom: indigo
colorTo: green
sdk: gradio
sdk_version: "4.44.0"
app_file: app.py
pinned: false
license: apache-2.0
---

# Wet‑Lab Protocol Assistant (BioMistral‑7B LoRA)
A Gradio app that generates structured wet‑lab protocols (Title, Objective, Materials, Safety, Steps, QC, Troubleshooting, Notes) from a research goal and constraints.

- Streaming outputs
- Compare variants (A/B decoding)
- Export to PDF

**Model:** `ruchirnamjoshi/BioMistralFinetuned`
"""

Path(f"{SPACE_DIR}/README.md").write_text(README_WITH_META, encoding="utf-8")
print("✅ README.md updated with Spaces config")

✅ README.md updated with Spaces config


In [41]:
Path(f"{SPACE_DIR}/runtime.txt").write_text("python-3.10", encoding="utf-8")

11

In [36]:
from huggingface_hub import upload_folder

SPACE_ID = "ruchirnamjsohi/protocol-assistant"
upload_folder(
    repo_id=SPACE_ID,
    repo_type="space",
    folder_path=SPACE_DIR,
    commit_message="Fix: add Spaces README front-matter + runtime",
)
print("🚀 Pushed. Open your Space and click Restart if needed.")

RepositoryNotFoundError: 404 Client Error. (Request ID: Root=1-68a76274-55a112c27194af45129660a8;200f0719-d51a-496b-821a-c86f1487a9b8)

Repository Not Found for url: https://huggingface.co/api/spaces/ruchirnamjsohi/protocol-assistant/preupload/main.
Please make sure you specified the correct `repo_id` and `repo_type`.
If you are trying to access a private or gated repo, make sure you are authenticated. For more details, see https://huggingface.co/docs/huggingface_hub/authentication
Note: Creating a commit assumes that the repo already exists on the Huggingface Hub. Please use `create_repo` if it's not the case.