In [None]:
# Cell 1: Install dependencies (run once).
# In Colab, prefix with `!` when running these commands in a code cell.
!pip install -q unsloth transformers accelerate bitsandbytes safetensors fastapi uvicorn
# Restart the kernel after large installs if needed (Colab may require runtime restart).

Cell 2: Add the cloned repository to the import path and load the Unsloth model into a variable named `llm`. Run this cell once; subsequent cells can reuse `llm` without reloading the model.

In [None]:
# Cell 2: load model once
import getpass
import os
import sys

# Ensure repo root is on sys.path (adjust path if your notebook root differs).
REPO_ROOT = '/content/reflexion'  # change to the path where you cloned the repo
if REPO_ROOT not in sys.path:
    # Guarded so that re-running this cell does not stack duplicate entries.
    sys.path.insert(0, REPO_ROOT)
from hotpotqa_runs.unsloth_llm import UnslothLLM

# Hugging Face token (only needed for gated models).
# Never paste the token into the notebook source: export HUGGINGFACE_API_TOKEN
# (or set it through notebook secrets) before running, or type it at the hidden
# prompt below. A value already present in the environment is left untouched.
hf_token = os.environ.get('HUGGINGFACE_API_TOKEN')
if not hf_token:
    hf_token = getpass.getpass('Hugging Face token (leave blank if not needed): ').strip()
    if hf_token:
        # Publish it so later cells that read the environment variable see it too.
        os.environ['HUGGINGFACE_API_TOKEN'] = hf_token

MODEL = 'unsloth/Meta-Llama-3.1-8B-bnb-4bit'
# Create the LLM once. This may take a few minutes the first time.
# NOTE(review): a blank token is passed as None, assuming UnslothLLM treats
# None as "no authentication" -- confirm against hotpotqa_runs.unsloth_llm.
llm = UnslothLLM(model_name=MODEL, token=hf_token or None, load_in_4bit=True, max_seq_length=8192)
print('LLM ready:', llm)

Cell 3: Call the runner with the pre-initialized `llm`. Build an argparse-like namespace and pass `external_llm=llm` to `run()`.

In [None]:
# Cell 3: run the PubMedQA runner using the already-loaded llm
# Requires Cell 2 to have been executed first: this cell relies on `os` and `llm`
# from the kernel namespace.
import argparse
from types import SimpleNamespace
from hotpotqa_runs import run_pubmedqa

# Options for the runner, collected in one mapping so they are easy to tweak.
# Adjust dataset/model/limit as needed.
# NOTE(review): confirm that a 'validation' split exists for this
# dataset/dataset_config pair, or that the runner handles a missing split.
runner_options = {
    'dataset': 'qiaojin/PubMedQA',
    'split': 'validation',
    'limit': 5,
    'model': 'unsloth/Meta-Llama-3.1-8B-bnb-4bit',
    'use_transformers': False,
    'use_unsloth': True,
    'hf_token': os.environ.get('HUGGINGFACE_API_TOKEN'),
    'out': None,
    'agent': 'react',
    'dataset_config': 'pqa_labeled',
    'reflexion_strategy': 'reflexion',
    'max_steps': 6,
    'question_field': None,
    'context_field': None,
    'answer_field': None,
    'device': None,
    'load_in_4bit': True,
    'max_seq_length': 8192,
}

# Expose the options as attributes (args.limit, args.dataset, ...), i.e. an
# argparse-like object.
args = SimpleNamespace(**runner_options)

# Hand the pre-initialized llm to the runner so the model is not reloaded.
# You can re-run this cell multiple times with a different `limit` or dataset slice
# without reloading the model.
run_pubmedqa.run(args, external_llm=llm)

Cell 4: Optional — interactive prompt using the same `llm` (quick tests).

In [None]:
# Cell 4: quick interactive call
# Sends a single prompt to the `llm` object created in Cell 2 and prints
# whatever the call returns.
prompt = 'Summarize the clinical problem of diabetes in one sentence.'
response = llm(prompt)
print(response)