# PackLLM – MMLU Fusion (Colab)
This Colab notebook installs a pinned, mutually compatible dependency stack, clones the repo, and runs the *opt*-fusion experiment on the MMLU `abstract_algebra` subject, fusing `NousResearch/Llama-2-7b-hf` with three peer models (`mistralai/Mistral-7B-v0.1`, `microsoft/phi-2`, and `Deci/DeciLM-7B`). To run more subjects, extend the `SUBJECTS` list in the experiment cell below.

In [None]:
# ⚙️ Install library stack (quiet) with known-good versions
!pip -q install transformers==4.41.1 accelerate datasets evaluate scikit-learn sentencepiece \
    fsspec==2023.6.0 gcsfs==2023.6.0 --upgrade

In [None]:
# 📦 Clone repo
!git clone -q https://github.com/tenet-diver/ppack_of_llamas.git
%cd ppack_of_llamas

In [None]:
# 🔧 Install repo requirements (these already include matching fsspec/gcsfs pins)
!pip -q install -r downstream_tasks/requirements.txt

In [None]:
import subprocess, os, textwrap, shlex, sys

SUBJECTS = ['abstract_algebra']  # ← add more subjects here
# All models are handed to main.py as one comma-separated --model_name value.
MODEL_NAMES = ','.join(['NousResearch/Llama-2-7b-hf',
    'mistralai/Mistral-7B-v0.1',
    'microsoft/phi-2',
    'Deci/DeciLM-7B'])

# Run the opt-fusion MMLU evaluation once per subject; any non-zero exit status
# raises subprocess.CalledProcessError and stops the loop.
for subj in SUBJECTS:
    # Build an argv list and run it WITHOUT a shell. The earlier shell string
    # separated the flags with real newline characters (the "\n" escapes), so the
    # shell executed `python downstream_tasks/main.py` with no arguments and then
    # tried to run every `--flag value` line as a command of its own.
    # An argv list also needs no quoting for the subject or model names.
    cmd = [
        sys.executable, 'downstream_tasks/main.py',  # same interpreter as the kernel
        '--task_name', 'mmlu',
        '--subj', subj,
        '--fusion', 'opt',
        '--model_name', MODEL_NAMES,
        '--few_shot', '0',
        '--data_cache_dir', 'datasets',
        '--output_dir', f'outputs/fusion_opt_mmlu/{subj}',
        '--annotation_size', '5',
        '--seed', '1',
    ]
    # Echo a copy-pasteable, shell-quoted form of the command for the log.
    print(shlex.join(cmd))
    subprocess.run(cmd, check=True)

In [None]:
# 📊 Gather JSON metrics produced (if any)
import glob, json, pandas as pd, pathlib

RESULTS_ROOT = 'outputs/fusion_opt_mmlu'


def load_metric_rows(results_root):
    """Build one table row per ``*.json`` file found anywhere under ``results_root``.

    Args:
        results_root: Directory that is searched recursively for JSON files.

    Returns:
        A list of dicts, ordered by file path. Each dict has a ``'file'`` key
        holding the path relative to ``results_root`` (POSIX separators), so
        same-named files from different sub-directories stay distinguishable,
        followed by the keys of the JSON object. A payload that is not a JSON
        object is stored under a single ``'value'`` key instead of raising.

    Raises:
        json.JSONDecodeError: If a matched file does not contain valid JSON.
    """
    pattern = str(pathlib.Path(results_root) / '**' / '*.json')
    rows = []
    # glob returns matches in arbitrary order; sort for a reproducible table.
    for fp in sorted(glob.glob(pattern, recursive=True)):
        with open(fp, encoding='utf-8') as f:
            payload = json.load(f)
        if not isinstance(payload, dict):
            # `**payload` below needs a mapping; wrap lists/scalars.
            payload = {'value': payload}
        rel_path = pathlib.Path(fp).relative_to(results_root).as_posix()
        rows.append({'file': rel_path, **payload})
    return rows


rows = load_metric_rows(RESULTS_ROOT)
if rows:
    # Show every row, but only for this display call: option_context restores
    # the previous pandas setting afterwards instead of changing it globally.
    with pd.option_context('display.max_rows', None):
        display(pd.DataFrame(rows))
else:
    print('No JSON results yet.')