Create the `performance` environment:

```bash
conda create -n performance python=3.10
conda activate performance
pip install -e . -r ./setup/requirements_performance.txt
```
Create the `light` environment:

```bash
conda create -n light python=3.10
conda activate light
pip install -e . -r ./setup/requirements_light.txt
```

Submit a single performance run (model config and task as arguments), or sweep over all configurations:

```bash
sbatch run_performance.sh llama_8b_wanda_50 knowledge
bash sweep.sh
```

Local pruned/distilled models must be generated first; see `compress/README.md`.
Create the `vllm` environment (used to serve models for TrustLLM generation):

```bash
conda create -n vllm python=3.10
conda activate vllm
pip install -r ./setup/requirements_vllm.txt
```
Create the `trustllm` environment:

```bash
conda create -n trustllm python=3.10
conda activate trustllm
pip install -r ./setup/requirements_trust.txt
```

Then change into the TrustLLM directory:

```bash
cd TrustLLM
```

- Edit `generate_all.py`: set `MODEL_PATH` to your model path.
- Register the model in `config.py`: add `"/path/to/model": "model_name"` to `model_map` and append `"model_name"` to the `openai_model` array (a sketch of these edits follows this list).
- Serve the model on GPU:

```bash
conda activate vllm
vllm serve "/path/to/model" \
--host 0.0.0.0 \
--port 8000 \
--dtype auto \
--api-key localtoken \
    --served-model-name model_name
```

- In `config.py`, set `openai_key="localtoken"` and `openai_api_base="http://localhost:8000/v1"`.
- SSH to the same compute node and run generation:

```bash
conda activate trustllm
python generate_all.py
```

Responses are saved to `UniComp/TrustLLM/generation_results/{model_name}/`.
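The exact layout of `generate_all.py` and `config.py` is repository-specific; the sketch below only illustrates the edits described above, using the placeholder path and name from this README (treat the surrounding entries as hypothetical):

```python
# generate_all.py -- point generation at the checkpoint to be served
MODEL_PATH = "/path/to/model"

# config.py -- register the model and route it through the local vLLM server
openai_key = "localtoken"                      # must match --api-key passed to `vllm serve`
openai_api_base = "http://localhost:8000/v1"   # OpenAI-compatible endpoint exposed by vLLM

model_map = {
    # ... existing entries ...
    "/path/to/model": "model_name",            # checkpoint path -> served model name
}

openai_model = [
    # ... existing entries ...
    "model_name",                              # query this model via the OpenAI-compatible API
]
```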
Responses are evaluated by GPT-4 Turbo — an OpenAI API key is required.
- Swap to OpenAI in `config.py`: set `openai_key="YOUR_KEY"` and `openai_api_base="https://api.openai.com/v1"` (a quick key check is sketched below).
- Run evaluation:

```bash
conda activate trustllm
python evaluate.py --model_name "path/to/model"
```

Or via SLURM (uncomment the trustllm lines in `run.sh`):

```bash
sbatch run.sh
```

Results are printed to stdout.
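Before launching the paid GPT-4 Turbo evaluation, you can optionally verify that the key and endpoint set in `config.py` are usable. This assumes the `openai` Python package is available in the `trustllm` environment:

```python
# Sanity-check the OpenAI credentials before running evaluate.py (optional).
from openai import OpenAI

client = OpenAI(api_key="YOUR_KEY", base_url="https://api.openai.com/v1")

# Listing models is a cheap call; it fails fast if the key or endpoint is wrong.
print([m.id for m in client.models.list()][:5])
```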
```bash
sbatch run_performance.sh
```