diff --git a/README.md b/README.md
index 42e9be125..3728bb401 100644
--- a/README.md
+++ b/README.md
@@ -47,8 +47,8 @@ bash install.sh
 
 # Compute Requirements
 
-1. To run a **validator**, you will need at least 24GB of VRAM.
-2. To run the default huggingface **miner**, you will need at least 18GB of VRAM.
+1. To run a **validator**, you will need at least 62GB of VRAM.
+2. To run the default huggingface **miner**, you will need at least 62GB of VRAM.
 
@@ -77,10 +77,11 @@ For ease of use, you can run the scripts as well with PM2. Installation of PM2 i
 sudo apt update && sudo apt install jq && sudo apt install npm && sudo npm install pm2 -g && pm2 update
 ```
 
-Example of running a SOLAR miner:
+Example of running a Llama3 miner:
+
 ```bash
-pm2 start neurons/miners/huggingface/miner.py --interpreter python3 --name solar_miner -- --netuid 1 --subtensor.network finney --wallet.name my_wallet --wallet.hotkey m1 --neuron.model_id casperhansen/llama-3-70b-instruct-awq --axon.port 21988 --logging.debug
-```
+pm2 start neurons/miners/huggingface/miner.py --interpreter python3 --name llama3_miner -- --netuid 1 --subtensor.network finney --wallet.name my_wallet --wallet.hotkey m1 --neuron.model_id casperhansen/llama-3-70b-instruct-awq --neuron.load_in_4bit True --axon.port 21988 --logging.debug
+```
 
 # Testnet
 We highly recommend that you run your miners on testnet before deploying on main. This will give you an opportunity to debug your systems, and ensure that you will not lose valuable immunity time. The SN1 testnet is **netuid 61**.
 
@@ -90,7 +91,7 @@ In order to run on testnet, you will need to go through the same hotkey registra
 
 To run:
 ```bash
-pm2 start neurons/miners/huggingface/miner.py --interpreter python3 --name solar_miner -- --netuid 61 --subtensor.network test --wallet.name my_test_wallet --wallet.hotkey m1 --neuron.model_id casperhansen/llama-3-70b-instruct-awq --axon.port 21988 --logging.debug
+pm2 start neurons/miners/huggingface/miner.py --interpreter python3 --name llama3_miner -- --netuid 61 --subtensor.network test --wallet.name my_test_wallet --wallet.hotkey m1 --neuron.model_id casperhansen/llama-3-70b-instruct-awq --neuron.load_in_4bit True --axon.port 21988 --logging.debug
 ```
 
 # Limitations
diff --git a/prompting/llms/hf.py b/prompting/llms/hf.py
index 869b32411..be46dc1b8 100644
--- a/prompting/llms/hf.py
+++ b/prompting/llms/hf.py
@@ -19,7 +19,7 @@ from typing import List, Dict
 
 import bittensor as bt
-from transformers import Pipeline, pipeline, AutoTokenizer, TextIteratorStreamer
+from transformers import BitsAndBytesConfig, pipeline, AutoTokenizer, TextIteratorStreamer
 from prompting.mock import MockPipeline
 from prompting.cleaners.cleaner import CleanerPipeline
 from transformers import pipeline, TextIteratorStreamer, AutoTokenizer
 
@@ -83,12 +83,15 @@ def load_hf_pipeline(
             streamer=streamer,
         )
     else:
+        kwargs = model_kwargs.copy()
+        kwargs["bnb_4bit_compute_dtype"] = kwargs.pop("torch_dtype")
+        quant_config = BitsAndBytesConfig(**kwargs)
         llm_pipeline = pipeline(
             "text-generation",
             model=model_id,
             tokenizer=tokenizer,
             device_map=device,
-            model_kwargs=model_kwargs,
+            quant_config=quant_config,
             streamer=streamer,
         )
 
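Two details of this hunk are worth flagging. First, `kwargs.pop("torch_dtype")` raises a `KeyError` whenever the caller never set `torch_dtype` in `model_kwargs`. Second, `transformers.pipeline()` does not take a `quant_config` keyword; in the transformers version pinned below, quantization settings normally reach `from_pretrained` via `model_kwargs={"quantization_config": ...}`. A minimal sketch of that wiring, with a hypothetical `model_id` and `model_kwargs` standing in for `load_hf_pipeline`'s arguments (the streamer is omitted for brevity):

```python
import torch
from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline

# Hypothetical stand-ins for load_hf_pipeline's arguments.
model_id = "my-org/my-model"  # placeholder, not a real checkpoint
model_kwargs = {"load_in_4bit": True, "torch_dtype": torch.bfloat16}

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Translate the pipeline-style kwargs into BitsAndBytesConfig fields.
# pop(..., None) avoids the KeyError a bare pop("torch_dtype") raises
# when the caller never set torch_dtype.
kwargs = model_kwargs.copy()
compute_dtype = kwargs.pop("torch_dtype", None)
quant_config = BitsAndBytesConfig(bnb_4bit_compute_dtype=compute_dtype, **kwargs)

# pipeline() has no `quant_config` parameter; quantization settings are
# forwarded to AutoModelForCausalLM.from_pretrained() through model_kwargs
# under the key "quantization_config".
llm_pipeline = pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    device_map="auto",
    model_kwargs={"quantization_config": quant_config},
)
```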
diff --git a/prompting/utils/config.py b/prompting/utils/config.py
index 2f4c148f6..b869d4fbd 100644
--- a/prompting/utils/config.py
+++ b/prompting/utils/config.py
@@ -83,7 +83,7 @@ def add_args(cls, parser):
         "--neuron.llm_max_allowed_memory_in_gb",
         type=int,
         help="The max gpu memory utilization set for initializing the model. This parameter currently reflects on the property `gpu_memory_utilization` of vllm",
-        default=60,
+        default=62,
     )
 
     parser.add_argument(
diff --git a/requirements.txt b/requirements.txt
index 9a91e8df0..c0d558a6e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,6 @@ datasets==2.14.6
 deprecation==2.1.0
 torch==2.1.1
 torchmetrics
-transformers==4.36.2
 pre-commit==3.3.2
 git+https://github.com/synapse-alpha/mathgenerator.git@main#egg=mathgenerator
 numpy==1.22.0
@@ -24,4 +23,6 @@ python-dotenv
 wikipedia_sections
 vllm
 loguru
-argostranslate
\ No newline at end of file
+argostranslate
+transformers==4.41.1
+autoawq==0.2.5
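The dependency changes back the README's new example miner: `transformers` is bumped to 4.41.1, which includes Llama 3 support, and `autoawq` is pinned because transformers needs it installed to load AWQ-quantized checkpoints like the one used in the pm2 commands. A minimal sketch of loading that checkpoint directly, assuming a CUDA machine with enough VRAM (`device_map="auto"` is my assumption, not part of the diff):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# The AWQ checkpoint from the README examples; its weights are already
# 4-bit quantized, so it loads through the regular from_pretrained path
# once autoawq is installed -- no BitsAndBytesConfig needed here.
model_id = "casperhansen/llama-3-70b-instruct-awq"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

# Reports the loaded parameter footprint in GB (weights only; runtime
# VRAM use with activations and KV cache will be higher).
print(f"Memory footprint: {model.get_memory_footprint() / 1e9:.1f} GB")
```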