You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/patrickstar/llm_learn/Llama3-Finetuning/script/../finetune_llama3.py", line 448, in <module>
train()
File "/home/patrickstar/llm_learn/Llama3-Finetuning/script/../finetune_llama3.py", line 441, in train
trainer.train()
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/transformers/trainer.py", line 1780, in train
return inner_training_loop(
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/transformers/trainer.py", line 1936, in _inner_training_loop
model, self.optimizer, self.lr_scheduler = self.accelerator.prepare(
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/accelerate/accelerator.py", line 1255, in prepare
result = self._prepare_deepspeed(*args)
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/accelerate/accelerator.py", line 1640, in _prepare_deepspeed
engine, optimizer, _, lr_scheduler = deepspeed.initialize(**kwargs)
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/__init__.py", line 181, in initialize
engine = DeepSpeedEngine(args=args,
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 307, in __init__
self._configure_optimizer(optimizer, model_parameters)
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 1232, in _configure_optimizer
basic_optimizer = self._configure_basic_optimizer(model_parameters)
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 1309, in _configure_basic_optimizer
optimizer = FusedAdam(
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py", line 94, in __init__
fused_adam_cuda = FusedAdamBuilder().load()
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py", line 480, in load
return self.jit_load(verbose)
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py", line 524, in jit_load
op_module = load(name=self.name,
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1308, in load
return _jit_compile(
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1710, in _jit_compile
_write_ninja_file_and_build_library(
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1823, in _write_ninja_file_and_build_library
_run_ninja_build(
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 2116, in _run_ninja_build
raise RuntimeError(message) from e
RuntimeError: Error building extension 'fused_adam'
The text was updated successfully, but these errors were encountered:
一直报错fused_adam的错误,我的设备是双卡2080ti,版本和requirement一样
FAILED: multi_tensor_adam.cuda.o
/usr/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE="_gcc" -DPYBIND11_STDLIB="_libstdcpp" -DPYBIND11_BUILD_ABI="_cxxabi1011" -I/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/include -isystem /home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/include/TH -isystem /home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/include/THC -isystem /home/patrickstar/miniconda3/envs/pytorch/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_75,code=compute_75 -gencode=arch=compute_75,code=sm_75 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_75,code=compute_75 -std=c++17 -c /home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o
gcc: fatal error: cannot execute 'cc1plus': execvp: No such file or directory
compilation terminated.
nvcc fatal : Failed to preprocess host compiler properties.
[2/3] c++ -MMD -MF fused_adam_frontend.o.d -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE="_gcc" -DPYBIND11_STDLIB="_libstdcpp" -DPYBIND11_BUILD_ABI="_cxxabi1011" -I/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/include -isystem /home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/include/TH -isystem /home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/include/THC -isystem /home/patrickstar/miniconda3/envs/pytorch/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -std=c++17 -g -Wno-reorder -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -c /home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/fused_adam_frontend.cpp -o fused_adam_frontend.o
ninja: build stopped: subcommand failed.
Traceback (most recent call last):
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 2100, in _run_ninja_build
subprocess.run(
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/subprocess.py", line 526, in run
raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/patrickstar/llm_learn/Llama3-Finetuning/script/../finetune_llama3.py", line 448, in <module>
train()
File "/home/patrickstar/llm_learn/Llama3-Finetuning/script/../finetune_llama3.py", line 441, in train
trainer.train()
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/transformers/trainer.py", line 1780, in train
return inner_training_loop(
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/transformers/trainer.py", line 1936, in _inner_training_loop
model, self.optimizer, self.lr_scheduler = self.accelerator.prepare(
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/accelerate/accelerator.py", line 1255, in prepare
result = self._prepare_deepspeed(*args)
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/accelerate/accelerator.py", line 1640, in _prepare_deepspeed
engine, optimizer, _, lr_scheduler = deepspeed.initialize(**kwargs)
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/__init__.py", line 181, in initialize
engine = DeepSpeedEngine(args=args,
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 307, in __init__
self._configure_optimizer(optimizer, model_parameters)
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 1232, in _configure_optimizer
basic_optimizer = self._configure_basic_optimizer(model_parameters)
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 1309, in _configure_basic_optimizer
optimizer = FusedAdam(
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py", line 94, in __init__
fused_adam_cuda = FusedAdamBuilder().load()
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py", line 480, in load
return self.jit_load(verbose)
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py", line 524, in jit_load
op_module = load(name=self.name,
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1308, in load
return _jit_compile(
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1710, in _jit_compile
_write_ninja_file_and_build_library(
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1823, in _write_ninja_file_and_build_library
_run_ninja_build(
File "/home/patrickstar/miniconda3/envs/pytorch/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 2116, in _run_ninja_build
raise RuntimeError(message) from e
RuntimeError: Error building extension 'fused_adam'
The text was updated successfully, but these errors were encountered: